diff --git a/.appveyor.yml b/.appveyor.yml
index d31f9aed..c74e89db 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -9,6 +9,7 @@ environment:
   - MSYSTEM: MINGW64
     CPU: x86_64
     CONFIG_FLAGS: --enable-debug
+    EXTRA_CFLAGS: "-fcommon"
   - MSYSTEM: MINGW32
     CPU: i686
     MSVC: x86
@@ -16,21 +17,30 @@ environment:
   - MSYSTEM: MINGW32
     CPU: i686
     CONFIG_FLAGS: --enable-debug
+    EXTRA_CFLAGS: "-fcommon"
   - MSYSTEM: MINGW64
     CPU: x86_64
     MSVC: amd64
+    CONFIG_FLAGS:
   - MSYSTEM: MINGW64
     CPU: x86_64
+    CONFIG_FLAGS:
+    EXTRA_CFLAGS: "-fcommon"
   - MSYSTEM: MINGW32
     CPU: i686
     MSVC: x86
+    CONFIG_FLAGS:
   - MSYSTEM: MINGW32
     CPU: i686
+    CONFIG_FLAGS:
+    EXTRA_CFLAGS: "-fcommon"
 
 install:
   - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH%
   - if defined MSVC call "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %MSVC%
   - if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc
+  - pacman --noconfirm -Syuu
+  - pacman --noconfirm -S autoconf
 
 build_script:
   - bash -c "autoconf"
diff --git a/.cirrus.yml b/.cirrus.yml
deleted file mode 100644
index 75695398..00000000
--- a/.cirrus.yml
+++ /dev/null
@@ -1,46 +0,0 @@
-env:
-  CIRRUS_CLONE_DEPTH: 1
-  ARCH: amd64
-
-task:
-  matrix:
-      env:
-        DEBUG_CONFIG: --enable-debug
-      env:
-        DEBUG_CONFIG: --disable-debug
-  matrix:
-    - env:
-        PROF_CONFIG: --enable-prof
-    - env:
-        PROF_CONFIG: --disable-prof
-  matrix:
-    - name: 64-bit
-      env:
-        CC:
-        CXX:
-    - name: 32-bit
-      env:
-        CC: cc -m32
-        CXX: c++ -m32
-  matrix:
-    - env:
-        UNCOMMON_CONFIG:
-    - env:
-        UNCOMMON_CONFIG: --with-lg-page=16 --with-malloc-conf=tcache:false
-  freebsd_instance:
-    matrix:
-      image: freebsd-12-3-release-amd64
-  install_script:
-    - sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf
-    - pkg upgrade -y
-    - pkg install -y autoconf gmake
-  script:
-    - autoconf
-    # We don't perfectly track freebsd stdlib.h definitions.  This is fine when
-    # we count as a system header, but breaks otherwise, like during these
-    # tests.
-    - ./configure --with-jemalloc-prefix=ci_ ${DEBUG_CONFIG} ${PROF_CONFIG} ${UNCOMMON_CONFIG}
-    - export JFLAG=`sysctl -n kern.smp.cpus`
-    - gmake -j${JFLAG}
-    - gmake -j${JFLAG} tests
-    - gmake check
diff --git a/.clang-format b/.clang-format
index 719c03c5..527ec375 100644
--- a/.clang-format
+++ b/.clang-format
@@ -4,10 +4,10 @@
 # AccessModifierOffset: -2
 AlignAfterOpenBracket: DontAlign
 AlignConsecutiveAssignments: false
-AlignConsecutiveDeclarations: false
+AlignConsecutiveDeclarations: true
 AlignEscapedNewlines: Right
 AlignOperands: false
-AlignTrailingComments: false
+AlignTrailingComments: true
 AllowAllParametersOfDeclarationOnNextLine: true
 AllowShortBlocksOnASingleLine: false
 AllowShortCaseLabelsOnASingleLine: false
@@ -20,16 +20,16 @@ AlwaysBreakBeforeMultilineStrings: true
 BinPackArguments: true
 BinPackParameters: true
 BraceWrapping:
-  AfterClass: false
-  AfterControlStatement: false
-  AfterEnum: false
-  AfterFunction: false
-  AfterNamespace: false
-  AfterObjCDeclaration: false
-  AfterStruct: false
-  AfterUnion: false
-  BeforeCatch: false
-  BeforeElse: false
+  AfterClass: true
+  AfterControlStatement: true
+  AfterEnum: true
+  AfterFunction: true
+  AfterNamespace: true
+  AfterObjCDeclaration: true
+  AfterStruct: true
+  AfterUnion: true
+  BeforeCatch: true
+  BeforeElse: true
   IndentBraces: false
 # BreakAfterJavaFieldAnnotations: true
 BreakBeforeBinaryOperators: NonAssignment
@@ -43,7 +43,7 @@ ColumnLimit: 80
 # CompactNamespaces: true
 # ConstructorInitializerAllOnOneLineOrOnePerLine: true
 # ConstructorInitializerIndentWidth: 4
-ContinuationIndentWidth: 2
+ContinuationIndentWidth: 4
 Cpp11BracedListStyle: true
 DerivePointerAlignment: false
 DisableFormat:   false
@@ -57,7 +57,7 @@ ForEachMacros:   [ ql_foreach, qr_foreach, ]
 # IncludeIsMainRegex: ''
 IndentCaseLabels: false
 IndentPPDirectives: AfterHash
-IndentWidth: 4
+IndentWidth: 8
 IndentWrappedFunctionNames: false
 # JavaImportGroups: []
 # JavaScriptQuotes: Leave
@@ -73,8 +73,8 @@ MaxEmptyLinesToKeep: 1
 # ObjCSpaceAfterProperty: false
 # ObjCSpaceBeforeProtocolList: false
 
-PenaltyBreakAssignment: 2
-PenaltyBreakBeforeFirstCallParameter: 1
+PenaltyBreakAssignment: 100
+PenaltyBreakBeforeFirstCallParameter: 100
 PenaltyBreakComment: 300
 PenaltyBreakFirstLessLess: 120
 PenaltyBreakString: 1000
@@ -96,7 +96,7 @@ PointerAlignment: Right
 #         - 'cpp'
 #       BasedOnStyle: llvm
 #       CanonicalDelimiter: 'cc'
-ReflowComments: true
+ReflowComments: false
 SortIncludes: false
 SpaceAfterCStyleCast: false
 # SpaceAfterTemplateKeyword: true
@@ -107,7 +107,7 @@ SpaceBeforeAssignmentOperators: true
 SpaceBeforeParens: ControlStatements
 # SpaceBeforeRangeBasedForLoopColon: true
 SpaceInEmptyParentheses: false
-SpacesBeforeTrailingComments: 2
+SpacesBeforeTrailingComments: 1
 SpacesInAngles:  false
 SpacesInCStyleCastParentheses: false
 # SpacesInContainerLiterals: false
@@ -118,5 +118,5 @@ SpacesInSquareBrackets: false
 # used by some of the core jemalloc developers.
 # StatementMacros: []
 TabWidth: 8
-UseTab: Never
+UseTab: ForIndentation
 ...
diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
new file mode 100644
index 00000000..7f5f6975
--- /dev/null
+++ b/.git-blame-ignore-revs
@@ -0,0 +1,2 @@
+554185356bf990155df8d72060c4efe993642baf
+34f359e0ca613b5f9d970e9b2152a5203c9df8d6
diff --git a/.github/workflows/check_formatting.yaml b/.github/workflows/check_formatting.yaml
new file mode 100644
index 00000000..8a10065f
--- /dev/null
+++ b/.github/workflows/check_formatting.yaml
@@ -0,0 +1,10 @@
+name: 'Check Formatting'
+on: [pull_request]
+jobs:
+  check-formatting:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+      - name: Check for trailing whitespace
+        run: scripts/check_trailing_whitespace.sh
diff --git a/.github/workflows/freebsd-ci.yml b/.github/workflows/freebsd-ci.yml
new file mode 100644
index 00000000..6c702d88
--- /dev/null
+++ b/.github/workflows/freebsd-ci.yml
@@ -0,0 +1,66 @@
+# This config file is generated by ./scripts/gen_gh_actions.py.
+# Do not edit by hand.
+
+name: FreeBSD CI
+
+on:
+  push:
+    branches: [ dev, ci_travis ]
+  pull_request:
+    branches: [ dev ]
+
+jobs:
+  test-freebsd:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        debug: ['--enable-debug', '--disable-debug']
+        prof: ['--enable-prof', '--disable-prof']
+        arch: ['64-bit', '32-bit']
+        uncommon:
+          - ''
+          - '--with-lg-page=16 --with-malloc-conf=tcache:false'
+
+    name: FreeBSD (${{ matrix.arch }}, debug=${{ matrix.debug }}, prof=${{ matrix.prof }}${{ matrix.uncommon && ', uncommon' || '' }})
+
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        fetch-depth: 1
+
+    - name: Test on FreeBSD
+      uses: vmactions/freebsd-vm@v1
+      with:
+        release: '15.0'
+        usesh: true
+        prepare: |
+          pkg install -y autoconf gmake
+        run: |
+          # Verify we're running in FreeBSD
+          echo "==== System Information ===="
+          uname -a
+          freebsd-version
+          echo "============================"
+
+          # Set compiler flags for 32-bit if needed
+          if [ "${{ matrix.arch }}" = "32-bit" ]; then
+            export CC="cc -m32"
+            export CXX="c++ -m32"
+          fi
+
+          # Generate configure script
+          autoconf
+
+          # Configure with matrix options
+          ./configure --with-jemalloc-prefix=ci_ ${{ matrix.debug }} ${{ matrix.prof }} ${{ matrix.uncommon }}
+
+          # Get CPU count for parallel builds
+          export JFLAG=$(sysctl -n kern.smp.cpus)
+
+          gmake -j${JFLAG}
+          gmake -j${JFLAG} tests
+          gmake check
+
+
+
diff --git a/.github/workflows/linux-ci.yml b/.github/workflows/linux-ci.yml
new file mode 100644
index 00000000..c5e0c9aa
--- /dev/null
+++ b/.github/workflows/linux-ci.yml
@@ -0,0 +1,695 @@
+# This config file is generated by ./scripts/gen_gh_actions.py.
+# Do not edit by hand.
+
+name: Linux CI
+
+on:
+  push:
+    branches: [ dev, ci_travis ]
+  pull_request:
+    branches: [ dev ]
+
+jobs:
+  test-linux:
+    runs-on: ubuntu-24.04
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - env:
+              CC: gcc
+              CXX: g++
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: clang
+              CXX: clang++
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
+          - env:
+              CC: gcc
+              CXX: g++
+              CROSS_COMPILE_32BIT: yes
+              COMPILER_FLAGS: -m32
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: --enable-debug
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: --enable-prof
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: --disable-stats
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: --disable-libdl
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: --enable-opt-safety-checks
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: --with-lg-page=16
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--with-malloc-conf=dss:primary"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--with-malloc-conf=background_thread:true"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: clang
+              CXX: clang++
+              CROSS_COMPILE_32BIT: yes
+              COMPILER_FLAGS: -m32
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
+          - env:
+              CC: clang
+              CXX: clang++
+              CONFIGURE_FLAGS: --enable-debug
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
+          - env:
+              CC: clang
+              CXX: clang++
+              CONFIGURE_FLAGS: --enable-prof
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
+          - env:
+              CC: clang
+              CXX: clang++
+              CONFIGURE_FLAGS: --disable-stats
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
+          - env:
+              CC: clang
+              CXX: clang++
+              CONFIGURE_FLAGS: --disable-libdl
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
+          - env:
+              CC: clang
+              CXX: clang++
+              CONFIGURE_FLAGS: --enable-opt-safety-checks
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
+          - env:
+              CC: clang
+              CXX: clang++
+              CONFIGURE_FLAGS: --with-lg-page=16
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
+          - env:
+              CC: clang
+              CXX: clang++
+              CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
+          - env:
+              CC: clang
+              CXX: clang++
+              CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
+          - env:
+              CC: clang
+              CXX: clang++
+              CONFIGURE_FLAGS: "--with-malloc-conf=dss:primary"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
+          - env:
+              CC: clang
+              CXX: clang++
+              CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
+          - env:
+              CC: clang
+              CXX: clang++
+              CONFIGURE_FLAGS: "--with-malloc-conf=background_thread:true"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
+          - env:
+              CC: gcc
+              CXX: g++
+              CROSS_COMPILE_32BIT: yes
+              COMPILER_FLAGS: -m32
+              CONFIGURE_FLAGS: --enable-debug
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CROSS_COMPILE_32BIT: yes
+              COMPILER_FLAGS: -m32
+              CONFIGURE_FLAGS: --enable-prof
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CROSS_COMPILE_32BIT: yes
+              COMPILER_FLAGS: -m32
+              CONFIGURE_FLAGS: --disable-stats
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CROSS_COMPILE_32BIT: yes
+              COMPILER_FLAGS: -m32
+              CONFIGURE_FLAGS: --disable-libdl
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CROSS_COMPILE_32BIT: yes
+              COMPILER_FLAGS: -m32
+              CONFIGURE_FLAGS: --enable-opt-safety-checks
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CROSS_COMPILE_32BIT: yes
+              COMPILER_FLAGS: -m32
+              CONFIGURE_FLAGS: --with-lg-page=16
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CROSS_COMPILE_32BIT: yes
+              COMPILER_FLAGS: -m32
+              CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CROSS_COMPILE_32BIT: yes
+              COMPILER_FLAGS: -m32
+              CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CROSS_COMPILE_32BIT: yes
+              COMPILER_FLAGS: -m32
+              CONFIGURE_FLAGS: "--with-malloc-conf=dss:primary"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CROSS_COMPILE_32BIT: yes
+              COMPILER_FLAGS: -m32
+              CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CROSS_COMPILE_32BIT: yes
+              COMPILER_FLAGS: -m32
+              CONFIGURE_FLAGS: "--with-malloc-conf=background_thread:true"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-debug --enable-prof"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-debug --disable-stats"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-debug --disable-libdl"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-debug --enable-opt-safety-checks"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-debug --with-lg-page=16"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-debug --enable-prof --enable-prof-frameptr"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-debug --with-malloc-conf=tcache:false"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-debug --with-malloc-conf=dss:primary"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-debug --with-malloc-conf=percpu_arena:percpu"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-debug --with-malloc-conf=background_thread:true"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-prof --disable-stats"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-prof --disable-libdl"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-prof --enable-opt-safety-checks"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-prof --with-lg-page=16"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-prof --enable-prof --enable-prof-frameptr"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-prof --with-malloc-conf=tcache:false"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-prof --with-malloc-conf=dss:primary"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-prof --with-malloc-conf=percpu_arena:percpu"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-prof --with-malloc-conf=background_thread:true"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--disable-stats --disable-libdl"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--disable-stats --enable-opt-safety-checks"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--disable-stats --with-lg-page=16"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--disable-stats --enable-prof --enable-prof-frameptr"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--disable-stats --with-malloc-conf=tcache:false"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--disable-stats --with-malloc-conf=dss:primary"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--disable-stats --with-malloc-conf=percpu_arena:percpu"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--disable-stats --with-malloc-conf=background_thread:true"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--disable-libdl --enable-opt-safety-checks"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--disable-libdl --with-lg-page=16"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--disable-libdl --enable-prof --enable-prof-frameptr"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--disable-libdl --with-malloc-conf=tcache:false"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--disable-libdl --with-malloc-conf=dss:primary"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--disable-libdl --with-malloc-conf=percpu_arena:percpu"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--disable-libdl --with-malloc-conf=background_thread:true"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-opt-safety-checks --with-lg-page=16"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-opt-safety-checks --enable-prof --enable-prof-frameptr"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-opt-safety-checks --with-malloc-conf=tcache:false"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-opt-safety-checks --with-malloc-conf=dss:primary"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-opt-safety-checks --with-malloc-conf=percpu_arena:percpu"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-opt-safety-checks --with-malloc-conf=background_thread:true"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--with-lg-page=16 --enable-prof --enable-prof-frameptr"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--with-lg-page=16 --with-malloc-conf=tcache:false"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--with-lg-page=16 --with-malloc-conf=dss:primary"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--with-lg-page=16 --with-malloc-conf=percpu_arena:percpu"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--with-lg-page=16 --with-malloc-conf=background_thread:true"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr --with-malloc-conf=tcache:false"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr --with-malloc-conf=dss:primary"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr --with-malloc-conf=percpu_arena:percpu"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr --with-malloc-conf=background_thread:true"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false,dss:primary"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false,percpu_arena:percpu"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false,background_thread:true"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--with-malloc-conf=dss:primary,percpu_arena:percpu"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--with-malloc-conf=dss:primary,background_thread:true"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu,background_thread:true"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-debug --enable-experimental-smallocx --enable-stats --enable-prof"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Show OS version
+      run: |
+        echo "=== System Information ==="
+        uname -a
+        echo ""
+        echo "=== Architecture ==="
+        uname -m
+        arch
+        echo ""
+        echo "=== OS Release ==="
+        cat /etc/os-release || true
+        echo ""
+        echo "=== CPU Info ==="
+        lscpu | grep -E "Architecture|CPU op-mode|Byte Order|CPU\(s\):" || true
+
+    - name: Install dependencies (32-bit)
+      if: matrix.env.CROSS_COMPILE_32BIT == 'yes'
+      run: |
+        sudo dpkg --add-architecture i386
+        sudo apt-get update
+        sudo apt-get install -y gcc-multilib g++-multilib libc6-dev-i386
+
+    - name: Build and test
+      env:
+        CC: ${{ matrix.env.CC }}
+        CXX: ${{ matrix.env.CXX }}
+        COMPILER_FLAGS: ${{ matrix.env.COMPILER_FLAGS }}
+        CONFIGURE_FLAGS: ${{ matrix.env.CONFIGURE_FLAGS }}
+        EXTRA_CFLAGS: ${{ matrix.env.EXTRA_CFLAGS }}
+      run: |
+        # Verify the script generates the same output
+        ./scripts/gen_gh_actions.py > gh_actions_script.yml
+
+        # Run autoconf
+        autoconf
+
+        # Configure with flags
+        if [ -n "$COMPILER_FLAGS" ]; then
+          ./configure CC="${CC} ${COMPILER_FLAGS}" CXX="${CXX} ${COMPILER_FLAGS}" $CONFIGURE_FLAGS
+        else
+          ./configure $CONFIGURE_FLAGS
+        fi
+
+        # Build
+        make -j3
+        make -j3 tests
+
+        # Run tests
+        make check
+
+
+  test-linux-arm64:
+    runs-on: ubuntu-24.04-arm
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - env:
+              CC: gcc
+              CXX: g++
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: clang
+              CXX: clang++
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: --enable-debug
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: --enable-prof
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: --disable-stats
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: --disable-libdl
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: --enable-opt-safety-checks
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: --with-lg-page=16
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--with-lg-page=16 --with-lg-hugepage=29"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--with-malloc-conf=dss:primary"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--with-malloc-conf=background_thread:true"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds"
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Show OS version
+      run: |
+        echo "=== System Information ==="
+        uname -a
+        echo ""
+        echo "=== Architecture ==="
+        uname -m
+        arch
+        echo ""
+        echo "=== OS Release ==="
+        cat /etc/os-release || true
+        echo ""
+        echo "=== CPU Info ==="
+        lscpu | grep -E "Architecture|CPU op-mode|Byte Order|CPU\(s\):" || true
+
+    - name: Install dependencies (32-bit)
+      if: matrix.env.CROSS_COMPILE_32BIT == 'yes'
+      run: |
+        sudo dpkg --add-architecture i386
+        sudo apt-get update
+        sudo apt-get install -y gcc-multilib g++-multilib libc6-dev-i386
+
+    - name: Build and test
+      env:
+        CC: ${{ matrix.env.CC }}
+        CXX: ${{ matrix.env.CXX }}
+        COMPILER_FLAGS: ${{ matrix.env.COMPILER_FLAGS }}
+        CONFIGURE_FLAGS: ${{ matrix.env.CONFIGURE_FLAGS }}
+        EXTRA_CFLAGS: ${{ matrix.env.EXTRA_CFLAGS }}
+      run: |
+        # Verify the script generates the same output
+        ./scripts/gen_gh_actions.py > gh_actions_script.yml
+
+        # Run autoconf
+        autoconf
+
+        # Configure with flags
+        if [ -n "$COMPILER_FLAGS" ]; then
+          ./configure CC="${CC} ${COMPILER_FLAGS}" CXX="${CXX} ${COMPILER_FLAGS}" $CONFIGURE_FLAGS
+        else
+          ./configure $CONFIGURE_FLAGS
+        fi
+
+        # Build
+        make -j3
+        make -j3 tests
+
+        # Run tests
+        make check
+
+
+
diff --git a/.github/workflows/macos-ci.yml b/.github/workflows/macos-ci.yml
new file mode 100644
index 00000000..585551d0
--- /dev/null
+++ b/.github/workflows/macos-ci.yml
@@ -0,0 +1,212 @@
+# This config file is generated by ./scripts/gen_gh_actions.py.
+# Do not edit by hand.
+
+name: macOS CI
+
+on:
+  push:
+    branches: [ dev, ci_travis ]
+  pull_request:
+    branches: [ dev ]
+
+jobs:
+  test-macos:
+    runs-on: macos-15-intel
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - env:
+              CC: gcc
+              CXX: g++
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+          - env:
+              CC: gcc
+              CXX: g++
+              CROSS_COMPILE_32BIT: yes
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: --enable-debug
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: --disable-stats
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: --disable-libdl
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: --enable-opt-safety-checks
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: --with-lg-page=16
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Show OS version
+      run: |
+        echo "=== macOS Version ==="
+        sw_vers
+        echo ""
+        echo "=== Architecture ==="
+        uname -m
+        arch
+        echo ""
+        echo "=== CPU Info ==="
+        sysctl -n machdep.cpu.brand_string
+        sysctl -n hw.machine
+
+    - name: Install dependencies
+      run: |
+        brew install autoconf
+
+    - name: Build and test
+      env:
+        CC: ${{ matrix.env.CC || 'gcc' }}
+        CXX: ${{ matrix.env.CXX || 'g++' }}
+        COMPILER_FLAGS: ${{ matrix.env.COMPILER_FLAGS }}
+        CONFIGURE_FLAGS: ${{ matrix.env.CONFIGURE_FLAGS }}
+        EXTRA_CFLAGS: ${{ matrix.env.EXTRA_CFLAGS }}
+      run: |
+        # Run autoconf
+        autoconf
+
+        # Configure with flags
+        if [ -n "$COMPILER_FLAGS" ]; then
+          ./configure CC="${CC} ${COMPILER_FLAGS}" CXX="${CXX} ${COMPILER_FLAGS}" $CONFIGURE_FLAGS
+        else
+          ./configure $CONFIGURE_FLAGS
+        fi
+
+        # Build
+        make -j3
+        make -j3 tests
+
+        # Run tests
+        make check
+
+
+  test-macos-arm64:
+    runs-on: macos-15
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - env:
+              CC: gcc
+              CXX: g++
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+          - env:
+              CC: gcc
+              CXX: g++
+              CROSS_COMPILE_32BIT: yes
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: --enable-debug
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: --disable-stats
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: --disable-libdl
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: --enable-opt-safety-checks
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: --with-lg-page=16
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--with-lg-page=16 --with-lg-hugepage=29"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu"
+              EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Show OS version
+      run: |
+        echo "=== macOS Version ==="
+        sw_vers
+        echo ""
+        echo "=== Architecture ==="
+        uname -m
+        arch
+        echo ""
+        echo "=== CPU Info ==="
+        sysctl -n machdep.cpu.brand_string
+        sysctl -n hw.machine
+
+    - name: Install dependencies
+      run: |
+        brew install autoconf
+
+    - name: Build and test
+      env:
+        CC: ${{ matrix.env.CC || 'gcc' }}
+        CXX: ${{ matrix.env.CXX || 'g++' }}
+        COMPILER_FLAGS: ${{ matrix.env.COMPILER_FLAGS }}
+        CONFIGURE_FLAGS: ${{ matrix.env.CONFIGURE_FLAGS }}
+        EXTRA_CFLAGS: ${{ matrix.env.EXTRA_CFLAGS }}
+      run: |
+        # Run autoconf
+        autoconf
+
+        # Configure with flags
+        if [ -n "$COMPILER_FLAGS" ]; then
+          ./configure CC="${CC} ${COMPILER_FLAGS}" CXX="${CXX} ${COMPILER_FLAGS}" $CONFIGURE_FLAGS
+        else
+          ./configure $CONFIGURE_FLAGS
+        fi
+
+        # Build
+        make -j3
+        make -j3 tests
+
+        # Run tests
+        make check
+
+
+
diff --git a/.github/workflows/static_analysis.yaml b/.github/workflows/static_analysis.yaml
new file mode 100644
index 00000000..29e617fc
--- /dev/null
+++ b/.github/workflows/static_analysis.yaml
@@ -0,0 +1,68 @@
+name: 'Static Analysis'
+on: [pull_request]
+jobs:
+  static-analysis:
+    runs-on: ubuntu-latest
+    steps:
+      # We build libunwind ourselves because sadly the version
+      # provided by Ubuntu via apt-get is much too old.
+      - name: Check out libunwind
+        uses: actions/checkout@v4
+        with:
+          repository: libunwind/libunwind
+          path: libunwind
+          ref: 'v1.6.2'
+          github-server-url: 'https://github.com'
+      - name: Install libunwind
+        run: |
+          cd libunwind
+          autoreconf -i
+          ./configure --prefix=/usr
+          make -s -j $(nproc) V=0
+          sudo make -s install V=0
+          cd ..
+          rm -rf libunwind
+      - name: Check out repository
+        uses: actions/checkout@v4
+      # We download LLVM directly from the latest stable release
+      # on GitHub, because this tends to be much newer than the
+      # version available via apt-get in Ubuntu.
+      - name: Download LLVM
+        uses: dsaltares/fetch-gh-release-asset@master
+        with:
+          repo: 'llvm/llvm-project'
+          version: 'tags/llvmorg-16.0.4'
+          file: 'clang[+]llvm-.*x86_64-linux-gnu.*'
+          regex: true
+          target: 'llvm_assets/'
+          token: ${{ secrets.GITHUB_TOKEN }}
+      - name: Install prerequisites
+        id: install_prerequisites
+        run: |
+          tar -C llvm_assets -xaf llvm_assets/*.tar* &
+          sudo apt-get update
+          sudo apt-get install -y jq bear python3-pip
+          pip install codechecker
+          echo "Extracting LLVM from tar" 1>&2
+          wait
+          echo "LLVM_BIN_DIR=$(echo llvm_assets/clang*/bin)" >> "$GITHUB_OUTPUT"
+      - name: Run static analysis
+        id: run_static_analysis
+        run: >
+          PATH="${{ steps.install_prerequisites.outputs.LLVM_BIN_DIR }}:$PATH"
+          LDFLAGS='-L/usr/lib'
+          scripts/run_static_analysis.sh static_analysis_results "$GITHUB_OUTPUT"
+      - name: Upload static analysis results
+        if: ${{ steps.run_static_analysis.outputs.HAS_STATIC_ANALYSIS_RESULTS }} == '1'
+        uses: actions/upload-artifact@v4
+        with:
+          name: static_analysis_results
+          path: static_analysis_results
+      - name: Check static analysis results
+        run: |
+          if [[ "${{ steps.run_static_analysis.outputs.HAS_STATIC_ANALYSIS_RESULTS }}" == '1' ]]
+          then
+              echo "::error::Static analysis found issues with your code. Download the 'static_analysis_results' artifact from this workflow and view the 'index.html' file contained within it in a web browser locally for detailed results."
+              exit 1
+          fi
+
diff --git a/.github/workflows/windows-ci.yml b/.github/workflows/windows-ci.yml
new file mode 100644
index 00000000..f40ba086
--- /dev/null
+++ b/.github/workflows/windows-ci.yml
@@ -0,0 +1,155 @@
+# This config file is generated by ./scripts/gen_gh_actions.py.
+# Do not edit by hand.
+
+name: Windows CI
+
+on:
+  push:
+    branches: [ dev, ci_travis ]
+  pull_request:
+    branches: [ dev ]
+
+jobs:
+  test-windows:
+    runs-on: windows-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - env:
+              CC: gcc
+              CXX: g++
+              EXTRA_CFLAGS: -fcommon
+          - env:
+              CC: gcc
+              CXX: g++
+              CONFIGURE_FLAGS: --enable-debug
+              EXTRA_CFLAGS: -fcommon
+          - env:
+              CC: cl.exe
+              CXX: cl.exe
+          - env:
+              CC: gcc
+              CXX: g++
+              CROSS_COMPILE_32BIT: yes
+              EXTRA_CFLAGS: -fcommon
+          - env:
+              CC: cl.exe
+              CXX: cl.exe
+              CONFIGURE_FLAGS: --enable-debug
+          - env:
+              CC: gcc
+              CXX: g++
+              CROSS_COMPILE_32BIT: yes
+              CONFIGURE_FLAGS: --enable-debug
+              EXTRA_CFLAGS: -fcommon
+          - env:
+              CC: cl.exe
+              CXX: cl.exe
+              CROSS_COMPILE_32BIT: yes
+          - env:
+              CC: cl.exe
+              CXX: cl.exe
+              CROSS_COMPILE_32BIT: yes
+              CONFIGURE_FLAGS: --enable-debug
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Show OS version
+      shell: cmd
+      run: |
+        echo === Windows Version ===
+        systeminfo | findstr /B /C:"OS Name" /C:"OS Version"
+        ver
+        echo.
+        echo === Architecture ===
+        echo PROCESSOR_ARCHITECTURE=%PROCESSOR_ARCHITECTURE%
+        echo.
+
+    - name: Setup MSYS2
+      uses: msys2/setup-msys2@v2
+      with:
+        msystem: ${{ matrix.env.CROSS_COMPILE_32BIT == 'yes' && 'MINGW32' || 'MINGW64' }}
+        update: true
+        install: >-
+          autotools
+          git
+        pacboy: >-
+          make:p
+          gcc:p
+          binutils:p
+
+    - name: Build and test (MinGW-GCC)
+      if: matrix.env.CC != 'cl.exe'
+      shell: msys2 {0}
+      env:
+        CC: ${{ matrix.env.CC || 'gcc' }}
+        CXX: ${{ matrix.env.CXX || 'g++' }}
+        COMPILER_FLAGS: ${{ matrix.env.COMPILER_FLAGS }}
+        CONFIGURE_FLAGS: ${{ matrix.env.CONFIGURE_FLAGS }}
+        EXTRA_CFLAGS: ${{ matrix.env.EXTRA_CFLAGS }}
+      run: |
+        # Run autoconf
+        autoconf
+
+        # Configure with flags
+        if [ -n "$COMPILER_FLAGS" ]; then
+          ./configure CC="${CC} ${COMPILER_FLAGS}" CXX="${CXX} ${COMPILER_FLAGS}" $CONFIGURE_FLAGS
+        else
+          ./configure $CONFIGURE_FLAGS
+        fi
+
+        # Build (mingw32-make is the "make" command in MSYS2)
+        mingw32-make -j3
+        mingw32-make tests
+
+        # Run tests
+        mingw32-make -k check
+
+    - name: Setup MSVC environment
+      if: matrix.env.CC == 'cl.exe'
+      uses: ilammy/msvc-dev-cmd@v1
+      with:
+        arch: ${{ matrix.env.CROSS_COMPILE_32BIT == 'yes' && 'x86' || 'x64' }}
+
+    - name: Build and test (MSVC)
+      if: matrix.env.CC == 'cl.exe'
+      shell: msys2 {0}
+      env:
+        CONFIGURE_FLAGS: ${{ matrix.env.CONFIGURE_FLAGS }}
+        MSYS2_PATH_TYPE: inherit
+      run: |
+        # Export MSVC environment variables for configure
+        export CC=cl.exe
+        export CXX=cl.exe
+        export AR=lib.exe
+        export NM=dumpbin.exe
+        export RANLIB=:
+
+        # Verify cl.exe is accessible (should be in PATH via inherit)
+        if ! which cl.exe > /dev/null 2>&1; then
+          echo "cl.exe not found, trying to locate MSVC..."
+          # Find and add MSVC bin directory to PATH
+          MSVC_BIN=$(cmd.exe /c "echo %VCToolsInstallDir%" | tr -d '\\r' | sed 's/\\\\\\\\/\//g' | sed 's/C:/\\/c/g')
+          if [ -n "$MSVC_BIN" ]; then
+            export PATH="$PATH:$MSVC_BIN/bin/Hostx64/x64:$MSVC_BIN/bin/Hostx86/x86"
+          fi
+        fi
+
+        # Run autoconf
+        autoconf
+
+        # Configure with MSVC
+        ./configure CC=cl.exe CXX=cl.exe AR=lib.exe $CONFIGURE_FLAGS
+
+        # Build (mingw32-make is the "make" command in MSYS2)
+        mingw32-make -j3
+        # Build tests sequentially due to PDB file issues
+        mingw32-make tests
+
+        # Run tests
+        mingw32-make -k check
+
+
+
diff --git a/.gitignore b/.gitignore
index 1c0b3385..95dbaa5f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -45,6 +45,13 @@
 /src/*.[od]
 /src/*.sym
 
+# These are semantically meaningful for clangd and related tooling.
+/build/
+/.cache/
+compile_commands.json
+/static_analysis_raw_results
+/static_analysis_results
+
 /run_tests.out/
 
 /test/test.sh
@@ -66,6 +73,7 @@ test/include/test/jemalloc_test_defs.h
 
 /test/stress/[A-Za-z]*
 !/test/stress/[A-Za-z]*.*
+!/test/stress/pa/
 /test/stress/*.[od]
 /test/stress/*.out
 
diff --git a/.travis.yml b/.travis.yml
index bf44fad4..643da4f1 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,82 +6,10 @@
 # Differences are explained here:
 # https://docs.travis-ci.com/user/languages/minimal-and-generic/
 language: minimal
-dist: focal
+dist: jammy
 
 jobs:
   include:
-    - os: windows
-      arch: amd64
-      env: CC=gcc CXX=g++ EXTRA_CFLAGS="-fcommon"
-    - os: windows
-      arch: amd64
-      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon"
-    - os: windows
-      arch: amd64
-      env: CC=cl.exe CXX=cl.exe
-    - os: windows
-      arch: amd64
-      env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes EXTRA_CFLAGS="-fcommon"
-    - os: windows
-      arch: amd64
-      env: CC=cl.exe CXX=cl.exe CONFIGURE_FLAGS="--enable-debug"
-    - os: windows
-      arch: amd64
-      env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon"
-    - os: windows
-      arch: amd64
-      env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes
-    - os: windows
-      arch: amd64
-      env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug"
-    - os: freebsd
-      arch: amd64
-      env: CC=gcc CXX=g++
-    - os: freebsd
-      arch: amd64
-      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug"
-    - os: freebsd
-      arch: amd64
-      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind"
-    - os: freebsd
-      arch: amd64
-      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false"
-    - os: freebsd
-      arch: amd64
-      env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes
-    - os: freebsd
-      arch: amd64
-      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind"
-    - os: freebsd
-      arch: amd64
-      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --with-malloc-conf=tcache:false"
-    - os: freebsd
-      arch: amd64
-      env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug"
-    - os: freebsd
-      arch: amd64
-      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false"
-    - os: freebsd
-      arch: amd64
-      env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind"
-    - os: freebsd
-      arch: amd64
-      env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false"
-    - os: freebsd
-      arch: amd64
-      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false"
-    - os: freebsd
-      arch: amd64
-      env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind"
-    - os: freebsd
-      arch: amd64
-      env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --with-malloc-conf=tcache:false"
-    - os: freebsd
-      arch: amd64
-      env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false"
-    - os: freebsd
-      arch: amd64
-      env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false"
     - os: linux
       arch: amd64
       env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds"
@@ -109,6 +37,9 @@ jobs:
     - os: linux
       arch: amd64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+    - os: linux
+      arch: amd64
+      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
       arch: amd64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
@@ -142,6 +73,9 @@ jobs:
     - os: linux
       arch: amd64
       env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
+    - os: linux
+      arch: amd64
+      env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
     - os: linux
       arch: amd64
       env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
@@ -172,6 +106,9 @@ jobs:
     - os: linux
       arch: amd64
       env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+    - os: linux
+      arch: amd64
+      env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
       arch: amd64
       env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
@@ -199,6 +136,9 @@ jobs:
     - os: linux
       arch: amd64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+    - os: linux
+      arch: amd64
+      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
       arch: amd64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
@@ -223,6 +163,9 @@ jobs:
     - os: linux
       arch: amd64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+    - os: linux
+      arch: amd64
+      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
       arch: amd64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
@@ -244,6 +187,9 @@ jobs:
     - os: linux
       arch: amd64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+    - os: linux
+      arch: amd64
+      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
       arch: amd64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
@@ -262,6 +208,9 @@ jobs:
     - os: linux
       arch: amd64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+    - os: linux
+      arch: amd64
+      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
       arch: amd64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
@@ -277,6 +226,9 @@ jobs:
     - os: linux
       arch: amd64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+    - os: linux
+      arch: amd64
+      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
       arch: amd64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
@@ -289,6 +241,9 @@ jobs:
     - os: linux
       arch: amd64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+    - os: linux
+      arch: amd64
+      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
       arch: amd64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
@@ -301,6 +256,18 @@ jobs:
     - os: linux
       arch: amd64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+    - os: linux
+      arch: amd64
+      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+    - os: linux
+      arch: amd64
+      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+    - os: linux
+      arch: amd64
+      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+    - os: linux
+      arch: amd64
+      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
       arch: amd64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
@@ -320,62 +287,47 @@ jobs:
       arch: amd64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
-      arch: ppc64le
+      arch: arm64
       env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
-      arch: ppc64le
+      arch: arm64
+      env: CC=clang CXX=clang++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
+    - os: linux
+      arch: arm64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
-      arch: ppc64le
+      arch: arm64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
-      arch: ppc64le
+      arch: arm64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
-      arch: ppc64le
+      arch: arm64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
-      arch: ppc64le
+      arch: arm64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
-      arch: ppc64le
+      arch: arm64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
-      arch: ppc64le
+      arch: arm64
+      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-lg-hugepage=29" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+    - os: linux
+      arch: arm64
+      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+    - os: linux
+      arch: arm64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
-      arch: ppc64le
+      arch: arm64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
-      arch: ppc64le
+      arch: arm64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
-      arch: ppc64le
+      arch: arm64
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
-    - os: osx
-      arch: amd64
-      env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
-    - os: osx
-      arch: amd64
-      env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
-    - os: osx
-      arch: amd64
-      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
-    - os: osx
-      arch: amd64
-      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
-    - os: osx
-      arch: amd64
-      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
-    - os: osx
-      arch: amd64
-      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
-    - os: osx
-      arch: amd64
-      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
-    - os: osx
-      arch: amd64
-      env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
     # Development build
     - os: linux
       env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
diff --git a/ChangeLog b/ChangeLog
index 32fde562..3bc84360 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -4,6 +4,154 @@ brevity.  Much more detail can be found in the git revision history:
 
     https://github.com/jemalloc/jemalloc
 
+* 5.3.1 (Apr 13, 2026)
+
+This release includes over 390 commits spanning bug fixes, new features,
+performance optimizations, and portability improvements.  Multiple percent
+of system-level metric improvements were measured in tested production
+workloads.  The release has gone through large-scale production testing
+at Meta.
+
+New features:
+  - Support pvalloc.  (@Lapenkov: 5b1f2cc5)
+  - Add double free detection for the debug build.  (@izaitsevfb:
+    36366f3c, @guangli-dai: 42daa1ac, @divanorama: 1897f185)
+  - Add compile-time option `--enable-pageid` to enable memory mapping
+    annotation.  (@devnexen: 4fc5c4fb)
+  - Add runtime option `prof_bt_max` to control the max stack depth for
+    profiling.  (@guangli-dai: a0734fd6)
+  - Add compile-time option `--enable-force-getenv` to use `getenv` instead
+    of `secure_getenv`.  (@interwq: 481bbfc9)
+  - Add compile-time option `--disable-dss` to disable the usage of
+    `sbrk(2)`.  (@Svetlitski: ea5b7bea)
+  - Add runtime option `tcache_ncached_max` to control the number of items
+    in each size bin in the thread cache.  (@guangli-dai: 8a22d10b)
+  - Add runtime option `calloc_madvise_threshold` to determine if kernel or
+    memset is used to zero the allocations for calloc.  (@nullptr0-0:
+    5081c16b)
+  - Add compile-time option `--disable-user-config` to disable reading the
+    runtime configurations from `/etc/malloc.conf` or environment variable
+    `MALLOC_CONF`.  (@roblabla: c17bf8b3)
+  - Add runtime option `disable_large_size_classes` to guard the new usable
+    size calculation, which minimizes the memory overhead for large
+    allocations, i.e., >= 4 * PAGE.  (@guangli-dai: c067a55c, 8347f104)
+  - Enable process_madvise usage, add runtime option
+    `process_madvise_max_batch` to control the max # of regions in each
+    madvise batch.  (@interwq: 22440a02, @spredolac: 4246475b)
+  - Add mallctl interfaces:
+    + `opt.prof_bt_max`  (@guangli-dai: a0734fd6)
+    + `arena.<i>.name` to set and get arena names.  (@guangli-dai: ba19d2cb)
+    + `thread.tcache.max` to set and get the `tcache_max` of the current
+      thread.  (@guangli-dai: a442d9b8)
+    + `thread.tcache.ncached_max.write` and
+      `thread.tcache.ncached_max.read_sizeclass` to set and get the
+      `ncached_max` setup of the current thread.  (@guangli-dai: 630f7de9,
+      6b197fdd)
+    + `arenas.hugepage` to return the hugepage size used, also exported to
+      malloc stats.  (@ilvokhin: 90c627ed)
+    + `approximate_stats.active` to return an estimate of the current active
+      bytes, which should not be compared with other stats retrieved.
+      (@guangli-dai: 0988583d)
+
+Bug fixes:
+  - Prevent potential deadlocks in decaying during reentrancy.  (@interwq:
+    434a68e2)
+  - Fix segfault in extent coalescing.  (@Svetlitski: 12311fe6)
+  - Add null pointer detections in mallctl calls.  (@Svetlitski: dc0a184f,
+    0288126d)
+  - Make mallctl `arenas.lookup` triable without crashing on invalid
+    pointers.  (@auxten: 019cccc2, 5bac3849)
+  - Demote sampled allocations for proper deallocations during
+    `arena_reset`.  (@Svetlitski: 62648c88)
+  - Fix jemalloc's `read(2)` and `write(2)`.  (@Svetlitski: d2c9ed3d, @lexprfuncall:
+    9fdc1160)
+  - Fix the pkg-config metadata file.  (@BtbN: ed7e6fe7, ce8ce99a)
+  - Fix the autogen.sh so that it accepts quoted extra options.
+    (@honggyukim: f6fe6abd)
+  - Fix `rallocx()` to set errno to ENOMEM upon OOMing.  (@arter97: 38056fea,
+    @interwq: 83b07578)
+  - Avoid stack overflow for internal variable array usage.  (@nullptr0-0:
+    47c9bcd4, 48f66cf4, @xinydev: 9169e927)
+  - Fix background thread initialization race.  (@puzpuzpuz: 4d0ffa07)
+  - Guard os_page_id against a NULL address.  (@lexprfuncall: 79cc7dcc)
+  - Handle tcache init failures gracefully.  (@lexprfuncall: a056c20d)
+  - Fix missing release of acquired neighbor edata in
+    extent_try_coalesce_impl.  (@spredolac: 675ab079)
+  - Fix memory leak of old curr_reg on san_bump_grow_locked failure.
+    (@spredolac: 5904a421)
+  - Fix large alloc nrequests under-counting on cache misses.  (@spredolac:
+    3cc56d32)
+
+Portability improvements:
+  - Fix the build in C99.  (@abaelhe: 56ddbea2)
+  - Add `pthread_setaffinity_np` detection for non Linux/BSD platforms.
+    (@devnexen: 4c95c953)
+  - Make `VARIABLE_ARRAY` compatible with compilers not supporting VLA,
+    i.e., Visual Studio C compiler in C11 or C17 modes.  (@madscientist:
+    be65438f)
+  - Fix the build on Linux using musl library.  (@marv: aba1645f, 45249cf5)
+  - Reduce the memory overhead in small allocation sampling for systems
+    with larger page sizes, e.g., ARM.  (@Svetlitski: 5a858c64)
+  - Add C23's `free_sized` and `free_aligned_sized`.  (@Svetlitski:
+    cdb2c0e0)
+  - Enable heap profiling on MacOS.  (@nullptr0-0: 4b555c11)
+  - Fix incorrect printing on 32bit.  (@sundb: 630434bb)
+  - Make `JEMALLOC_CXX_THROW` compatible with C++ versions newer than
+    C++17.  (@r-barnes, @guangli-dai: 21bcc0a8)
+  - Fix mmap tag conflicts on MacOS.  (@kdrag0n: c893fcd1)
+  - Fix monotonic timer assumption for win32.  (@burtonli: 8dc97b11)
+  - Fix VM over-reservation on systems with larger pages, e.g., aarch64.
+    (@interwq: cd05b19f)
+  - Remove `unreachable()` macro conditionally to prevent definition
+    conflicts for C23+.  (@appujee: d8486b26, 4b88bddb)
+  - Fix dlsym failure observed on FreeBSD.  (@rhelmot: 86bbabac)
+  - Change the default page size to 64KB on aarch64 Linux.  (@lexprfuncall:
+    9442300c)
+  - Update config.guess and config.sub to the latest version.
+    (@lexprfuncall: c51949ea)
+  - Determine the page size on Android from NDK header files.
+    (@lexprfuncall: c51abba1)
+  - Improve the portability of grep patterns in configure.ac.
+    (@lexprfuncall: 365747bc)
+  - Add compile-time option `--with-cxx-stdlib` to specify the C++ standard
+    library.  (@yuxuanchen1997: a10ef3e1)
+
+Optimizations and refactors:
+  - Enable tcache for deallocation-only threads.  (@interwq: 143e9c4a)
+  - Inline to accelerate operator delete.  (@guangli-dai: e8f9f138)
+  - Optimize pairing heap's performance.  (@deadalnix: 5266152d, be6da4f6,
+    543e2d61, 10d71315, 92aa52c0, @Svetlitski: 36ca0c1b)
+  - Inline the storage for thread name in the profiling data.  (@interwq:
+    ce0b7ab6, e62aa478)
+  - Optimize a hot function `edata_cmp_summary_comp` to accelerate it.
+    (@Svetlitski: 6841110b, @guangli-dai: 0181aaa4)
+  - Allocate thread cache using the base allocator, which enables thread
+    cache to use thp when `metadata_thp` is turned on.  (@interwq:
+    72cfdce7)
+  - Allow oversize arena not to purge immediately when background threads
+    are enabled, although the default decay time is 0 to be back compatible.
+    (@interwq: d1313313)
+  - Optimize thread-local storage implementation on Windows.  (@mcfi:
+    9e123a83, 3a0d9cda)
+  - Optimize fast path to allow static size class computation.  (@interwq:
+    323ed2e3)
+  - Redesign tcache GC to regulate the frequency and make it
+    locality-aware. The new design is default on, guarded by option
+    `experimental_tcache_gc`.  (@nullptr0-0: 0c88be9e, e2c9f3a9,
+    14d5dc13, @deadalnix: 5afff2e4)
+  - Reduce the arena switching overhead by avoiding forced purging when
+    background thread is enabled.  (@interwq: a3910b98)
+  - Improve the reuse efficiency by limiting the maximum coalesced size for
+    large extents.  (@jiebinn: 3c14707b)
+  - Refactor thread events to allow registration of users' thread events
+    and remove prof_threshold as the built-in event.  (@spredolac: e6864c60,
+    015b0179, 34ace916)
+
+Documentation:
+  - Update Windows building instructions.  (@Lapenkov: 37139328)
+  - Add vcpkg installation instructions.  (@LilyWangLL: c0c9783e)
+  - Update profiling internals with an example.  (@jordalgo: b04e7666)
+
 * 5.3.0 (May 6, 2022)
 
   This release contains many speed and space optimizations, from micro
diff --git a/INSTALL.md b/INSTALL.md
index 90da718d..2333f13d 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -139,6 +139,7 @@ any of the following arguments (not a definitive list) to 'configure':
     in the following list that appears to function correctly:
 
     + libunwind      (requires --enable-prof-libunwind)
+    + frame pointer  (requires --enable-prof-frameptr)
     + libgcc         (unless --disable-prof-libgcc)
     + gcc intrinsics (unless --disable-prof-gcc)
 
@@ -147,6 +148,12 @@ any of the following arguments (not a definitive list) to 'configure':
     Use the libunwind library (http://www.nongnu.org/libunwind/) for stack
     backtracing.
 
+* `--enable-prof-frameptr`
+
+    Use the optimized frame pointer unwinder for stack backtracing. Safe
+    to use in mixed code (with and without frame pointers) - but requires
+    frame pointers to produce meaningful stacks. Linux only.
+
 * `--disable-prof-libgcc`
 
     Disable the use of libgcc's backtracing functionality.
@@ -315,13 +322,13 @@ behavior:
     'configure' uses this to find programs.
 
 In some cases it may be necessary to work around configuration results that do
-not match reality.  For example, Linux 4.5 added support for the MADV_FREE flag
-to madvise(2), which can cause problems if building on a host with MADV_FREE
-support and deploying to a target without.  To work around this, use a cache
-file to override the relevant configuration variable defined in configure.ac,
-e.g.:
+not match reality.  For example, Linux 3.4 added support for the MADV_DONTDUMP
+flag to madvise(2), which can cause problems if building on a host with
+MADV_DONTDUMP support and deploying to a target without.  To work around this,
+use a cache file to override the relevant configuration variable defined in
+configure.ac, e.g.:
 
-    echo "je_cv_madv_free=no" > config.cache && ./configure -C
+    echo "je_cv_madv_dontdump=no" > config.cache && ./configure -C
 
 
 ## Advanced compilation
@@ -396,6 +403,102 @@ exclusively):
 
     Use this to search for programs used during configuration and building.
 
+## Building for Windows
+
+There are at least two ways to build jemalloc's libraries for Windows. They
+differ in their ease of use and flexibility.
+
+### With MSVC solutions
+This is the easy, but less flexible approach. It doesn't let you specify
+arguments to the `configure` script.
+  
+1. Install Cygwin with at least the following packages:
+   * autoconf
+   * autogen
+   * gawk
+   * grep
+   * sed
+
+2. Install Visual Studio 2015 or 2017 with Visual C++
+
+3. Add Cygwin\bin to the PATH environment variable
+
+4. Open "x64 Native Tools Command Prompt for VS 2017"
+   (note: x86/x64 doesn't matter at this point)
+
+5. Generate header files:
+   sh -c "CC=cl ./autogen.sh"
+
+6. Now the project can be opened and built in Visual Studio:
+   msvc\jemalloc_vc2017.sln
+
+### With MSYS
+This is a more involved approach that offers the same configuration flexibility
+as Linux builds. We use it for our CI workflow to test different jemalloc
+configurations on Windows.
+
+1. Install the prerequisites
+    1. MSYS2
+    2. Chocolatey
+    3. Visual Studio if you want to compile with MSVC compiler
+
+2. Run your bash emulation. It could be MSYS2 or Git Bash (this manual was
+   tested on both)
+3. Manually and selectively follow
+   [before_install.sh](https://github.com/jemalloc/jemalloc/blob/dev/scripts/windows/before_install.sh)
+   script.
+    1. Skip the `TRAVIS_OS_NAME` check, `rm -rf C:/tools/msys64` and `choco
+       uninstall/upgrade` part.
+    2.  If using `msys2` shell, add path to `RefreshEnv.cmd` to `PATH`:
+        `PATH="$PATH:/c/ProgramData/chocolatey/bin"`
+    3. Assign `msys_shell_cmd`, `msys2`, `mingw32` and `mingw64` as in the
+       script.
+    4. Pick `CROSS_COMPILE_32BIT` , `CC` and `USE_MSVC` values depending on
+       your needs. For instance, if you'd like to build for x86_64 Windows
+       with `gcc`, then `CROSS_COMPILE_32BIT="no"`, `CC="gcc"` and
+       `USE_MSVC=""`. If you'd like to build for x86 Windows with `cl.exe`,
+       then `CROSS_COMPILE_32BIT="yes"`, `CC="cl.exe"`, `USE_MSVC="x86"`.
+       For x86_64 builds with `cl.exe`, assign `USE_MSVC="amd64"` and
+       `CROSS_COMPILE_32BIT="no"`.
+    5. Replace the path to `vcvarsall.bat` with the path on your system. For
+       instance, on my Windows PC with Visual Studio 17, the path is
+       `C:\Program Files (x86)\Microsoft Visual
+       Studio\2017\BuildTools\VC\Auxiliary\Build\vcvarsall.bat`.
+    6. Execute the rest of the script. It will install the required
+       dependencies and assign the variable `build_env`, which is a function
+       that executes following commands with the correct environment
+       variables set.
+4. Use `$build_env <command>` as you would in a Linux shell:
+     1. `$build_env autoconf`
+     2. `$build_env ./configure CC="<desired compiler>" <configuration flags>`
+     3. `$build_env mingw32-make`
+
+If you're having any issues with the above, ensure the following:
+
+5. When you run `cmd //C RefreshEnv.cmd`, you get an output line starting with
+   `Refreshing` . If it errors saying `RefreshEnv.cmd` is not found, then you
+   need to add it to your `PATH` as described above in item 3.2
+
+6. When you run `cmd //C $vcvarsall`, it prints a bunch of environment
+   variables. Otherwise, check the path to the `vcvarsall.bat` in `$vcvarsall`
+   script and fix it.
+
+### Building from vcpkg
+
+The jemalloc port in vcpkg is kept up to date by Microsoft team members and
+community contributors. The url of vcpkg is: https://github.com/Microsoft/vcpkg
+. You can download and install jemalloc using the vcpkg dependency manager:
+
+```shell
+git clone https://github.com/Microsoft/vcpkg.git
+cd vcpkg
+./bootstrap-vcpkg.sh  # ./bootstrap-vcpkg.bat for Windows
+./vcpkg integrate install
+./vcpkg install jemalloc
+```
+
+If the version is out of date, please [create an issue or pull
+request](https://github.com/Microsoft/vcpkg) on the vcpkg repository.
 
 ## Development
 
diff --git a/Makefile.in b/Makefile.in
index 1193cd85..435fc34d 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -123,17 +123,19 @@ C_SRCS := $(srcroot)src/jemalloc.c \
 	$(srcroot)src/san_bump.c \
 	$(srcroot)src/hook.c \
 	$(srcroot)src/hpa.c \
+	$(srcroot)src/hpa_central.c \
 	$(srcroot)src/hpa_hooks.c \
+	$(srcroot)src/hpa_utils.c \
 	$(srcroot)src/hpdata.c \
 	$(srcroot)src/inspect.c \
 	$(srcroot)src/large.c \
 	$(srcroot)src/log.c \
 	$(srcroot)src/malloc_io.c \
+	$(srcroot)src/conf.c \
 	$(srcroot)src/mutex.c \
 	$(srcroot)src/nstime.c \
 	$(srcroot)src/pa.c \
 	$(srcroot)src/pa_extra.c \
-	$(srcroot)src/pai.c \
 	$(srcroot)src/pac.c \
 	$(srcroot)src/pages.c \
 	$(srcroot)src/peak_event.c \
@@ -141,6 +143,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \
 	$(srcroot)src/prof_data.c \
 	$(srcroot)src/prof_log.c \
 	$(srcroot)src/prof_recent.c \
+	$(srcroot)src/prof_stack_range.c \
 	$(srcroot)src/prof_stats.c \
 	$(srcroot)src/prof_sys.c \
 	$(srcroot)src/psset.c \
@@ -153,8 +156,10 @@ C_SRCS := $(srcroot)src/jemalloc.c \
 	$(srcroot)src/tcache.c \
 	$(srcroot)src/test_hooks.c \
 	$(srcroot)src/thread_event.c \
+	$(srcroot)src/thread_event_registry.c \
 	$(srcroot)src/ticker.c \
 	$(srcroot)src/tsd.c \
+	$(srcroot)src/util.c \
 	$(srcroot)src/witness.c
 ifeq ($(enable_zone_allocator), 1)
 C_SRCS += $(srcroot)src/zone.c
@@ -201,14 +206,21 @@ TESTS_UNIT := \
 	$(srcroot)test/unit/atomic.c \
 	$(srcroot)test/unit/background_thread.c \
 	$(srcroot)test/unit/background_thread_enable.c \
+	$(srcroot)test/unit/background_thread_init.c \
 	$(srcroot)test/unit/base.c \
 	$(srcroot)test/unit/batch_alloc.c \
+	$(srcroot)test/unit/bin.c \
 	$(srcroot)test/unit/binshard.c \
 	$(srcroot)test/unit/bitmap.c \
 	$(srcroot)test/unit/bit_util.c \
 	$(srcroot)test/unit/buf_writer.c \
 	$(srcroot)test/unit/cache_bin.c \
 	$(srcroot)test/unit/ckh.c \
+	$(srcroot)test/unit/conf.c \
+	$(srcroot)test/unit/conf_init_0.c \
+	$(srcroot)test/unit/conf_init_1.c \
+	$(srcroot)test/unit/conf_init_confirm.c \
+	$(srcroot)test/unit/conf_parse.c \
 	$(srcroot)test/unit/counter.c \
 	$(srcroot)test/unit/decay.c \
 	$(srcroot)test/unit/div.c \
@@ -224,6 +236,10 @@ TESTS_UNIT := \
 	$(srcroot)test/unit/hash.c \
 	$(srcroot)test/unit/hook.c \
 	$(srcroot)test/unit/hpa.c \
+	$(srcroot)test/unit/hpa_sec_integration.c \
+	$(srcroot)test/unit/hpa_thp_always.c \
+	$(srcroot)test/unit/hpa_vectorized_madvise.c \
+	$(srcroot)test/unit/hpa_vectorized_madvise_large_batch.c \
 	$(srcroot)test/unit/hpa_background_thread.c \
 	$(srcroot)test/unit/hpdata.c \
 	$(srcroot)test/unit/huge.c \
@@ -231,6 +247,8 @@ TESTS_UNIT := \
 	$(srcroot)test/unit/junk.c \
 	$(srcroot)test/unit/junk_alloc.c \
 	$(srcroot)test/unit/junk_free.c \
+	$(srcroot)test/unit/json_stats.c \
+	$(srcroot)test/unit/large_ralloc.c \
 	$(srcroot)test/unit/log.c \
 	$(srcroot)test/unit/mallctl.c \
 	$(srcroot)test/unit/malloc_conf_2.c \
@@ -240,6 +258,7 @@ TESTS_UNIT := \
 	$(srcroot)test/unit/mq.c \
 	$(srcroot)test/unit/mtx.c \
 	$(srcroot)test/unit/nstime.c \
+	$(srcroot)test/unit/ncached_max.c \
 	$(srcroot)test/unit/oversize_threshold.c \
 	$(srcroot)test/unit/pa.c \
 	$(srcroot)test/unit/pack.c \
@@ -256,6 +275,7 @@ TESTS_UNIT := \
 	$(srcroot)test/unit/prof_mdump.c \
 	$(srcroot)test/unit/prof_recent.c \
 	$(srcroot)test/unit/prof_reset.c \
+	$(srcroot)test/unit/prof_small.c \
 	$(srcroot)test/unit/prof_stats.c \
 	$(srcroot)test/unit/prof_tctx.c \
 	$(srcroot)test/unit/prof_thread_name.c \
@@ -279,6 +299,7 @@ TESTS_UNIT := \
 	$(srcroot)test/unit/stats.c \
 	$(srcroot)test/unit/stats_print.c \
 	$(srcroot)test/unit/sz.c \
+	$(srcroot)test/unit/tcache_init.c \
 	$(srcroot)test/unit/tcache_max.c \
 	$(srcroot)test/unit/test_hooks.c \
 	$(srcroot)test/unit/thread_event.c \
@@ -332,10 +353,15 @@ TESTS_STRESS := $(srcroot)test/stress/batch_alloc.c \
 	$(srcroot)test/stress/large_microbench.c \
 	$(srcroot)test/stress/mallctl.c \
 	$(srcroot)test/stress/microbench.c
+ifeq (@enable_cxx@, 1)
+TESTS_STRESS_CPP := $(srcroot)test/stress/cpp/microbench.cpp
+else
+TESTS_STRESS_CPP :=
+endif
 
 
 TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) \
-	$(TESTS_ANALYZE) $(TESTS_STRESS)
+	$(TESTS_ANALYZE) $(TESTS_STRESS) $(TESTS_STRESS_CPP)
 
 PRIVATE_NAMESPACE_HDRS := $(objroot)include/jemalloc/internal/private_namespace.h $(objroot)include/jemalloc/internal/private_namespace_jet.h
 PRIVATE_NAMESPACE_GEN_HDRS := $(PRIVATE_NAMESPACE_HDRS:%.h=%.gen.h)
@@ -362,9 +388,10 @@ TESTS_INTEGRATION_OBJS := $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%.$(O))
 TESTS_INTEGRATION_CPP_OBJS := $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%.$(O))
 TESTS_ANALYZE_OBJS := $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%.$(O))
 TESTS_STRESS_OBJS := $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%.$(O))
+TESTS_STRESS_CPP_OBJS := $(TESTS_STRESS_CPP:$(srcroot)%.cpp=$(objroot)%.$(O))
 TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_ANALYZE_OBJS) \
 	$(TESTS_STRESS_OBJS)
-TESTS_CPP_OBJS := $(TESTS_INTEGRATION_CPP_OBJS)
+TESTS_CPP_OBJS := $(TESTS_INTEGRATION_CPP_OBJS) $(TESTS_STRESS_CPP_OBJS)
 
 .PHONY: all dist build_doc_html build_doc_man build_doc
 .PHONY: install_bin install_include install_lib
@@ -454,10 +481,13 @@ $(TESTS_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST
 $(TESTS_INTEGRATION_CPP_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_CPP_TEST
 $(TESTS_ANALYZE_OBJS): CPPFLAGS += -DJEMALLOC_ANALYZE_TEST
 $(TESTS_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST
+$(TESTS_STRESS_CPP_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_CPP_TEST
 $(TESTS_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c
 $(TESTS_CPP_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.cpp
 $(TESTS_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include
 $(TESTS_CPP_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include
+$(TESTS_OBJS): CFLAGS += -fno-builtin
+$(TESTS_CPP_OBJS): CPPFLAGS += -fno-builtin
 ifneq ($(IMPORTLIB),$(SO))
 $(CPP_OBJS) $(C_SYM_OBJS) $(C_OBJS) $(C_JET_SYM_OBJS) $(C_JET_OBJS): CPPFLAGS += -DDLLEXPORT
 endif
@@ -472,7 +502,7 @@ $(TESTS_OBJS) $(TESTS_CPP_OBJS): $(objroot)test/include/test/jemalloc_test.h
 endif
 
 $(C_OBJS) $(CPP_OBJS) $(C_PIC_OBJS) $(CPP_PIC_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_INTEGRATION_CPP_OBJS): $(objroot)include/jemalloc/internal/private_namespace.h
-$(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_ANALYZE_OBJS) $(C_TESTLIB_STRESS_OBJS) $(TESTS_UNIT_OBJS) $(TESTS_ANALYZE_OBJS) $(TESTS_STRESS_OBJS): $(objroot)include/jemalloc/internal/private_namespace_jet.h
+$(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_ANALYZE_OBJS) $(C_TESTLIB_STRESS_OBJS) $(TESTS_UNIT_OBJS) $(TESTS_ANALYZE_OBJS) $(TESTS_STRESS_OBJS) $(TESTS_STRESS_CPP_OBJS): $(objroot)include/jemalloc/internal/private_namespace_jet.h
 
 $(C_SYM_OBJS) $(C_OBJS) $(C_PIC_OBJS) $(C_JET_SYM_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): %.$(O):
 	@mkdir -p $(@D)
@@ -513,7 +543,11 @@ endif
 
 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) : $(if $(PIC_CFLAGS),$(C_PIC_OBJS),$(C_OBJS)) $(if $(PIC_CFLAGS),$(CPP_PIC_OBJS),$(CPP_OBJS))
 	@mkdir -p $(@D)
+ifeq (@enable_cxx@, 1)
+	$(CXX) $(DSO_LDFLAGS) $(call RPATH,$(RPATH_EXTRA)) $(LDTARGET) $+ $(LDFLAGS) $(LIBS) $(EXTRA_LDFLAGS)
+else
 	$(CC) $(DSO_LDFLAGS) $(call RPATH,$(RPATH_EXTRA)) $(LDTARGET) $+ $(LDFLAGS) $(LIBS) $(EXTRA_LDFLAGS)
+endif
 
 $(objroot)lib/$(LIBJEMALLOC)_pic.$(A) : $(C_PIC_OBJS) $(CPP_PIC_OBJS)
 $(objroot)lib/$(LIBJEMALLOC).$(A) : $(C_OBJS) $(CPP_OBJS)
@@ -543,6 +577,28 @@ $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TE
 	@mkdir -p $(@D)
 	$(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS)
 
+$(objroot)test/stress/pa/pa_data_preprocessor$(EXE): $(objroot)test/stress/pa/pa_data_preprocessor.$(O)
+	@mkdir -p $(@D)
+	$(CXX) $(LDTARGET) $(filter %.$(O),$^) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS)
+
+$(objroot)test/stress/pa/pa_microbench$(EXE): $(objroot)test/stress/pa/pa_microbench.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS)
+	@mkdir -p $(@D)
+	$(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS)
+
+$(objroot)test/stress/pa/%.$(O): $(srcroot)test/stress/pa/%.c
+	@mkdir -p $(@D)
+	$(CC) $(CFLAGS) -c $(CPPFLAGS) -DJEMALLOC_STRESS_TEST -I$(srcroot)test/include -I$(objroot)test/include $(CTARGET) $<
+ifdef CC_MM
+	@$(CC) -MM $(CPPFLAGS) -DJEMALLOC_STRESS_TEST -I$(srcroot)test/include -I$(objroot)test/include -MT $@ -o $(@:%.$(O)=%.d) $<
+endif
+
+$(objroot)test/stress/pa/%.$(O): $(srcroot)test/stress/pa/%.cpp
+	@mkdir -p $(@D)
+	$(CXX) $(CXXFLAGS) -c $(CPPFLAGS) -I$(srcroot)test/include -I$(objroot)test/include $(CTARGET) $<
+ifdef CC_MM
+	@$(CXX) -MM $(CPPFLAGS) -I$(srcroot)test/include -I$(objroot)test/include -MT $@ -o $(@:%.$(O)=%.d) $<
+endif
+
 build_lib_shared: $(DSOS)
 build_lib_static: $(STATIC_LIBS)
 ifeq ($(enable_shared), 1)
@@ -555,18 +611,20 @@ endif
 install_bin:
 	$(INSTALL) -d $(BINDIR)
 	@for b in $(BINS); do \
-	$(INSTALL) -v -m 755 $$b $(BINDIR); \
+	echo "$(INSTALL) -m 755 $$b $(BINDIR)"; \
+	$(INSTALL) -m 755 $$b $(BINDIR); \
 done
 
 install_include:
 	$(INSTALL) -d $(INCLUDEDIR)/jemalloc
 	@for h in $(C_HDRS); do \
-	$(INSTALL) -v -m 644 $$h $(INCLUDEDIR)/jemalloc; \
+	echo "$(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc"; \
+	$(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc; \
 done
 
 install_lib_shared: $(DSOS)
 	$(INSTALL) -d $(LIBDIR)
-	$(INSTALL) -v -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR)
+	$(INSTALL) -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR)
 ifneq ($(SOREV),$(SO))
 	ln -sf $(LIBJEMALLOC).$(SOREV) $(LIBDIR)/$(LIBJEMALLOC).$(SO)
 endif
@@ -574,13 +632,15 @@ endif
 install_lib_static: $(STATIC_LIBS)
 	$(INSTALL) -d $(LIBDIR)
 	@for l in $(STATIC_LIBS); do \
-	$(INSTALL) -v -m 755 $$l $(LIBDIR); \
+	echo "$(INSTALL) -m 755 $$l $(LIBDIR)"; \
+	$(INSTALL) -m 755 $$l $(LIBDIR); \
 done
 
 install_lib_pc: $(PC)
 	$(INSTALL) -d $(LIBDIR)/pkgconfig
 	@for l in $(PC); do \
-	$(INSTALL) -v -m 644 $$l $(LIBDIR)/pkgconfig; \
+	echo "$(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig"; \
+	$(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig; \
 done
 
 ifeq ($(enable_shared), 1)
@@ -594,13 +654,15 @@ install_lib: install_lib_pc
 install_doc_html: build_doc_html
 	$(INSTALL) -d $(DATADIR)/doc/jemalloc$(install_suffix)
 	@for d in $(DOCS_HTML); do \
-	$(INSTALL) -v -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \
+	echo "$(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \
+	$(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \
 done
 
 install_doc_man: build_doc_man
 	$(INSTALL) -d $(MANDIR)/man3
 	@for d in $(DOCS_MAN3); do \
-	$(INSTALL) -v -m 644 $$d $(MANDIR)/man3; \
+	echo "$(INSTALL) -m 644 $$d $(MANDIR)/man3"; \
+	$(INSTALL) -m 644 $$d $(MANDIR)/man3; \
 done
 
 install_doc: install_doc_html install_doc_man
@@ -656,7 +718,8 @@ endif
 tests_unit: $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%$(EXE))
 tests_integration: $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE))
 tests_analyze: $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%$(EXE))
-tests_stress: $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%$(EXE))
+tests_stress: $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_STRESS_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE))
+tests_pa: $(objroot)test/stress/pa/pa_data_preprocessor$(EXE) $(objroot)test/stress/pa/pa_microbench$(EXE)
 tests: tests_unit tests_integration tests_analyze tests_stress
 
 check_unit_dir:
@@ -689,6 +752,7 @@ else
 endif
 stress: tests_stress stress_dir
 	$(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%)
+	$(SHELL) $(objroot)test/test.sh $(TESTS_STRESS_CPP:$(srcroot)%.cpp=$(objroot)%)
 check: check_unit check_integration check_integration_decay check_integration_prof
 
 clean:
diff --git a/README b/README
index 3a6e0d27..d33a69ce 100644
--- a/README
+++ b/README
@@ -17,4 +17,4 @@ jemalloc.
 
 The ChangeLog file contains a brief summary of changes for each release.
 
-URL: http://jemalloc.net/
+URL: https://jemalloc.net/
diff --git a/TUNING.md b/TUNING.md
index e96399d7..1f6bef35 100644
--- a/TUNING.md
+++ b/TUNING.md
@@ -11,9 +11,9 @@ by a few percent, or make favorable trade-offs.
 ## Notable runtime options for performance tuning
 
 Runtime options can be set via
-[malloc_conf](http://jemalloc.net/jemalloc.3.html#tuning).
+[malloc_conf](https://jemalloc.net/jemalloc.3.html#tuning).
 
-* [background_thread](http://jemalloc.net/jemalloc.3.html#background_thread)
+* [background_thread](https://jemalloc.net/jemalloc.3.html#background_thread)
 
     Enabling jemalloc background threads generally improves the tail latency for
     application threads, since unused memory purging is shifted to the dedicated
@@ -23,7 +23,7 @@ Runtime options can be set via
     Suggested: `background_thread:true` when jemalloc managed threads can be
     allowed.
 
-* [metadata_thp](http://jemalloc.net/jemalloc.3.html#opt.metadata_thp)
+* [metadata_thp](https://jemalloc.net/jemalloc.3.html#opt.metadata_thp)
 
     Allowing jemalloc to utilize transparent huge pages for its internal
     metadata usually reduces TLB misses significantly, especially for programs
@@ -35,8 +35,8 @@ Runtime options can be set via
     `metadata_thp:always`, which is expected to improve CPU utilization at a
     small memory cost.
 
-* [dirty_decay_ms](http://jemalloc.net/jemalloc.3.html#opt.dirty_decay_ms) and
-  [muzzy_decay_ms](http://jemalloc.net/jemalloc.3.html#opt.muzzy_decay_ms)
+* [dirty_decay_ms](https://jemalloc.net/jemalloc.3.html#opt.dirty_decay_ms) and
+  [muzzy_decay_ms](https://jemalloc.net/jemalloc.3.html#opt.muzzy_decay_ms)
 
     Decay time determines how fast jemalloc returns unused pages back to the
     operating system, and therefore provides a fairly straightforward trade-off
@@ -46,7 +46,7 @@ Runtime options can be set via
 
     Suggested: tune the values based on the desired trade-offs.
 
-* [narenas](http://jemalloc.net/jemalloc.3.html#opt.narenas)
+* [narenas](https://jemalloc.net/jemalloc.3.html#opt.narenas)
 
     By default jemalloc uses multiple arenas to reduce internal lock contention.
     However high arena count may also increase overall memory fragmentation,
@@ -57,7 +57,7 @@ Runtime options can be set via
     Suggested: if low parallelism is expected, try lower arena count while
     monitoring CPU and memory usage.
 
-* [percpu_arena](http://jemalloc.net/jemalloc.3.html#opt.percpu_arena)
+* [percpu_arena](https://jemalloc.net/jemalloc.3.html#opt.percpu_arena)
 
     Enable dynamic thread to arena association based on running CPU.  This has
     the potential to improve locality, e.g. when thread to CPU affinity is
@@ -100,28 +100,28 @@ aborts immediately on illegal options.
 In addition to the runtime options, there are a number of programmatic ways to
 improve application performance with jemalloc.
 
-* [Explicit arenas](http://jemalloc.net/jemalloc.3.html#arenas.create)
+* [Explicit arenas](https://jemalloc.net/jemalloc.3.html#arenas.create)
 
     Manually created arenas can help performance in various ways, e.g. by
     managing locality and contention for specific usages.  For example,
     applications can explicitly allocate frequently accessed objects from a
     dedicated arena with
-    [mallocx()](http://jemalloc.net/jemalloc.3.html#MALLOCX_ARENA) to improve
+    [mallocx()](https://jemalloc.net/jemalloc.3.html#MALLOCX_ARENA) to improve
     locality.  In addition, explicit arenas often benefit from individually
     tuned options, e.g. relaxed [decay
-    time](http://jemalloc.net/jemalloc.3.html#arena.i.dirty_decay_ms) if
+    time](https://jemalloc.net/jemalloc.3.html#arena.i.dirty_decay_ms) if
     frequent reuse is expected.
 
-* [Extent hooks](http://jemalloc.net/jemalloc.3.html#arena.i.extent_hooks)
+* [Extent hooks](https://jemalloc.net/jemalloc.3.html#arena.i.extent_hooks)
 
     Extent hooks allow customization for managing underlying memory.  One use
     case for performance purpose is to utilize huge pages -- for example,
-    [HHVM](https://github.com/facebook/hhvm/blob/master/hphp/util/alloc.cpp)
+    [HHVM](httpss://github.com/facebook/hhvm/blob/master/hphp/util/alloc.cpp)
     uses explicit arenas with customized extent hooks to manage 1GB huge pages
     for frequently accessed data, which reduces TLB misses significantly.
 
 * [Explicit thread-to-arena
-  binding](http://jemalloc.net/jemalloc.3.html#thread.arena)
+  binding](https://jemalloc.net/jemalloc.3.html#thread.arena)
 
     It is common for some threads in an application to have different memory
     access / allocation patterns.  Threads with heavy workloads often benefit
diff --git a/autogen.sh b/autogen.sh
index 75f32da6..c5325fc9 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -9,8 +9,8 @@ for i in autoconf; do
     fi
 done
 
-echo "./configure --enable-autogen $@"
-./configure --enable-autogen $@
+echo "./configure --enable-autogen \"$@\""
+./configure --enable-autogen "$@"
 if [ $? -ne 0 ]; then
     echo "Error $? in ./configure"
     exit 1
diff --git a/bin/jeprof.in b/bin/jeprof.in
index dbf6252b..9cae84ed 100644
--- a/bin/jeprof.in
+++ b/bin/jeprof.in
@@ -88,6 +88,7 @@ my %obj_tool_map = (
   #"nm_pdb" => "nm-pdb",       # for reading windows (PDB-format) executables
   #"addr2line_pdb" => "addr2line-pdb",                                # ditto
   #"otool" => "otool",         # equivalent of objdump on OS X
+  #"dyld_info" => "dyld_info",   # equivalent of otool on OS X for shared cache
 );
 # NOTE: these are lists, so you can put in commandline flags if you want.
 my @DOT = ("dot");          # leave non-absolute, since it may be in /usr/local
@@ -688,15 +689,15 @@ sub Main() {
   my $symbol_map = {};
 
   # Read one profile, pick the last item on the list
-  my $data = ReadProfile($main::prog, pop(@main::profile_files));
+  my $data = ReadProfile($main::prog, $main::profile_files[0]);
   my $profile = $data->{profile};
   my $pcs = $data->{pcs};
   my $libs = $data->{libs};   # Info about main program and shared libraries
   $symbol_map = MergeSymbols($symbol_map, $data->{symbols});
 
   # Add additional profiles, if available.
-  if (scalar(@main::profile_files) > 0) {
-    foreach my $pname (@main::profile_files) {
+  if (scalar(@main::profile_files) > 1) {
+    foreach my $pname (@main::profile_files[1..$#main::profile_files]) {
       my $data2 = ReadProfile($main::prog, $pname);
       $profile = AddProfile($profile, $data2->{profile});
       $pcs = AddPcs($pcs, $data2->{pcs});
@@ -2955,8 +2956,25 @@ sub RemoveUninterestingFrames {
     foreach my $name ('@JEMALLOC_PREFIX@calloc',
                       'cfree',
                       '@JEMALLOC_PREFIX@malloc',
+                      'je_malloc_default',
                       'newImpl',
                       'void* newImpl',
+                      'fallbackNewImpl',
+                      'void* fallbackNewImpl',
+                      'fallback_impl',
+                      'void* fallback_impl',
+                      'imalloc',
+                      'int imalloc',
+                      'imalloc_body',
+                      'int imalloc_body',
+                      'prof_alloc_prep',
+                      'prof_tctx_t *prof_alloc_prep',
+                      'prof_backtrace_impl',
+                      'void prof_backtrace_impl',
+                      'je_prof_backtrace',
+                      'void je_prof_backtrace',
+                      'je_prof_tctx_create',
+                      'prof_tctx_t* prof_tctx_create',
                       '@JEMALLOC_PREFIX@free',
                       '@JEMALLOC_PREFIX@memalign',
                       '@JEMALLOC_PREFIX@posix_memalign',
@@ -2965,7 +2983,12 @@ sub RemoveUninterestingFrames {
                       '@JEMALLOC_PREFIX@valloc',
                       '@JEMALLOC_PREFIX@realloc',
                       '@JEMALLOC_PREFIX@mallocx',
+                      'irallocx_prof',
+                      'void *irallocx_prof',
                       '@JEMALLOC_PREFIX@rallocx',
+                      'do_rallocx',
+                      'ixallocx_prof',
+                      'size_t ixallocx_prof',
                       '@JEMALLOC_PREFIX@xallocx',
                       '@JEMALLOC_PREFIX@dallocx',
                       '@JEMALLOC_PREFIX@sdallocx',
@@ -3078,6 +3101,8 @@ sub RemoveUninterestingFrames {
     foreach my $a (@addrs) {
       if (exists($symbols->{$a})) {
         my $func = $symbols->{$a}->[0];
+        # Remove suffix in the symbols following space when filtering.
+        $func =~ s/ .*//;
         if ($skip{$func} || ($func =~ m/$skip_regexp/)) {
           # Throw away the portion of the backtrace seen so far, under the
           # assumption that previous frames were for functions internal to the
@@ -4500,19 +4525,19 @@ sub FindLibrary {
 # For libc libraries, the copy in /usr/lib/debug contains debugging symbols
 sub DebuggingLibrary {
   my $file = shift;
-      
+
   if ($file !~ m|^/|) {
     return undef;
   }
-      
+
   # Find debug symbol file if it's named after the library's name.
-  
-  if (-f "/usr/lib/debug$file") {                 
+
+  if (-f "/usr/lib/debug$file") {
     if($main::opt_debug) { print STDERR "found debug info for $file in /usr/lib/debug$file\n"; }
     return "/usr/lib/debug$file";
   } elsif (-f "/usr/lib/debug$file.debug") {
     if($main::opt_debug) { print STDERR "found debug info for $file in /usr/lib/debug$file.debug\n"; }
-    return "/usr/lib/debug$file.debug"; 
+    return "/usr/lib/debug$file.debug";
   }
 
   if(!$main::opt_debug_syms_by_id) {
@@ -4521,7 +4546,7 @@ sub DebuggingLibrary {
   }
 
   # Find debug file if it's named after the library's build ID.
-  
+
   my $readelf = '';
   if (!$main::gave_up_on_elfutils) {
     $readelf = qx/eu-readelf -n ${file}/;
@@ -4657,7 +4682,65 @@ sub ParseTextSectionHeaderFromOtool {
   return $r;
 }
 
+# Parse text section header of a library in OS X shared cache using dyld_info
+sub ParseTextSectionHeaderFromDyldInfo {
+  my $lib = shift;
+
+  my $size = undef;
+  my $vma;
+  my $file_offset;
+  # Get dyld_info output from the library file to figure out how to
+  # map between mapped addresses and addresses in the library.
+  my $cmd = ShellEscape($obj_tool_map{"dyld_info"}, "-segments", $lib);
+  open(DYLD, "$cmd |") || error("$cmd: $!\n");
+
+  while (<DYLD>) {
+    s/\r//g;         # turn windows-looking lines into unix-looking lines
+    # -segments:
+    #    load-address    segment section        sect-size  seg-size perm
+    #     0x1803E0000    __TEXT                                   112KB r.x
+    #     0x1803E4F34             __text            80960
+    #     0x1803F8B74             __auth_stubs        768
+    #     0x1803F8E74             __init_offsets        4
+    #     0x1803F8E78             __gcc_except_tab   1180
+    my @x = split;
+    if ($#x >= 2) {
+      if ($x[0] eq 'load-offset') {
+        # dyld_info should only be used for the shared lib.
+        return undef;
+      } elsif ($x[1] eq '__TEXT') {
+        $file_offset = $x[0];
+      } elsif ($x[1] eq '__text') {
+        $size = $x[2];
+        $vma = $x[0];
+        $file_offset = AddressSub($x[0], $file_offset);
+        last;
+      }
+    }
+  }
+  close(DYLD);
+
+  if (!defined($vma) || !defined($size) || !defined($file_offset)) {
+     return undef;
+  }
+
+  my $r = {};
+  $r->{size} = $size;
+  $r->{vma} = $vma;
+  $r->{file_offset} = $file_offset;
+
+  return $r;
+}
+
 sub ParseTextSectionHeader {
+  # obj_tool_map("dyld_info") is only defined if we're in a Mach-O environment
+  if (defined($obj_tool_map{"dyld_info"})) {
+    my $r = ParseTextSectionHeaderFromDyldInfo(@_);
+    if (defined($r)){
+      return $r;
+    }
+  }
+  # if dyld_info doesn't work, or we don't have it, fall back to otool
   # obj_tool_map("otool") is only defined if we're in a Mach-O environment
   if (defined($obj_tool_map{"otool"})) {
     my $r = ParseTextSectionHeaderFromOtool(@_);
@@ -4698,7 +4781,7 @@ sub ParseLibraries {
       $offset = HexExtend($3);
       $lib = $4;
       $lib =~ s|\\|/|g;     # turn windows-style paths into unix-style paths
-    } elsif ($l =~ /^\s*($h)-($h):\s*(\S+\.so(\.\d+)*)/) {
+    } elsif ($l =~ /^\s*($h)-($h):\s*(\S+\.(so|dll|dylib|bundle)(\.\d+)*)/) {
       # Cooked line from DumpAddressMap.  Example:
       #   40000000-40015000: /lib/ld-2.3.2.so
       $start = HexExtend($1);
@@ -4715,6 +4798,15 @@ sub ParseLibraries {
       $offset = HexExtend($3);
       $lib = $4;
       $lib =~ s|\\|/|g;     # turn windows-style paths into unix-style paths
+    } elsif (($l =~ /^\s*($h)-($h):\s*(\S+)/) && ($3 eq $prog)) {
+      # PIEs and address space randomization do not play well with our
+      # default assumption that main executable is at lowest
+      # addresses. So we're detecting main executable from
+      # DumpAddressMap as well.
+      $start = HexExtend($1);
+      $finish = HexExtend($2);
+      $offset = $zero_offset;
+      $lib = $3;
     }
     # FreeBSD 10.0 virtual memory map /proc/curproc/map as defined in
     # function procfs_doprocmap (sys/fs/procfs/procfs_map.c)
@@ -5245,6 +5337,7 @@ sub ConfigureObjTools {
   if ($file_type =~ /Mach-O/) {
     # OS X uses otool to examine Mach-O files, rather than objdump.
     $obj_tool_map{"otool"} = "otool";
+    $obj_tool_map{"dyld_info"} = "dyld_info";
     $obj_tool_map{"addr2line"} = "false";  # no addr2line
     $obj_tool_map{"objdump"} = "false";  # no objdump
   }
diff --git a/build-aux/config.guess b/build-aux/config.guess
index f7727026..a9d01fde 100755
--- a/build-aux/config.guess
+++ b/build-aux/config.guess
@@ -1,12 +1,14 @@
 #! /bin/sh
 # Attempt to guess a canonical system name.
-#   Copyright 1992-2021 Free Software Foundation, Inc.
+#   Copyright 1992-2025 Free Software Foundation, Inc.
 
-timestamp='2021-01-01'
+# shellcheck disable=SC2006,SC2268 # see below for rationale
+
+timestamp='2025-07-10'
 
 # This file is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or
+# the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful, but
@@ -32,12 +34,20 @@ timestamp='2021-01-01'
 # Please send patches to <config-patches@gnu.org>.
 
 
-me=$(echo "$0" | sed -e 's,.*/,,')
+# The "shellcheck disable" line above the timestamp inhibits complaints
+# about features and limitations of the classic Bourne shell that were
+# superseded or lifted in POSIX.  However, this script identifies a wide
+# variety of pre-POSIX systems that do not have POSIX shells at all, and
+# even some reasonably current systems (Solaris 10 as case-in-point) still
+# have a pre-POSIX /bin/sh.
+
+
+me=`echo "$0" | sed -e 's,.*/,,'`
 
 usage="\
 Usage: $0 [OPTION]
 
-Output the configuration name of the system \`$me' is run on.
+Output the configuration name of the system '$me' is run on.
 
 Options:
   -h, --help         print this help, then exit
@@ -50,13 +60,13 @@ version="\
 GNU config.guess ($timestamp)
 
 Originally written by Per Bothner.
-Copyright 1992-2021 Free Software Foundation, Inc.
+Copyright 1992-2025 Free Software Foundation, Inc.
 
 This is free software; see the source for copying conditions.  There is NO
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
 
 help="
-Try \`$me --help' for more information."
+Try '$me --help' for more information."
 
 # Parse command line
 while test $# -gt 0 ; do
@@ -84,13 +94,16 @@ if test $# != 0; then
   exit 1
 fi
 
+# Just in case it came from the environment.
+GUESS=
+
 # CC_FOR_BUILD -- compiler used by this script. Note that the use of a
 # compiler to aid in system detection is discouraged as it requires
 # temporary files to be created and, as you can see below, it is a
 # headache to deal with in a portable fashion.
 
-# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still
-# use `HOST_CC' if defined, but it is deprecated.
+# Historically, 'CC_FOR_BUILD' used to be named 'HOST_CC'. We still
+# use 'HOST_CC' if defined, but it is deprecated.
 
 # Portable tmp directory creation inspired by the Autoconf team.
 
@@ -102,17 +115,17 @@ set_cc_for_build() {
     # prevent multiple calls if $tmp is already set
     test "$tmp" && return 0
     : "${TMPDIR=/tmp}"
-    # shellcheck disable=SC2039
-    { tmp=$( (umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null) && test -n "$tmp" && test -d "$tmp" ; } ||
+    # shellcheck disable=SC2039,SC3028
+    { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
 	{ test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir "$tmp" 2>/dev/null) ; } ||
 	{ tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir "$tmp" 2>/dev/null) && echo "Warning: creating insecure temp directory" >&2 ; } ||
 	{ echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; }
     dummy=$tmp/dummy
     case ${CC_FOR_BUILD-},${HOST_CC-},${CC-} in
 	,,)    echo "int x;" > "$dummy.c"
-	       for driver in cc gcc c89 c99 ; do
+	       for driver in cc gcc c17 c99 c89 ; do
 		   if ($driver -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then
-		       CC_FOR_BUILD="$driver"
+		       CC_FOR_BUILD=$driver
 		       break
 		   fi
 	       done
@@ -131,17 +144,20 @@ if test -f /.attbin/uname ; then
 	PATH=$PATH:/.attbin ; export PATH
 fi
 
-UNAME_MACHINE=$( (uname -m) 2>/dev/null) || UNAME_MACHINE=unknown
-UNAME_RELEASE=$( (uname -r) 2>/dev/null) || UNAME_RELEASE=unknown
-UNAME_SYSTEM=$( (uname -s) 2>/dev/null) || UNAME_SYSTEM=unknown
-UNAME_VERSION=$( (uname -v) 2>/dev/null) || UNAME_VERSION=unknown
+UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown
+UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown
+UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown
+UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
 
-case "$UNAME_SYSTEM" in
+case $UNAME_SYSTEM in
 Linux|GNU|GNU/*)
 	LIBC=unknown
 
 	set_cc_for_build
 	cat <<-EOF > "$dummy.c"
+	#if defined(__ANDROID__)
+	LIBC=android
+	#else
 	#include <features.h>
 	#if defined(__UCLIBC__)
 	LIBC=uclibc
@@ -149,6 +165,8 @@ Linux|GNU|GNU/*)
 	LIBC=dietlibc
 	#elif defined(__GLIBC__)
 	LIBC=gnu
+	#elif defined(__LLVM_LIBC__)
+	LIBC=llvm
 	#else
 	#include <stdarg.h>
 	/* First heuristic to detect musl libc.  */
@@ -156,8 +174,10 @@ Linux|GNU|GNU/*)
 	LIBC=musl
 	#endif
 	#endif
+	#endif
 	EOF
-	eval "$($CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g')"
+	cc_set_libc=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g'`
+	eval "$cc_set_libc"
 
 	# Second heuristic to detect musl libc.
 	if [ "$LIBC" = unknown ] &&
@@ -176,7 +196,7 @@ esac
 
 # Note: order is significant - the case branches are not exclusive.
 
-case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
+case $UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION in
     *:NetBSD:*:*)
 	# NetBSD (nbsd) targets should (where applicable) match one or
 	# more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
@@ -188,12 +208,11 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
 	#
 	# Note: NetBSD doesn't particularly care about the vendor
 	# portion of the name.  We always set it to "unknown".
-	sysctl="sysctl -n hw.machine_arch"
-	UNAME_MACHINE_ARCH=$( (uname -p 2>/dev/null || \
-	    "/sbin/$sysctl" 2>/dev/null || \
-	    "/usr/sbin/$sysctl" 2>/dev/null || \
-	    echo unknown))
-	case "$UNAME_MACHINE_ARCH" in
+	UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \
+	    /sbin/sysctl -n hw.machine_arch 2>/dev/null || \
+	    /usr/sbin/sysctl -n hw.machine_arch 2>/dev/null || \
+	    echo unknown)`
+	case $UNAME_MACHINE_ARCH in
 	    aarch64eb) machine=aarch64_be-unknown ;;
 	    armeb) machine=armeb-unknown ;;
 	    arm*) machine=arm-unknown ;;
@@ -201,15 +220,15 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
 	    sh3eb) machine=sh-unknown ;;
 	    sh5el) machine=sh5le-unknown ;;
 	    earmv*)
-		arch=$(echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,')
-		endian=$(echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p')
-		machine="${arch}${endian}"-unknown
+		arch=`echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,'`
+		endian=`echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p'`
+		machine=${arch}${endian}-unknown
 		;;
-	    *) machine="$UNAME_MACHINE_ARCH"-unknown ;;
+	    *) machine=$UNAME_MACHINE_ARCH-unknown ;;
 	esac
 	# The Operating System including object format, if it has switched
 	# to ELF recently (or will in the future) and ABI.
-	case "$UNAME_MACHINE_ARCH" in
+	case $UNAME_MACHINE_ARCH in
 	    earm*)
 		os=netbsdelf
 		;;
@@ -230,10 +249,10 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
 		;;
 	esac
 	# Determine ABI tags.
-	case "$UNAME_MACHINE_ARCH" in
+	case $UNAME_MACHINE_ARCH in
 	    earm*)
 		expr='s/^earmv[0-9]/-eabi/;s/eb$//'
-		abi=$(echo "$UNAME_MACHINE_ARCH" | sed -e "$expr")
+		abi=`echo "$UNAME_MACHINE_ARCH" | sed -e "$expr"`
 		;;
 	esac
 	# The OS release
@@ -241,76 +260,82 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
 	# thus, need a distinct triplet. However, they do not need
 	# kernel version information, so it can be replaced with a
 	# suitable tag, in the style of linux-gnu.
-	case "$UNAME_VERSION" in
+	case $UNAME_VERSION in
 	    Debian*)
 		release='-gnu'
 		;;
 	    *)
-		release=$(echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2)
+		release=`echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2`
 		;;
 	esac
 	# Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:
 	# contains redundant information, the shorter form:
 	# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
-	echo "$machine-${os}${release}${abi-}"
-	exit ;;
+	GUESS=$machine-${os}${release}${abi-}
+	;;
     *:Bitrig:*:*)
-	UNAME_MACHINE_ARCH=$(arch | sed 's/Bitrig.//')
-	echo "$UNAME_MACHINE_ARCH"-unknown-bitrig"$UNAME_RELEASE"
-	exit ;;
+	UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'`
+	GUESS=$UNAME_MACHINE_ARCH-unknown-bitrig$UNAME_RELEASE
+	;;
     *:OpenBSD:*:*)
-	UNAME_MACHINE_ARCH=$(arch | sed 's/OpenBSD.//')
-	echo "$UNAME_MACHINE_ARCH"-unknown-openbsd"$UNAME_RELEASE"
-	exit ;;
+	UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
+	GUESS=$UNAME_MACHINE_ARCH-unknown-openbsd$UNAME_RELEASE
+	;;
+    *:SecBSD:*:*)
+	UNAME_MACHINE_ARCH=`arch | sed 's/SecBSD.//'`
+	GUESS=$UNAME_MACHINE_ARCH-unknown-secbsd$UNAME_RELEASE
+	;;
     *:LibertyBSD:*:*)
-	UNAME_MACHINE_ARCH=$(arch | sed 's/^.*BSD\.//')
-	echo "$UNAME_MACHINE_ARCH"-unknown-libertybsd"$UNAME_RELEASE"
-	exit ;;
+	UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'`
+	GUESS=$UNAME_MACHINE_ARCH-unknown-libertybsd$UNAME_RELEASE
+	;;
     *:MidnightBSD:*:*)
-	echo "$UNAME_MACHINE"-unknown-midnightbsd"$UNAME_RELEASE"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-midnightbsd$UNAME_RELEASE
+	;;
     *:ekkoBSD:*:*)
-	echo "$UNAME_MACHINE"-unknown-ekkobsd"$UNAME_RELEASE"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-ekkobsd$UNAME_RELEASE
+	;;
     *:SolidBSD:*:*)
-	echo "$UNAME_MACHINE"-unknown-solidbsd"$UNAME_RELEASE"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-solidbsd$UNAME_RELEASE
+	;;
     *:OS108:*:*)
-	echo "$UNAME_MACHINE"-unknown-os108_"$UNAME_RELEASE"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-os108_$UNAME_RELEASE
+	;;
     macppc:MirBSD:*:*)
-	echo powerpc-unknown-mirbsd"$UNAME_RELEASE"
-	exit ;;
+	GUESS=powerpc-unknown-mirbsd$UNAME_RELEASE
+	;;
     *:MirBSD:*:*)
-	echo "$UNAME_MACHINE"-unknown-mirbsd"$UNAME_RELEASE"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-mirbsd$UNAME_RELEASE
+	;;
     *:Sortix:*:*)
-	echo "$UNAME_MACHINE"-unknown-sortix
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-sortix
+	;;
     *:Twizzler:*:*)
-	echo "$UNAME_MACHINE"-unknown-twizzler
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-twizzler
+	;;
     *:Redox:*:*)
-	echo "$UNAME_MACHINE"-unknown-redox
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-redox
+	;;
     mips:OSF1:*.*)
-	echo mips-dec-osf1
-	exit ;;
+	GUESS=mips-dec-osf1
+	;;
     alpha:OSF1:*:*)
+	# Reset EXIT trap before exiting to avoid spurious non-zero exit code.
+	trap '' 0
 	case $UNAME_RELEASE in
 	*4.0)
-		UNAME_RELEASE=$(/usr/sbin/sizer -v | awk '{print $3}')
+		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
 		;;
 	*5.*)
-		UNAME_RELEASE=$(/usr/sbin/sizer -v | awk '{print $4}')
+		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
 		;;
 	esac
 	# According to Compaq, /usr/sbin/psrinfo has been available on
 	# OSF/1 and Tru64 systems produced since 1995.  I hope that
 	# covers most systems running today.  This code pipes the CPU
 	# types through head -n 1, so we only detect the type of CPU 0.
-	ALPHA_CPU_TYPE=$(/usr/sbin/psrinfo -v | sed -n -e 's/^  The alpha \(.*\) processor.*$/\1/p' | head -n 1)
-	case "$ALPHA_CPU_TYPE" in
+	ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^  The alpha \(.*\) processor.*$/\1/p' | head -n 1`
+	case $ALPHA_CPU_TYPE in
 	    "EV4 (21064)")
 		UNAME_MACHINE=alpha ;;
 	    "EV4.5 (21064)")
@@ -347,68 +372,69 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
 	# A Tn.n version is a released field test version.
 	# A Xn.n version is an unreleased experimental baselevel.
 	# 1.2 uses "1.2" for uname -r.
-	echo "$UNAME_MACHINE"-dec-osf"$(echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz)"
-	# Reset EXIT trap before exiting to avoid spurious non-zero exit code.
-	exitcode=$?
-	trap '' 0
-	exit $exitcode ;;
+	OSF_REL=`echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`
+	GUESS=$UNAME_MACHINE-dec-osf$OSF_REL
+	;;
     Amiga*:UNIX_System_V:4.0:*)
-	echo m68k-unknown-sysv4
-	exit ;;
+	GUESS=m68k-unknown-sysv4
+	;;
     *:[Aa]miga[Oo][Ss]:*:*)
-	echo "$UNAME_MACHINE"-unknown-amigaos
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-amigaos
+	;;
     *:[Mm]orph[Oo][Ss]:*:*)
-	echo "$UNAME_MACHINE"-unknown-morphos
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-morphos
+	;;
     *:OS/390:*:*)
-	echo i370-ibm-openedition
-	exit ;;
+	GUESS=i370-ibm-openedition
+	;;
     *:z/VM:*:*)
-	echo s390-ibm-zvmoe
-	exit ;;
+	GUESS=s390-ibm-zvmoe
+	;;
     *:OS400:*:*)
-	echo powerpc-ibm-os400
-	exit ;;
+	GUESS=powerpc-ibm-os400
+	;;
     arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
-	echo arm-acorn-riscix"$UNAME_RELEASE"
-	exit ;;
+	GUESS=arm-acorn-riscix$UNAME_RELEASE
+	;;
     arm*:riscos:*:*|arm*:RISCOS:*:*)
-	echo arm-unknown-riscos
-	exit ;;
+	GUESS=arm-unknown-riscos
+	;;
     SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
-	echo hppa1.1-hitachi-hiuxmpp
-	exit ;;
+	GUESS=hppa1.1-hitachi-hiuxmpp
+	;;
     Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*)
 	# akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE.
-	if test "$( (/bin/universe) 2>/dev/null)" = att ; then
-		echo pyramid-pyramid-sysv3
-	else
-		echo pyramid-pyramid-bsd
-	fi
-	exit ;;
+	case `(/bin/universe) 2>/dev/null` in
+	    att) GUESS=pyramid-pyramid-sysv3 ;;
+	    *)   GUESS=pyramid-pyramid-bsd   ;;
+	esac
+	;;
     NILE*:*:*:dcosx)
-	echo pyramid-pyramid-svr4
-	exit ;;
+	GUESS=pyramid-pyramid-svr4
+	;;
     DRS?6000:unix:4.0:6*)
-	echo sparc-icl-nx6
-	exit ;;
+	GUESS=sparc-icl-nx6
+	;;
     DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*)
-	case $(/usr/bin/uname -p) in
-	    sparc) echo sparc-icl-nx7; exit ;;
-	esac ;;
+	case `/usr/bin/uname -p` in
+	    sparc) GUESS=sparc-icl-nx7 ;;
+	esac
+	;;
     s390x:SunOS:*:*)
-	echo "$UNAME_MACHINE"-ibm-solaris2"$(echo "$UNAME_RELEASE" | sed -e 's/[^.]*//')"
-	exit ;;
+	SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`
+	GUESS=$UNAME_MACHINE-ibm-solaris2$SUN_REL
+	;;
     sun4H:SunOS:5.*:*)
-	echo sparc-hal-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')"
-	exit ;;
+	SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`
+	GUESS=sparc-hal-solaris2$SUN_REL
+	;;
     sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
-	echo sparc-sun-solaris2"$(echo "$UNAME_RELEASE" | sed -e 's/[^.]*//')"
-	exit ;;
+	SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`
+	GUESS=sparc-sun-solaris2$SUN_REL
+	;;
     i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*)
-	echo i386-pc-auroraux"$UNAME_RELEASE"
-	exit ;;
+	GUESS=i386-pc-auroraux$UNAME_RELEASE
+	;;
     i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
 	set_cc_for_build
 	SUN_ARCH=i386
@@ -417,47 +443,50 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
 	# This test works for both compilers.
 	if test "$CC_FOR_BUILD" != no_compiler_found; then
 	    if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \
-		(CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
+		(CCOPTS="" $CC_FOR_BUILD -m64 -E - 2>/dev/null) | \
 		grep IS_64BIT_ARCH >/dev/null
 	    then
 		SUN_ARCH=x86_64
 	    fi
 	fi
-	echo "$SUN_ARCH"-pc-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')"
-	exit ;;
+	SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`
+	GUESS=$SUN_ARCH-pc-solaris2$SUN_REL
+	;;
     sun4*:SunOS:6*:*)
 	# According to config.sub, this is the proper way to canonicalize
 	# SunOS6.  Hard to guess exactly what SunOS6 will be like, but
 	# it's likely to be more like Solaris than SunOS4.
-	echo sparc-sun-solaris3"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')"
-	exit ;;
+	SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`
+	GUESS=sparc-sun-solaris3$SUN_REL
+	;;
     sun4*:SunOS:*:*)
-	case "$(/usr/bin/arch -k)" in
+	case `/usr/bin/arch -k` in
 	    Series*|S4*)
-		UNAME_RELEASE=$(uname -v)
+		UNAME_RELEASE=`uname -v`
 		;;
 	esac
-	# Japanese Language versions have a version number like `4.1.3-JL'.
-	echo sparc-sun-sunos"$(echo "$UNAME_RELEASE"|sed -e 's/-/_/')"
-	exit ;;
+	# Japanese Language versions have a version number like '4.1.3-JL'.
+	SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/-/_/'`
+	GUESS=sparc-sun-sunos$SUN_REL
+	;;
     sun3*:SunOS:*:*)
-	echo m68k-sun-sunos"$UNAME_RELEASE"
-	exit ;;
+	GUESS=m68k-sun-sunos$UNAME_RELEASE
+	;;
     sun*:*:4.2BSD:*)
-	UNAME_RELEASE=$( (sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null)
+	UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
 	test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3
-	case "$(/bin/arch)" in
+	case `/bin/arch` in
 	    sun3)
-		echo m68k-sun-sunos"$UNAME_RELEASE"
+		GUESS=m68k-sun-sunos$UNAME_RELEASE
 		;;
 	    sun4)
-		echo sparc-sun-sunos"$UNAME_RELEASE"
+		GUESS=sparc-sun-sunos$UNAME_RELEASE
 		;;
 	esac
-	exit ;;
+	;;
     aushp:SunOS:*:*)
-	echo sparc-auspex-sunos"$UNAME_RELEASE"
-	exit ;;
+	GUESS=sparc-auspex-sunos$UNAME_RELEASE
+	;;
     # The situation for MiNT is a little confusing.  The machine name
     # can be virtually everything (everything which is not
     # "atarist" or "atariste" at least should have a processor
@@ -467,41 +496,41 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
     # MiNT.  But MiNT is downward compatible to TOS, so this should
     # be no problem.
     atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
-	echo m68k-atari-mint"$UNAME_RELEASE"
-	exit ;;
+	GUESS=m68k-atari-mint$UNAME_RELEASE
+	;;
     atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
-	echo m68k-atari-mint"$UNAME_RELEASE"
-	exit ;;
+	GUESS=m68k-atari-mint$UNAME_RELEASE
+	;;
     *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
-	echo m68k-atari-mint"$UNAME_RELEASE"
-	exit ;;
+	GUESS=m68k-atari-mint$UNAME_RELEASE
+	;;
     milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
-	echo m68k-milan-mint"$UNAME_RELEASE"
-	exit ;;
+	GUESS=m68k-milan-mint$UNAME_RELEASE
+	;;
     hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
-	echo m68k-hades-mint"$UNAME_RELEASE"
-	exit ;;
+	GUESS=m68k-hades-mint$UNAME_RELEASE
+	;;
     *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
-	echo m68k-unknown-mint"$UNAME_RELEASE"
-	exit ;;
+	GUESS=m68k-unknown-mint$UNAME_RELEASE
+	;;
     m68k:machten:*:*)
-	echo m68k-apple-machten"$UNAME_RELEASE"
-	exit ;;
+	GUESS=m68k-apple-machten$UNAME_RELEASE
+	;;
     powerpc:machten:*:*)
-	echo powerpc-apple-machten"$UNAME_RELEASE"
-	exit ;;
+	GUESS=powerpc-apple-machten$UNAME_RELEASE
+	;;
     RISC*:Mach:*:*)
-	echo mips-dec-mach_bsd4.3
-	exit ;;
+	GUESS=mips-dec-mach_bsd4.3
+	;;
     RISC*:ULTRIX:*:*)
-	echo mips-dec-ultrix"$UNAME_RELEASE"
-	exit ;;
+	GUESS=mips-dec-ultrix$UNAME_RELEASE
+	;;
     VAX*:ULTRIX*:*:*)
-	echo vax-dec-ultrix"$UNAME_RELEASE"
-	exit ;;
+	GUESS=vax-dec-ultrix$UNAME_RELEASE
+	;;
     2020:CLIX:*:* | 2430:CLIX:*:*)
-	echo clipper-intergraph-clix"$UNAME_RELEASE"
-	exit ;;
+	GUESS=clipper-intergraph-clix$UNAME_RELEASE
+	;;
     mips:*:*:UMIPS | mips:*:*:RISCos)
 	set_cc_for_build
 	sed 's/^	//' << EOF > "$dummy.c"
@@ -526,85 +555,87 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
 	}
 EOF
 	$CC_FOR_BUILD -o "$dummy" "$dummy.c" &&
-	  dummyarg=$(echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p') &&
-	  SYSTEM_NAME=$("$dummy" "$dummyarg") &&
+	  dummyarg=`echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p'` &&
+	  SYSTEM_NAME=`"$dummy" "$dummyarg"` &&
 	    { echo "$SYSTEM_NAME"; exit; }
-	echo mips-mips-riscos"$UNAME_RELEASE"
-	exit ;;
+	GUESS=mips-mips-riscos$UNAME_RELEASE
+	;;
     Motorola:PowerMAX_OS:*:*)
-	echo powerpc-motorola-powermax
-	exit ;;
+	GUESS=powerpc-motorola-powermax
+	;;
     Motorola:*:4.3:PL8-*)
-	echo powerpc-harris-powermax
-	exit ;;
+	GUESS=powerpc-harris-powermax
+	;;
     Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*)
-	echo powerpc-harris-powermax
-	exit ;;
+	GUESS=powerpc-harris-powermax
+	;;
     Night_Hawk:Power_UNIX:*:*)
-	echo powerpc-harris-powerunix
-	exit ;;
+	GUESS=powerpc-harris-powerunix
+	;;
     m88k:CX/UX:7*:*)
-	echo m88k-harris-cxux7
-	exit ;;
+	GUESS=m88k-harris-cxux7
+	;;
     m88k:*:4*:R4*)
-	echo m88k-motorola-sysv4
-	exit ;;
+	GUESS=m88k-motorola-sysv4
+	;;
     m88k:*:3*:R3*)
-	echo m88k-motorola-sysv3
-	exit ;;
+	GUESS=m88k-motorola-sysv3
+	;;
     AViiON:dgux:*:*)
 	# DG/UX returns AViiON for all architectures
-	UNAME_PROCESSOR=$(/usr/bin/uname -p)
+	UNAME_PROCESSOR=`/usr/bin/uname -p`
 	if test "$UNAME_PROCESSOR" = mc88100 || test "$UNAME_PROCESSOR" = mc88110
 	then
 	    if test "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx || \
 	       test "$TARGET_BINARY_INTERFACE"x = x
 	    then
-		echo m88k-dg-dgux"$UNAME_RELEASE"
+		GUESS=m88k-dg-dgux$UNAME_RELEASE
 	    else
-		echo m88k-dg-dguxbcs"$UNAME_RELEASE"
+		GUESS=m88k-dg-dguxbcs$UNAME_RELEASE
 	    fi
 	else
-	    echo i586-dg-dgux"$UNAME_RELEASE"
+	    GUESS=i586-dg-dgux$UNAME_RELEASE
 	fi
-	exit ;;
+	;;
     M88*:DolphinOS:*:*)	# DolphinOS (SVR3)
-	echo m88k-dolphin-sysv3
-	exit ;;
+	GUESS=m88k-dolphin-sysv3
+	;;
     M88*:*:R3*:*)
 	# Delta 88k system running SVR3
-	echo m88k-motorola-sysv3
-	exit ;;
+	GUESS=m88k-motorola-sysv3
+	;;
     XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3)
-	echo m88k-tektronix-sysv3
-	exit ;;
+	GUESS=m88k-tektronix-sysv3
+	;;
     Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD)
-	echo m68k-tektronix-bsd
-	exit ;;
+	GUESS=m68k-tektronix-bsd
+	;;
     *:IRIX*:*:*)
-	echo mips-sgi-irix"$(echo "$UNAME_RELEASE"|sed -e 's/-/_/g')"
-	exit ;;
+	IRIX_REL=`echo "$UNAME_RELEASE" | sed -e 's/-/_/g'`
+	GUESS=mips-sgi-irix$IRIX_REL
+	;;
     ????????:AIX?:[12].1:2)   # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.
-	echo romp-ibm-aix     # uname -m gives an 8 hex-code CPU id
-	exit ;;               # Note that: echo "'$(uname -s)'" gives 'AIX '
+	GUESS=romp-ibm-aix    # uname -m gives an 8 hex-code CPU id
+	;;                    # Note that: echo "'`uname -s`'" gives 'AIX '
     i*86:AIX:*:*)
-	echo i386-ibm-aix
-	exit ;;
+	GUESS=i386-ibm-aix
+	;;
     ia64:AIX:*:*)
 	if test -x /usr/bin/oslevel ; then
-		IBM_REV=$(/usr/bin/oslevel)
+		IBM_REV=`/usr/bin/oslevel`
 	else
-		IBM_REV="$UNAME_VERSION.$UNAME_RELEASE"
+		IBM_REV=$UNAME_VERSION.$UNAME_RELEASE
 	fi
-	echo "$UNAME_MACHINE"-ibm-aix"$IBM_REV"
-	exit ;;
+	GUESS=$UNAME_MACHINE-ibm-aix$IBM_REV
+	;;
     *:AIX:2:3)
 	if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
 		set_cc_for_build
 		sed 's/^		//' << EOF > "$dummy.c"
 		#include <sys/systemcfg.h>
 
-		main()
+		int
+		main ()
 			{
 			if (!__power_pc())
 				exit(1);
@@ -612,68 +643,68 @@ EOF
 			exit(0);
 			}
 EOF
-		if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=$("$dummy")
+		if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"`
 		then
-			echo "$SYSTEM_NAME"
+			GUESS=$SYSTEM_NAME
 		else
-			echo rs6000-ibm-aix3.2.5
+			GUESS=rs6000-ibm-aix3.2.5
 		fi
 	elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then
-		echo rs6000-ibm-aix3.2.4
+		GUESS=rs6000-ibm-aix3.2.4
 	else
-		echo rs6000-ibm-aix3.2
+		GUESS=rs6000-ibm-aix3.2
 	fi
-	exit ;;
+	;;
     *:AIX:*:[4567])
-	IBM_CPU_ID=$(/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }')
+	IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
 	if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then
 		IBM_ARCH=rs6000
 	else
 		IBM_ARCH=powerpc
 	fi
 	if test -x /usr/bin/lslpp ; then
-		IBM_REV=$(/usr/bin/lslpp -Lqc bos.rte.libc |
-			   awk -F: '{ print $3 }' | sed s/[0-9]*$/0/)
+		IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | \
+			   awk -F: '{ print $3 }' | sed s/[0-9]*$/0/`
 	else
-		IBM_REV="$UNAME_VERSION.$UNAME_RELEASE"
+		IBM_REV=$UNAME_VERSION.$UNAME_RELEASE
 	fi
-	echo "$IBM_ARCH"-ibm-aix"$IBM_REV"
-	exit ;;
+	GUESS=$IBM_ARCH-ibm-aix$IBM_REV
+	;;
     *:AIX:*:*)
-	echo rs6000-ibm-aix
-	exit ;;
+	GUESS=rs6000-ibm-aix
+	;;
     ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*)
-	echo romp-ibm-bsd4.4
-	exit ;;
+	GUESS=romp-ibm-bsd4.4
+	;;
     ibmrt:*BSD:*|romp-ibm:BSD:*)            # covers RT/PC BSD and
-	echo romp-ibm-bsd"$UNAME_RELEASE"   # 4.3 with uname added to
-	exit ;;                             # report: romp-ibm BSD 4.3
+	GUESS=romp-ibm-bsd$UNAME_RELEASE    # 4.3 with uname added to
+	;;                                  # report: romp-ibm BSD 4.3
     *:BOSX:*:*)
-	echo rs6000-bull-bosx
-	exit ;;
+	GUESS=rs6000-bull-bosx
+	;;
     DPX/2?00:B.O.S.:*:*)
-	echo m68k-bull-sysv3
-	exit ;;
+	GUESS=m68k-bull-sysv3
+	;;
     9000/[34]??:4.3bsd:1.*:*)
-	echo m68k-hp-bsd
-	exit ;;
+	GUESS=m68k-hp-bsd
+	;;
     hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*)
-	echo m68k-hp-bsd4.4
-	exit ;;
+	GUESS=m68k-hp-bsd4.4
+	;;
     9000/[34678]??:HP-UX:*:*)
-	HPUX_REV=$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//')
-	case "$UNAME_MACHINE" in
+	HPUX_REV=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*.[0B]*//'`
+	case $UNAME_MACHINE in
 	    9000/31?)            HP_ARCH=m68000 ;;
 	    9000/[34]??)         HP_ARCH=m68k ;;
 	    9000/[678][0-9][0-9])
 		if test -x /usr/bin/getconf; then
-		    sc_cpu_version=$(/usr/bin/getconf SC_CPU_VERSION 2>/dev/null)
-		    sc_kernel_bits=$(/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null)
-		    case "$sc_cpu_version" in
+		    sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
+		    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
+		    case $sc_cpu_version in
 		      523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0
 		      528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1
 		      532)                      # CPU_PA_RISC2_0
-			case "$sc_kernel_bits" in
+			case $sc_kernel_bits in
 			  32) HP_ARCH=hppa2.0n ;;
 			  64) HP_ARCH=hppa2.0w ;;
 			  '') HP_ARCH=hppa2.0 ;;   # HP-UX 10.20
@@ -688,7 +719,8 @@ EOF
 		#include <stdlib.h>
 		#include <unistd.h>
 
-		int main ()
+		int
+		main ()
 		{
 		#if defined(_SC_KERNEL_BITS)
 		    long bits = sysconf(_SC_KERNEL_BITS);
@@ -715,7 +747,7 @@ EOF
 		    exit (0);
 		}
 EOF
-		    (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=$("$dummy")
+		    (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=`"$dummy"`
 		    test -z "$HP_ARCH" && HP_ARCH=hppa
 		fi ;;
 	esac
@@ -740,12 +772,12 @@ EOF
 		HP_ARCH=hppa64
 	    fi
 	fi
-	echo "$HP_ARCH"-hp-hpux"$HPUX_REV"
-	exit ;;
+	GUESS=$HP_ARCH-hp-hpux$HPUX_REV
+	;;
     ia64:HP-UX:*:*)
-	HPUX_REV=$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//')
-	echo ia64-hp-hpux"$HPUX_REV"
-	exit ;;
+	HPUX_REV=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*.[0B]*//'`
+	GUESS=ia64-hp-hpux$HPUX_REV
+	;;
     3050*:HI-UX:*:*)
 	set_cc_for_build
 	sed 's/^	//' << EOF > "$dummy.c"
@@ -773,38 +805,38 @@ EOF
 	  exit (0);
 	}
 EOF
-	$CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=$("$dummy") &&
+	$CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` &&
 		{ echo "$SYSTEM_NAME"; exit; }
-	echo unknown-hitachi-hiuxwe2
-	exit ;;
+	GUESS=unknown-hitachi-hiuxwe2
+	;;
     9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*)
-	echo hppa1.1-hp-bsd
-	exit ;;
+	GUESS=hppa1.1-hp-bsd
+	;;
     9000/8??:4.3bsd:*:*)
-	echo hppa1.0-hp-bsd
-	exit ;;
+	GUESS=hppa1.0-hp-bsd
+	;;
     *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*)
-	echo hppa1.0-hp-mpeix
-	exit ;;
+	GUESS=hppa1.0-hp-mpeix
+	;;
     hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*)
-	echo hppa1.1-hp-osf
-	exit ;;
+	GUESS=hppa1.1-hp-osf
+	;;
     hp8??:OSF1:*:*)
-	echo hppa1.0-hp-osf
-	exit ;;
+	GUESS=hppa1.0-hp-osf
+	;;
     i*86:OSF1:*:*)
 	if test -x /usr/sbin/sysversion ; then
-	    echo "$UNAME_MACHINE"-unknown-osf1mk
+	    GUESS=$UNAME_MACHINE-unknown-osf1mk
 	else
-	    echo "$UNAME_MACHINE"-unknown-osf1
+	    GUESS=$UNAME_MACHINE-unknown-osf1
 	fi
-	exit ;;
+	;;
     parisc*:Lites*:*:*)
-	echo hppa1.1-hp-lites
-	exit ;;
+	GUESS=hppa1.1-hp-lites
+	;;
     C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
-	echo c1-convex-bsd
-	exit ;;
+	GUESS=c1-convex-bsd
+	;;
     C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
 	if getsysinfo -f scalar_acc
 	then echo c32-convex-bsd
@@ -812,17 +844,18 @@ EOF
 	fi
 	exit ;;
     C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
-	echo c34-convex-bsd
-	exit ;;
+	GUESS=c34-convex-bsd
+	;;
     C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
-	echo c38-convex-bsd
-	exit ;;
+	GUESS=c38-convex-bsd
+	;;
     C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
-	echo c4-convex-bsd
-	exit ;;
+	GUESS=c4-convex-bsd
+	;;
     CRAY*Y-MP:*:*:*)
-	echo ymp-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
-	exit ;;
+	CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'`
+	GUESS=ymp-cray-unicos$CRAY_REL
+	;;
     CRAY*[A-Z]90:*:*:*)
 	echo "$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \
 	| sed -e 's/CRAY.*\([A-Z]90\)/\1/' \
@@ -830,114 +863,155 @@ EOF
 	      -e 's/\.[^.]*$/.X/'
 	exit ;;
     CRAY*TS:*:*:*)
-	echo t90-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
-	exit ;;
+	CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'`
+	GUESS=t90-cray-unicos$CRAY_REL
+	;;
     CRAY*T3E:*:*:*)
-	echo alphaev5-cray-unicosmk"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
-	exit ;;
+	CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'`
+	GUESS=alphaev5-cray-unicosmk$CRAY_REL
+	;;
     CRAY*SV1:*:*:*)
-	echo sv1-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
-	exit ;;
+	CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'`
+	GUESS=sv1-cray-unicos$CRAY_REL
+	;;
     *:UNICOS/mp:*:*)
-	echo craynv-cray-unicosmp"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
-	exit ;;
+	CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'`
+	GUESS=craynv-cray-unicosmp$CRAY_REL
+	;;
     F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
-	FUJITSU_PROC=$(uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz)
-	FUJITSU_SYS=$(uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///')
-	FUJITSU_REL=$(echo "$UNAME_RELEASE" | sed -e 's/ /_/')
-	echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
-	exit ;;
+	FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`
+	FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'`
+	FUJITSU_REL=`echo "$UNAME_RELEASE" | sed -e 's/ /_/'`
+	GUESS=${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}
+	;;
     5000:UNIX_System_V:4.*:*)
-	FUJITSU_SYS=$(uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///')
-	FUJITSU_REL=$(echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/')
-	echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
-	exit ;;
+	FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'`
+	FUJITSU_REL=`echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'`
+	GUESS=sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}
+	;;
     i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
-	echo "$UNAME_MACHINE"-pc-bsdi"$UNAME_RELEASE"
-	exit ;;
+	GUESS=$UNAME_MACHINE-pc-bsdi$UNAME_RELEASE
+	;;
     sparc*:BSD/OS:*:*)
-	echo sparc-unknown-bsdi"$UNAME_RELEASE"
-	exit ;;
+	GUESS=sparc-unknown-bsdi$UNAME_RELEASE
+	;;
     *:BSD/OS:*:*)
-	echo "$UNAME_MACHINE"-unknown-bsdi"$UNAME_RELEASE"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-bsdi$UNAME_RELEASE
+	;;
     arm:FreeBSD:*:*)
-	UNAME_PROCESSOR=$(uname -p)
+	UNAME_PROCESSOR=`uname -p`
 	set_cc_for_build
 	if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
 	    | grep -q __ARM_PCS_VFP
 	then
-	    echo "${UNAME_PROCESSOR}"-unknown-freebsd"$(echo ${UNAME_RELEASE}|sed -e 's/[-(].*//')"-gnueabi
+	    FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'`
+	    GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL-gnueabi
 	else
-	    echo "${UNAME_PROCESSOR}"-unknown-freebsd"$(echo ${UNAME_RELEASE}|sed -e 's/[-(].*//')"-gnueabihf
+	    FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'`
+	    GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL-gnueabihf
 	fi
-	exit ;;
+	;;
     *:FreeBSD:*:*)
-	UNAME_PROCESSOR=$(/usr/bin/uname -p)
-	case "$UNAME_PROCESSOR" in
+	UNAME_PROCESSOR=`uname -p`
+	case $UNAME_PROCESSOR in
 	    amd64)
 		UNAME_PROCESSOR=x86_64 ;;
 	    i386)
 		UNAME_PROCESSOR=i586 ;;
 	esac
-	echo "$UNAME_PROCESSOR"-unknown-freebsd"$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')"
-	exit ;;
+	FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'`
+	GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL
+	;;
     i*:CYGWIN*:*)
-	echo "$UNAME_MACHINE"-pc-cygwin
-	exit ;;
+	GUESS=$UNAME_MACHINE-pc-cygwin
+	;;
     *:MINGW64*:*)
-	echo "$UNAME_MACHINE"-pc-mingw64
-	exit ;;
+	GUESS=$UNAME_MACHINE-pc-mingw64
+	;;
     *:MINGW*:*)
-	echo "$UNAME_MACHINE"-pc-mingw32
-	exit ;;
+	GUESS=$UNAME_MACHINE-pc-mingw32
+	;;
     *:MSYS*:*)
-	echo "$UNAME_MACHINE"-pc-msys
-	exit ;;
+	GUESS=$UNAME_MACHINE-pc-msys
+	;;
     i*:PW*:*)
-	echo "$UNAME_MACHINE"-pc-pw32
-	exit ;;
+	GUESS=$UNAME_MACHINE-pc-pw32
+	;;
+    *:SerenityOS:*:*)
+        GUESS=$UNAME_MACHINE-pc-serenity
+        ;;
     *:Interix*:*)
-	case "$UNAME_MACHINE" in
+	case $UNAME_MACHINE in
 	    x86)
-		echo i586-pc-interix"$UNAME_RELEASE"
-		exit ;;
+		GUESS=i586-pc-interix$UNAME_RELEASE
+		;;
 	    authenticamd | genuineintel | EM64T)
-		echo x86_64-unknown-interix"$UNAME_RELEASE"
-		exit ;;
+		GUESS=x86_64-unknown-interix$UNAME_RELEASE
+		;;
 	    IA64)
-		echo ia64-unknown-interix"$UNAME_RELEASE"
-		exit ;;
+		GUESS=ia64-unknown-interix$UNAME_RELEASE
+		;;
 	esac ;;
     i*:UWIN*:*)
-	echo "$UNAME_MACHINE"-pc-uwin
-	exit ;;
+	GUESS=$UNAME_MACHINE-pc-uwin
+	;;
     amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*)
-	echo x86_64-pc-cygwin
-	exit ;;
+	GUESS=x86_64-pc-cygwin
+	;;
     prep*:SunOS:5.*:*)
-	echo powerpcle-unknown-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')"
-	exit ;;
+	SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`
+	GUESS=powerpcle-unknown-solaris2$SUN_REL
+	;;
     *:GNU:*:*)
 	# the GNU system
-	echo "$(echo "$UNAME_MACHINE"|sed -e 's,[-/].*$,,')-unknown-$LIBC$(echo "$UNAME_RELEASE"|sed -e 's,/.*$,,')"
-	exit ;;
+	GNU_ARCH=`echo "$UNAME_MACHINE" | sed -e 's,[-/].*$,,'`
+	GNU_REL=`echo "$UNAME_RELEASE" | sed -e 's,/.*$,,'`
+	GUESS=$GNU_ARCH-unknown-$LIBC$GNU_REL
+	;;
     *:GNU/*:*:*)
 	# other systems with GNU libc and userland
-	echo "$UNAME_MACHINE-unknown-$(echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]")$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')-$LIBC"
-	exit ;;
+	GNU_SYS=`echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"`
+	GNU_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'`
+	GUESS=$UNAME_MACHINE-unknown-$GNU_SYS$GNU_REL-$LIBC
+	;;
+    x86_64:[Mm]anagarm:*:*|i?86:[Mm]anagarm:*:*)
+	GUESS="$UNAME_MACHINE-pc-managarm-mlibc"
+	;;
+    *:[Mm]anagarm:*:*)
+	GUESS="$UNAME_MACHINE-unknown-managarm-mlibc"
+	;;
     *:Minix:*:*)
-	echo "$UNAME_MACHINE"-unknown-minix
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-minix
+	;;
     aarch64:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
+	set_cc_for_build
+	CPU=$UNAME_MACHINE
+	LIBCABI=$LIBC
+	if test "$CC_FOR_BUILD" != no_compiler_found; then
+	    ABI=64
+	    sed 's/^	    //' << EOF > "$dummy.c"
+	    #ifdef __ARM_EABI__
+	    #ifdef __ARM_PCS_VFP
+	    ABI=eabihf
+	    #else
+	    ABI=eabi
+	    #endif
+	    #endif
+EOF
+	    cc_set_abi=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^ABI' | sed 's, ,,g'`
+	    eval "$cc_set_abi"
+	    case $ABI in
+		eabi | eabihf) CPU=armv8l; LIBCABI=$LIBC$ABI ;;
+	    esac
+	fi
+	GUESS=$CPU-unknown-linux-$LIBCABI
+	;;
     aarch64_be:Linux:*:*)
 	UNAME_MACHINE=aarch64_be
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
+	;;
     alpha:Linux:*:*)
-	case $(sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null) in
+	case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null` in
 	  EV5)   UNAME_MACHINE=alphaev5 ;;
 	  EV56)  UNAME_MACHINE=alphaev56 ;;
 	  PCA56) UNAME_MACHINE=alphapca56 ;;
@@ -948,63 +1022,72 @@ EOF
 	esac
 	objdump --private-headers /bin/sh | grep -q ld.so.1
 	if test "$?" = 0 ; then LIBC=gnulibc1 ; fi
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
-    arc:Linux:*:* | arceb:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
+	;;
+    arc:Linux:*:* | arceb:Linux:*:* | arc32:Linux:*:* | arc64:Linux:*:*)
+	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
+	;;
     arm*:Linux:*:*)
 	set_cc_for_build
 	if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
 	    | grep -q __ARM_EABI__
 	then
-	    echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+	    GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
 	else
 	    if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
 		| grep -q __ARM_PCS_VFP
 	    then
-		echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabi
+		GUESS=$UNAME_MACHINE-unknown-linux-${LIBC}eabi
 	    else
-		echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabihf
+		GUESS=$UNAME_MACHINE-unknown-linux-${LIBC}eabihf
 	    fi
 	fi
-	exit ;;
+	;;
     avr32*:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
+	;;
     cris:Linux:*:*)
-	echo "$UNAME_MACHINE"-axis-linux-"$LIBC"
-	exit ;;
+	GUESS=$UNAME_MACHINE-axis-linux-$LIBC
+	;;
     crisv32:Linux:*:*)
-	echo "$UNAME_MACHINE"-axis-linux-"$LIBC"
-	exit ;;
+	GUESS=$UNAME_MACHINE-axis-linux-$LIBC
+	;;
     e2k:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
+	;;
     frv:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
+	;;
     hexagon:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
+	;;
     i*86:Linux:*:*)
-	echo "$UNAME_MACHINE"-pc-linux-"$LIBC"
-	exit ;;
+	GUESS=$UNAME_MACHINE-pc-linux-$LIBC
+	;;
     ia64:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
+	;;
     k1om:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
-    loongarch32:Linux:*:* | loongarch64:Linux:*:* | loongarchx32:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
+	;;
+    kvx:Linux:*:*)
+	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
+	;;
+    kvx:cos:*:*)
+	GUESS=$UNAME_MACHINE-unknown-cos
+	;;
+    kvx:mbr:*:*)
+	GUESS=$UNAME_MACHINE-unknown-mbr
+	;;
+    loongarch32:Linux:*:* | loongarch64:Linux:*:*)
+	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
+	;;
     m32r*:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
+	;;
     m68*:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
+	;;
     mips:Linux:*:* | mips64:Linux:*:*)
 	set_cc_for_build
 	IS_GLIBC=0
@@ -1049,138 +1132,150 @@ EOF
 	#endif
 	#endif
 EOF
-	eval "$($CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU\|^MIPS_ENDIAN\|^LIBCABI')"
+	cc_set_vars=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU\|^MIPS_ENDIAN\|^LIBCABI'`
+	eval "$cc_set_vars"
 	test "x$CPU" != x && { echo "$CPU${MIPS_ENDIAN}-unknown-linux-$LIBCABI"; exit; }
 	;;
     mips64el:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
+	;;
     openrisc*:Linux:*:*)
-	echo or1k-unknown-linux-"$LIBC"
-	exit ;;
+	GUESS=or1k-unknown-linux-$LIBC
+	;;
     or32:Linux:*:* | or1k*:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
+	;;
     padre:Linux:*:*)
-	echo sparc-unknown-linux-"$LIBC"
-	exit ;;
+	GUESS=sparc-unknown-linux-$LIBC
+	;;
     parisc64:Linux:*:* | hppa64:Linux:*:*)
-	echo hppa64-unknown-linux-"$LIBC"
-	exit ;;
+	GUESS=hppa64-unknown-linux-$LIBC
+	;;
     parisc:Linux:*:* | hppa:Linux:*:*)
 	# Look for CPU level
-	case $(grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2) in
-	  PA7*) echo hppa1.1-unknown-linux-"$LIBC" ;;
-	  PA8*) echo hppa2.0-unknown-linux-"$LIBC" ;;
-	  *)    echo hppa-unknown-linux-"$LIBC" ;;
+	case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
+	  PA7*) GUESS=hppa1.1-unknown-linux-$LIBC ;;
+	  PA8*) GUESS=hppa2.0-unknown-linux-$LIBC ;;
+	  *)    GUESS=hppa-unknown-linux-$LIBC ;;
 	esac
-	exit ;;
+	;;
     ppc64:Linux:*:*)
-	echo powerpc64-unknown-linux-"$LIBC"
-	exit ;;
+	GUESS=powerpc64-unknown-linux-$LIBC
+	;;
     ppc:Linux:*:*)
-	echo powerpc-unknown-linux-"$LIBC"
-	exit ;;
+	GUESS=powerpc-unknown-linux-$LIBC
+	;;
     ppc64le:Linux:*:*)
-	echo powerpc64le-unknown-linux-"$LIBC"
-	exit ;;
+	GUESS=powerpc64le-unknown-linux-$LIBC
+	;;
     ppcle:Linux:*:*)
-	echo powerpcle-unknown-linux-"$LIBC"
-	exit ;;
+	GUESS=powerpcle-unknown-linux-$LIBC
+	;;
     riscv32:Linux:*:* | riscv32be:Linux:*:* | riscv64:Linux:*:* | riscv64be:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
+	;;
     s390:Linux:*:* | s390x:Linux:*:*)
-	echo "$UNAME_MACHINE"-ibm-linux-"$LIBC"
-	exit ;;
+	GUESS=$UNAME_MACHINE-ibm-linux-$LIBC
+	;;
     sh64*:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
+	;;
     sh*:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
+	;;
     sparc:Linux:*:* | sparc64:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
+	;;
     tile*:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
+	;;
     vax:Linux:*:*)
-	echo "$UNAME_MACHINE"-dec-linux-"$LIBC"
-	exit ;;
+	GUESS=$UNAME_MACHINE-dec-linux-$LIBC
+	;;
     x86_64:Linux:*:*)
 	set_cc_for_build
+	CPU=$UNAME_MACHINE
 	LIBCABI=$LIBC
 	if test "$CC_FOR_BUILD" != no_compiler_found; then
-	    if (echo '#ifdef __ILP32__'; echo IS_X32; echo '#endif') | \
-		(CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
-		grep IS_X32 >/dev/null
-	    then
-		LIBCABI="$LIBC"x32
-	    fi
+	    ABI=64
+	    sed 's/^	    //' << EOF > "$dummy.c"
+	    #ifdef __i386__
+	    ABI=x86
+	    #else
+	    #ifdef __ILP32__
+	    ABI=x32
+	    #endif
+	    #endif
+EOF
+	    cc_set_abi=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^ABI' | sed 's, ,,g'`
+	    eval "$cc_set_abi"
+	    case $ABI in
+		x86) CPU=i686 ;;
+		x32) LIBCABI=${LIBC}x32 ;;
+	    esac
 	fi
-	echo "$UNAME_MACHINE"-pc-linux-"$LIBCABI"
-	exit ;;
+	GUESS=$CPU-pc-linux-$LIBCABI
+	;;
     xtensa*:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
+	;;
     i*86:DYNIX/ptx:4*:*)
 	# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
 	# earlier versions are messed up and put the nodename in both
 	# sysname and nodename.
-	echo i386-sequent-sysv4
-	exit ;;
+	GUESS=i386-sequent-sysv4
+	;;
     i*86:UNIX_SV:4.2MP:2.*)
 	# Unixware is an offshoot of SVR4, but it has its own version
 	# number series starting with 2...
 	# I am not positive that other SVR4 systems won't match this,
 	# I just have to hope.  -- rms.
 	# Use sysv4.2uw... so that sysv4* matches it.
-	echo "$UNAME_MACHINE"-pc-sysv4.2uw"$UNAME_VERSION"
-	exit ;;
+	GUESS=$UNAME_MACHINE-pc-sysv4.2uw$UNAME_VERSION
+	;;
     i*86:OS/2:*:*)
-	# If we were able to find `uname', then EMX Unix compatibility
+	# If we were able to find 'uname', then EMX Unix compatibility
 	# is probably installed.
-	echo "$UNAME_MACHINE"-pc-os2-emx
-	exit ;;
+	GUESS=$UNAME_MACHINE-pc-os2-emx
+	;;
     i*86:XTS-300:*:STOP)
-	echo "$UNAME_MACHINE"-unknown-stop
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-stop
+	;;
     i*86:atheos:*:*)
-	echo "$UNAME_MACHINE"-unknown-atheos
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-atheos
+	;;
     i*86:syllable:*:*)
-	echo "$UNAME_MACHINE"-pc-syllable
-	exit ;;
+	GUESS=$UNAME_MACHINE-pc-syllable
+	;;
     i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*)
-	echo i386-unknown-lynxos"$UNAME_RELEASE"
-	exit ;;
+	GUESS=i386-unknown-lynxos$UNAME_RELEASE
+	;;
     i*86:*DOS:*:*)
-	echo "$UNAME_MACHINE"-pc-msdosdjgpp
-	exit ;;
+	GUESS=$UNAME_MACHINE-pc-msdosdjgpp
+	;;
     i*86:*:4.*:*)
-	UNAME_REL=$(echo "$UNAME_RELEASE" | sed 's/\/MP$//')
+	UNAME_REL=`echo "$UNAME_RELEASE" | sed 's/\/MP$//'`
 	if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
-		echo "$UNAME_MACHINE"-univel-sysv"$UNAME_REL"
+		GUESS=$UNAME_MACHINE-univel-sysv$UNAME_REL
 	else
-		echo "$UNAME_MACHINE"-pc-sysv"$UNAME_REL"
+		GUESS=$UNAME_MACHINE-pc-sysv$UNAME_REL
 	fi
-	exit ;;
+	;;
     i*86:*:5:[678]*)
 	# UnixWare 7.x, OpenUNIX and OpenServer 6.
-	case $(/bin/uname -X | grep "^Machine") in
+	case `/bin/uname -X | grep "^Machine"` in
 	    *486*)	     UNAME_MACHINE=i486 ;;
 	    *Pentium)	     UNAME_MACHINE=i586 ;;
 	    *Pent*|*Celeron) UNAME_MACHINE=i686 ;;
 	esac
-	echo "$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}
+	;;
     i*86:*:3.2:*)
 	if test -f /usr/options/cb.name; then
-		UNAME_REL=$(sed -n 's/.*Version //p' </usr/options/cb.name)
-		echo "$UNAME_MACHINE"-pc-isc"$UNAME_REL"
+		UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
+		GUESS=$UNAME_MACHINE-pc-isc$UNAME_REL
 	elif /bin/uname -X 2>/dev/null >/dev/null ; then
-		UNAME_REL=$( (/bin/uname -X|grep Release|sed -e 's/.*= //'))
+		UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')`
 		(/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486
 		(/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \
 			&& UNAME_MACHINE=i586
@@ -1188,11 +1283,11 @@ EOF
 			&& UNAME_MACHINE=i686
 		(/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \
 			&& UNAME_MACHINE=i686
-		echo "$UNAME_MACHINE"-pc-sco"$UNAME_REL"
+		GUESS=$UNAME_MACHINE-pc-sco$UNAME_REL
 	else
-		echo "$UNAME_MACHINE"-pc-sysv32
+		GUESS=$UNAME_MACHINE-pc-sysv32
 	fi
-	exit ;;
+	;;
     pc:*:*:*)
 	# Left here for compatibility:
 	# uname -m prints for DJGPP always 'pc', but it prints nothing about
@@ -1200,37 +1295,37 @@ EOF
 	# Note: whatever this is, it MUST be the same as what config.sub
 	# prints for the "djgpp" host, or else GDB configure will decide that
 	# this is a cross-build.
-	echo i586-pc-msdosdjgpp
-	exit ;;
+	GUESS=i586-pc-msdosdjgpp
+	;;
     Intel:Mach:3*:*)
-	echo i386-pc-mach3
-	exit ;;
+	GUESS=i386-pc-mach3
+	;;
     paragon:*:*:*)
-	echo i860-intel-osf1
-	exit ;;
+	GUESS=i860-intel-osf1
+	;;
     i860:*:4.*:*) # i860-SVR4
 	if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
-	  echo i860-stardent-sysv"$UNAME_RELEASE" # Stardent Vistra i860-SVR4
+	  GUESS=i860-stardent-sysv$UNAME_RELEASE    # Stardent Vistra i860-SVR4
 	else # Add other i860-SVR4 vendors below as they are discovered.
-	  echo i860-unknown-sysv"$UNAME_RELEASE"  # Unknown i860-SVR4
+	  GUESS=i860-unknown-sysv$UNAME_RELEASE     # Unknown i860-SVR4
 	fi
-	exit ;;
+	;;
     mini*:CTIX:SYS*5:*)
 	# "miniframe"
-	echo m68010-convergent-sysv
-	exit ;;
+	GUESS=m68010-convergent-sysv
+	;;
     mc68k:UNIX:SYSTEM5:3.51m)
-	echo m68k-convergent-sysv
-	exit ;;
+	GUESS=m68k-convergent-sysv
+	;;
     M680?0:D-NIX:5.3:*)
-	echo m68k-diab-dnix
-	exit ;;
+	GUESS=m68k-diab-dnix
+	;;
     M68*:*:R3V[5678]*:*)
 	test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;;
     3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0)
 	OS_REL=''
 	test -r /etc/.relid \
-	&& OS_REL=.$(sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid)
+	&& OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
 	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
 	  && { echo i486-ncr-sysv4.3"$OS_REL"; exit; }
 	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
@@ -1241,7 +1336,7 @@ EOF
     NCR*:*:4.2:* | MPRAS*:*:4.2:*)
 	OS_REL='.3'
 	test -r /etc/.relid \
-	    && OS_REL=.$(sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid)
+	    && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
 	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
 	    && { echo i486-ncr-sysv4.3"$OS_REL"; exit; }
 	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
@@ -1249,118 +1344,121 @@ EOF
 	/bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \
 	    && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;;
     m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)
-	echo m68k-unknown-lynxos"$UNAME_RELEASE"
-	exit ;;
+	GUESS=m68k-unknown-lynxos$UNAME_RELEASE
+	;;
     mc68030:UNIX_System_V:4.*:*)
-	echo m68k-atari-sysv4
-	exit ;;
+	GUESS=m68k-atari-sysv4
+	;;
     TSUNAMI:LynxOS:2.*:*)
-	echo sparc-unknown-lynxos"$UNAME_RELEASE"
-	exit ;;
+	GUESS=sparc-unknown-lynxos$UNAME_RELEASE
+	;;
     rs6000:LynxOS:2.*:*)
-	echo rs6000-unknown-lynxos"$UNAME_RELEASE"
-	exit ;;
+	GUESS=rs6000-unknown-lynxos$UNAME_RELEASE
+	;;
     PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*)
-	echo powerpc-unknown-lynxos"$UNAME_RELEASE"
-	exit ;;
+	GUESS=powerpc-unknown-lynxos$UNAME_RELEASE
+	;;
     SM[BE]S:UNIX_SV:*:*)
-	echo mips-dde-sysv"$UNAME_RELEASE"
-	exit ;;
+	GUESS=mips-dde-sysv$UNAME_RELEASE
+	;;
     RM*:ReliantUNIX-*:*:*)
-	echo mips-sni-sysv4
-	exit ;;
+	GUESS=mips-sni-sysv4
+	;;
     RM*:SINIX-*:*:*)
-	echo mips-sni-sysv4
-	exit ;;
+	GUESS=mips-sni-sysv4
+	;;
     *:SINIX-*:*:*)
 	if uname -p 2>/dev/null >/dev/null ; then
-		UNAME_MACHINE=$( (uname -p) 2>/dev/null)
-		echo "$UNAME_MACHINE"-sni-sysv4
+		UNAME_MACHINE=`(uname -p) 2>/dev/null`
+		GUESS=$UNAME_MACHINE-sni-sysv4
 	else
-		echo ns32k-sni-sysv
+		GUESS=ns32k-sni-sysv
 	fi
-	exit ;;
-    PENTIUM:*:4.0*:*)	# Unisys `ClearPath HMP IX 4000' SVR4/MP effort
+	;;
+    PENTIUM:*:4.0*:*)	# Unisys 'ClearPath HMP IX 4000' SVR4/MP effort
 			# says <Richard.M.Bartel@ccMail.Census.GOV>
-	echo i586-unisys-sysv4
-	exit ;;
+	GUESS=i586-unisys-sysv4
+	;;
     *:UNIX_System_V:4*:FTX*)
 	# From Gerald Hewes <hewes@openmarket.com>.
 	# How about differentiating between stratus architectures? -djm
-	echo hppa1.1-stratus-sysv4
-	exit ;;
+	GUESS=hppa1.1-stratus-sysv4
+	;;
     *:*:*:FTX*)
 	# From seanf@swdc.stratus.com.
-	echo i860-stratus-sysv4
-	exit ;;
+	GUESS=i860-stratus-sysv4
+	;;
     i*86:VOS:*:*)
 	# From Paul.Green@stratus.com.
-	echo "$UNAME_MACHINE"-stratus-vos
-	exit ;;
+	GUESS=$UNAME_MACHINE-stratus-vos
+	;;
     *:VOS:*:*)
 	# From Paul.Green@stratus.com.
-	echo hppa1.1-stratus-vos
-	exit ;;
+	GUESS=hppa1.1-stratus-vos
+	;;
     mc68*:A/UX:*:*)
-	echo m68k-apple-aux"$UNAME_RELEASE"
-	exit ;;
+	GUESS=m68k-apple-aux$UNAME_RELEASE
+	;;
     news*:NEWS-OS:6*:*)
-	echo mips-sony-newsos6
-	exit ;;
+	GUESS=mips-sony-newsos6
+	;;
     R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
 	if test -d /usr/nec; then
-		echo mips-nec-sysv"$UNAME_RELEASE"
+		GUESS=mips-nec-sysv$UNAME_RELEASE
 	else
-		echo mips-unknown-sysv"$UNAME_RELEASE"
+		GUESS=mips-unknown-sysv$UNAME_RELEASE
 	fi
-	exit ;;
+	;;
     BeBox:BeOS:*:*)	# BeOS running on hardware made by Be, PPC only.
-	echo powerpc-be-beos
-	exit ;;
+	GUESS=powerpc-be-beos
+	;;
     BeMac:BeOS:*:*)	# BeOS running on Mac or Mac clone, PPC only.
-	echo powerpc-apple-beos
-	exit ;;
+	GUESS=powerpc-apple-beos
+	;;
     BePC:BeOS:*:*)	# BeOS running on Intel PC compatible.
-	echo i586-pc-beos
-	exit ;;
+	GUESS=i586-pc-beos
+	;;
     BePC:Haiku:*:*)	# Haiku running on Intel PC compatible.
-	echo i586-pc-haiku
-	exit ;;
-    x86_64:Haiku:*:*)
-	echo x86_64-unknown-haiku
-	exit ;;
+	GUESS=i586-pc-haiku
+	;;
+    ppc:Haiku:*:*)	# Haiku running on Apple PowerPC
+	GUESS=powerpc-apple-haiku
+	;;
+    *:Haiku:*:*)	# Haiku modern gcc (not bound by BeOS compat)
+	GUESS=$UNAME_MACHINE-unknown-haiku
+	;;
     SX-4:SUPER-UX:*:*)
-	echo sx4-nec-superux"$UNAME_RELEASE"
-	exit ;;
+	GUESS=sx4-nec-superux$UNAME_RELEASE
+	;;
     SX-5:SUPER-UX:*:*)
-	echo sx5-nec-superux"$UNAME_RELEASE"
-	exit ;;
+	GUESS=sx5-nec-superux$UNAME_RELEASE
+	;;
     SX-6:SUPER-UX:*:*)
-	echo sx6-nec-superux"$UNAME_RELEASE"
-	exit ;;
+	GUESS=sx6-nec-superux$UNAME_RELEASE
+	;;
     SX-7:SUPER-UX:*:*)
-	echo sx7-nec-superux"$UNAME_RELEASE"
-	exit ;;
+	GUESS=sx7-nec-superux$UNAME_RELEASE
+	;;
     SX-8:SUPER-UX:*:*)
-	echo sx8-nec-superux"$UNAME_RELEASE"
-	exit ;;
+	GUESS=sx8-nec-superux$UNAME_RELEASE
+	;;
     SX-8R:SUPER-UX:*:*)
-	echo sx8r-nec-superux"$UNAME_RELEASE"
-	exit ;;
+	GUESS=sx8r-nec-superux$UNAME_RELEASE
+	;;
     SX-ACE:SUPER-UX:*:*)
-	echo sxace-nec-superux"$UNAME_RELEASE"
-	exit ;;
+	GUESS=sxace-nec-superux$UNAME_RELEASE
+	;;
     Power*:Rhapsody:*:*)
-	echo powerpc-apple-rhapsody"$UNAME_RELEASE"
-	exit ;;
+	GUESS=powerpc-apple-rhapsody$UNAME_RELEASE
+	;;
     *:Rhapsody:*:*)
-	echo "$UNAME_MACHINE"-apple-rhapsody"$UNAME_RELEASE"
-	exit ;;
+	GUESS=$UNAME_MACHINE-apple-rhapsody$UNAME_RELEASE
+	;;
     arm64:Darwin:*:*)
-	echo aarch64-apple-darwin"$UNAME_RELEASE"
-	exit ;;
+	GUESS=aarch64-apple-darwin$UNAME_RELEASE
+	;;
     *:Darwin:*:*)
-	UNAME_PROCESSOR=$(uname -p)
+	UNAME_PROCESSOR=`uname -p`
 	case $UNAME_PROCESSOR in
 	    unknown) UNAME_PROCESSOR=powerpc ;;
 	esac
@@ -1394,109 +1492,125 @@ EOF
 	    # uname -m returns i386 or x86_64
 	    UNAME_PROCESSOR=$UNAME_MACHINE
 	fi
-	echo "$UNAME_PROCESSOR"-apple-darwin"$UNAME_RELEASE"
-	exit ;;
+	GUESS=$UNAME_PROCESSOR-apple-darwin$UNAME_RELEASE
+	;;
     *:procnto*:*:* | *:QNX:[0123456789]*:*)
-	UNAME_PROCESSOR=$(uname -p)
+	UNAME_PROCESSOR=`uname -p`
 	if test "$UNAME_PROCESSOR" = x86; then
 		UNAME_PROCESSOR=i386
 		UNAME_MACHINE=pc
 	fi
-	echo "$UNAME_PROCESSOR"-"$UNAME_MACHINE"-nto-qnx"$UNAME_RELEASE"
-	exit ;;
+	GUESS=$UNAME_PROCESSOR-$UNAME_MACHINE-nto-qnx$UNAME_RELEASE
+	;;
     *:QNX:*:4*)
-	echo i386-pc-qnx
-	exit ;;
+	GUESS=i386-pc-qnx
+	;;
     NEO-*:NONSTOP_KERNEL:*:*)
-	echo neo-tandem-nsk"$UNAME_RELEASE"
-	exit ;;
+	GUESS=neo-tandem-nsk$UNAME_RELEASE
+	;;
     NSE-*:NONSTOP_KERNEL:*:*)
-	echo nse-tandem-nsk"$UNAME_RELEASE"
-	exit ;;
+	GUESS=nse-tandem-nsk$UNAME_RELEASE
+	;;
     NSR-*:NONSTOP_KERNEL:*:*)
-	echo nsr-tandem-nsk"$UNAME_RELEASE"
-	exit ;;
+	GUESS=nsr-tandem-nsk$UNAME_RELEASE
+	;;
     NSV-*:NONSTOP_KERNEL:*:*)
-	echo nsv-tandem-nsk"$UNAME_RELEASE"
-	exit ;;
+	GUESS=nsv-tandem-nsk$UNAME_RELEASE
+	;;
     NSX-*:NONSTOP_KERNEL:*:*)
-	echo nsx-tandem-nsk"$UNAME_RELEASE"
-	exit ;;
+	GUESS=nsx-tandem-nsk$UNAME_RELEASE
+	;;
     *:NonStop-UX:*:*)
-	echo mips-compaq-nonstopux
-	exit ;;
+	GUESS=mips-compaq-nonstopux
+	;;
     BS2000:POSIX*:*:*)
-	echo bs2000-siemens-sysv
-	exit ;;
+	GUESS=bs2000-siemens-sysv
+	;;
     DS/*:UNIX_System_V:*:*)
-	echo "$UNAME_MACHINE"-"$UNAME_SYSTEM"-"$UNAME_RELEASE"
-	exit ;;
+	GUESS=$UNAME_MACHINE-$UNAME_SYSTEM-$UNAME_RELEASE
+	;;
     *:Plan9:*:*)
 	# "uname -m" is not consistent, so use $cputype instead. 386
 	# is converted to i386 for consistency with other x86
 	# operating systems.
-	# shellcheck disable=SC2154
-	if test "$cputype" = 386; then
+	if test "${cputype-}" = 386; then
 	    UNAME_MACHINE=i386
-	else
-	    UNAME_MACHINE="$cputype"
+	elif test "x${cputype-}" != x; then
+	    UNAME_MACHINE=$cputype
 	fi
-	echo "$UNAME_MACHINE"-unknown-plan9
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-plan9
+	;;
     *:TOPS-10:*:*)
-	echo pdp10-unknown-tops10
-	exit ;;
+	GUESS=pdp10-unknown-tops10
+	;;
     *:TENEX:*:*)
-	echo pdp10-unknown-tenex
-	exit ;;
+	GUESS=pdp10-unknown-tenex
+	;;
     KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*)
-	echo pdp10-dec-tops20
-	exit ;;
+	GUESS=pdp10-dec-tops20
+	;;
     XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*)
-	echo pdp10-xkl-tops20
-	exit ;;
+	GUESS=pdp10-xkl-tops20
+	;;
     *:TOPS-20:*:*)
-	echo pdp10-unknown-tops20
-	exit ;;
+	GUESS=pdp10-unknown-tops20
+	;;
     *:ITS:*:*)
-	echo pdp10-unknown-its
-	exit ;;
+	GUESS=pdp10-unknown-its
+	;;
     SEI:*:*:SEIUX)
-	echo mips-sei-seiux"$UNAME_RELEASE"
-	exit ;;
+	GUESS=mips-sei-seiux$UNAME_RELEASE
+	;;
     *:DragonFly:*:*)
-	echo "$UNAME_MACHINE"-unknown-dragonfly"$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')"
-	exit ;;
+	DRAGONFLY_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'`
+	GUESS=$UNAME_MACHINE-unknown-dragonfly$DRAGONFLY_REL
+	;;
     *:*VMS:*:*)
-	UNAME_MACHINE=$( (uname -p) 2>/dev/null)
-	case "$UNAME_MACHINE" in
-	    A*) echo alpha-dec-vms ; exit ;;
-	    I*) echo ia64-dec-vms ; exit ;;
-	    V*) echo vax-dec-vms ; exit ;;
+	UNAME_MACHINE=`(uname -p) 2>/dev/null`
+	case $UNAME_MACHINE in
+	    A*) GUESS=alpha-dec-vms ;;
+	    I*) GUESS=ia64-dec-vms ;;
+	    V*) GUESS=vax-dec-vms ;;
 	esac ;;
     *:XENIX:*:SysV)
-	echo i386-pc-xenix
-	exit ;;
+	GUESS=i386-pc-xenix
+	;;
     i*86:skyos:*:*)
-	echo "$UNAME_MACHINE"-pc-skyos"$(echo "$UNAME_RELEASE" | sed -e 's/ .*$//')"
-	exit ;;
+	SKYOS_REL=`echo "$UNAME_RELEASE" | sed -e 's/ .*$//'`
+	GUESS=$UNAME_MACHINE-pc-skyos$SKYOS_REL
+	;;
     i*86:rdos:*:*)
-	echo "$UNAME_MACHINE"-pc-rdos
-	exit ;;
-    i*86:AROS:*:*)
-	echo "$UNAME_MACHINE"-pc-aros
-	exit ;;
+	GUESS=$UNAME_MACHINE-pc-rdos
+	;;
+    i*86:Fiwix:*:*)
+	GUESS=$UNAME_MACHINE-pc-fiwix
+	;;
+    *:AROS:*:*)
+	GUESS=$UNAME_MACHINE-unknown-aros
+	;;
     x86_64:VMkernel:*:*)
-	echo "$UNAME_MACHINE"-unknown-esx
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-esx
+	;;
     amd64:Isilon\ OneFS:*:*)
-	echo x86_64-unknown-onefs
-	exit ;;
+	GUESS=x86_64-unknown-onefs
+	;;
     *:Unleashed:*:*)
-	echo "$UNAME_MACHINE"-unknown-unleashed"$UNAME_RELEASE"
-	exit ;;
+	GUESS=$UNAME_MACHINE-unknown-unleashed$UNAME_RELEASE
+	;;
+    x86_64:[Ii]ronclad:*:*|i?86:[Ii]ronclad:*:*)
+	GUESS=$UNAME_MACHINE-pc-ironclad-mlibc
+	;;
+    *:[Ii]ronclad:*:*)
+	GUESS=$UNAME_MACHINE-unknown-ironclad-mlibc
+	;;
 esac
 
+# Do we have a guess based on uname results?
+if test "x$GUESS" != x; then
+    echo "$GUESS"
+    exit
+fi
+
 # No uname command or uname output not recognized.
 set_cc_for_build
 cat > "$dummy.c" <<EOF
@@ -1512,6 +1626,7 @@ cat > "$dummy.c" <<EOF
 #endif
 #endif
 #endif
+int
 main ()
 {
 #if defined (sony)
@@ -1536,7 +1651,7 @@ main ()
 #define __ARCHITECTURE__ "m68k"
 #endif
   int version;
-  version=$( (hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null);
+  version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`;
   if (version < 4)
     printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
   else
@@ -1628,7 +1743,7 @@ main ()
 }
 EOF
 
-$CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null && SYSTEM_NAME=$($dummy) &&
+$CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null && SYSTEM_NAME=`"$dummy"` &&
 	{ echo "$SYSTEM_NAME"; exit; }
 
 # Apollos put the system type in the environment.
@@ -1636,7 +1751,7 @@ test -d /usr/apollo && { echo "$ISP-apollo-$SYSTYPE"; exit; }
 
 echo "$0: unable to guess system type" >&2
 
-case "$UNAME_MACHINE:$UNAME_SYSTEM" in
+case $UNAME_MACHINE:$UNAME_SYSTEM in
     mips:Linux | mips64:Linux)
 	# If we got here on MIPS GNU/Linux, output extra information.
 	cat >&2 <<EOF
@@ -1658,9 +1773,11 @@ and
   https://git.savannah.gnu.org/cgit/config.git/plain/config.sub
 EOF
 
-year=$(echo $timestamp | sed 's,-.*,,')
+our_year=`echo $timestamp | sed 's,-.*,,'`
+thisyear=`date +%Y`
 # shellcheck disable=SC2003
-if test "$(expr "$(date +%Y)" - "$year")" -lt 3 ; then
+script_age=`expr "$thisyear" - "$our_year"`
+if test "$script_age" -lt 3 ; then
    cat >&2 <<EOF
 
 If $0 has already been updated, send the following data and any
@@ -1669,20 +1786,20 @@ provide the necessary information to handle your system.
 
 config.guess timestamp = $timestamp
 
-uname -m = $( (uname -m) 2>/dev/null || echo unknown)
-uname -r = $( (uname -r) 2>/dev/null || echo unknown)
-uname -s = $( (uname -s) 2>/dev/null || echo unknown)
-uname -v = $( (uname -v) 2>/dev/null || echo unknown)
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
 
-/usr/bin/uname -p = $( (/usr/bin/uname -p) 2>/dev/null)
-/bin/uname -X     = $( (/bin/uname -X) 2>/dev/null)
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null`
+/bin/uname -X     = `(/bin/uname -X) 2>/dev/null`
 
-hostinfo               = $( (hostinfo) 2>/dev/null)
-/bin/universe          = $( (/bin/universe) 2>/dev/null)
-/usr/bin/arch -k       = $( (/usr/bin/arch -k) 2>/dev/null)
-/bin/arch              = $( (/bin/arch) 2>/dev/null)
-/usr/bin/oslevel       = $( (/usr/bin/oslevel) 2>/dev/null)
-/usr/convex/getsysinfo = $( (/usr/convex/getsysinfo) 2>/dev/null)
+hostinfo               = `(hostinfo) 2>/dev/null`
+/bin/universe          = `(/bin/universe) 2>/dev/null`
+/usr/bin/arch -k       = `(/usr/bin/arch -k) 2>/dev/null`
+/bin/arch              = `(/bin/arch) 2>/dev/null`
+/usr/bin/oslevel       = `(/usr/bin/oslevel) 2>/dev/null`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null`
 
 UNAME_MACHINE = "$UNAME_MACHINE"
 UNAME_RELEASE = "$UNAME_RELEASE"
@@ -1694,8 +1811,8 @@ fi
 exit 1
 
 # Local variables:
-# eval: (add-hook 'before-save-hook 'time-stamp)
+# eval: (add-hook 'before-save-hook 'time-stamp nil t)
 # time-stamp-start: "timestamp='"
-# time-stamp-format: "%:y-%02m-%02d"
+# time-stamp-format: "%Y-%02m-%02d"
 # time-stamp-end: "'"
 # End:
diff --git a/build-aux/config.sub b/build-aux/config.sub
index b0f84923..3d35cde1 100755
--- a/build-aux/config.sub
+++ b/build-aux/config.sub
@@ -1,12 +1,14 @@
 #! /bin/sh
 # Configuration validation subroutine script.
-#   Copyright 1992-2021 Free Software Foundation, Inc.
+#   Copyright 1992-2025 Free Software Foundation, Inc.
 
-timestamp='2021-01-07'
+# shellcheck disable=SC2006,SC2268,SC2162 # see below for rationale
+
+timestamp='2025-07-10'
 
 # This file is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or
+# the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful, but
@@ -50,7 +52,14 @@ timestamp='2021-01-07'
 #	CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
 # It is wrong to echo any other type of specification.
 
-me=$(echo "$0" | sed -e 's,.*/,,')
+# The "shellcheck disable" line above the timestamp inhibits complaints
+# about features and limitations of the classic Bourne shell that were
+# superseded or lifted in POSIX.  However, this script identifies a wide
+# variety of pre-POSIX systems that do not have POSIX shells at all, and
+# even some reasonably current systems (Solaris 10 as case-in-point) still
+# have a pre-POSIX /bin/sh.
+
+me=`echo "$0" | sed -e 's,.*/,,'`
 
 usage="\
 Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS
@@ -67,13 +76,13 @@ Report bugs and patches to <config-patches@gnu.org>."
 version="\
 GNU config.sub ($timestamp)
 
-Copyright 1992-2021 Free Software Foundation, Inc.
+Copyright 1992-2025 Free Software Foundation, Inc.
 
 This is free software; see the source for copying conditions.  There is NO
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
 
 help="
-Try \`$me --help' for more information."
+Try '$me --help' for more information."
 
 # Parse command line
 while test $# -gt 0 ; do
@@ -111,15 +120,16 @@ case $# in
 esac
 
 # Split fields of configuration type
-# shellcheck disable=SC2162
+saved_IFS=$IFS
 IFS="-" read field1 field2 field3 field4 <<EOF
 $1
 EOF
+IFS=$saved_IFS
 
 # Separate into logical components for further validation
 case $1 in
 	*-*-*-*-*)
-		echo Invalid configuration \`"$1"\': more than four components >&2
+		echo "Invalid configuration '$1': more than four components" >&2
 		exit 1
 		;;
 	*-*-*-*)
@@ -131,10 +141,22 @@ case $1 in
 		# parts
 		maybe_os=$field2-$field3
 		case $maybe_os in
-			nto-qnx* | linux-* | uclinux-uclibc* \
-			| uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* \
-			| netbsd*-eabi* | kopensolaris*-gnu* | cloudabi*-eabi* \
-			| storm-chaos* | os2-emx* | rtmk-nova*)
+			  cloudabi*-eabi* \
+			| kfreebsd*-gnu* \
+			| knetbsd*-gnu* \
+			| kopensolaris*-gnu* \
+			| ironclad-* \
+			| linux-* \
+			| managarm-* \
+			| netbsd*-eabi* \
+			| netbsd*-gnu* \
+			| nto-qnx* \
+			| os2-emx* \
+			| rtmk-nova* \
+			| storm-chaos* \
+			| uclinux-gnu* \
+			| uclinux-uclibc* \
+			| windows-* )
 				basic_machine=$field1
 				basic_os=$maybe_os
 				;;
@@ -149,8 +171,12 @@ case $1 in
 		esac
 		;;
 	*-*)
-		# A lone config we happen to match not fitting any pattern
 		case $field1-$field2 in
+			# Shorthands that happen to contain a single dash
+			convex-c[12] | convex-c3[248])
+				basic_machine=$field2-convex
+				basic_os=
+				;;
 			decstation-3100)
 				basic_machine=mips-dec
 				basic_os=
@@ -158,24 +184,87 @@ case $1 in
 			*-*)
 				# Second component is usually, but not always the OS
 				case $field2 in
-					# Prevent following clause from handling this valid os
+					# Do not treat sunos as a manufacturer
 					sun*os*)
 						basic_machine=$field1
 						basic_os=$field2
 						;;
 					# Manufacturers
-					dec* | mips* | sequent* | encore* | pc533* | sgi* | sony* \
-					| att* | 7300* | 3300* | delta* | motorola* | sun[234]* \
-					| unicom* | ibm* | next | hp | isi* | apollo | altos* \
-					| convergent* | ncr* | news | 32* | 3600* | 3100* \
-					| hitachi* | c[123]* | convex* | sun | crds | omron* | dg \
-					| ultra | tti* | harris | dolphin | highlevel | gould \
-					| cbm | ns | masscomp | apple | axis | knuth | cray \
-					| microblaze* | sim | cisco \
-					| oki | wec | wrs | winbond)
+					  3100* \
+					| 32* \
+					| 3300* \
+					| 3600* \
+					| 7300* \
+					| acorn \
+					| altos* \
+					| apollo \
+					| apple \
+					| atari \
+					| att* \
+					| axis \
+					| be \
+					| bull \
+					| cbm \
+					| ccur \
+					| cisco \
+					| commodore \
+					| convergent* \
+					| convex* \
+					| cray \
+					| crds \
+					| dec* \
+					| delta* \
+					| dg \
+					| digital \
+					| dolphin \
+					| encore* \
+					| gould \
+					| harris \
+					| highlevel \
+					| hitachi* \
+					| hp \
+					| ibm* \
+					| intergraph \
+					| isi* \
+					| knuth \
+					| masscomp \
+					| microblaze* \
+					| mips* \
+					| motorola* \
+					| ncr* \
+					| news \
+					| next \
+					| ns \
+					| oki \
+					| omron* \
+					| pc533* \
+					| rebel \
+					| rom68k \
+					| rombug \
+					| semi \
+					| sequent* \
+					| sgi* \
+					| siemens \
+					| sim \
+					| sni \
+					| sony* \
+					| stratus \
+					| sun \
+					| sun[234]* \
+					| tektronix \
+					| tti* \
+					| ultra \
+					| unicom* \
+					| wec \
+					| winbond \
+					| wrs)
 						basic_machine=$field1-$field2
 						basic_os=
 						;;
+					tock* | zephyr*)
+						basic_machine=$field1-unknown
+						basic_os=$field2
+						;;
 					*)
 						basic_machine=$field1
 						basic_os=$field2
@@ -256,26 +345,6 @@ case $1 in
 				basic_machine=arm-unknown
 				basic_os=cegcc
 				;;
-			convex-c1)
-				basic_machine=c1-convex
-				basic_os=bsd
-				;;
-			convex-c2)
-				basic_machine=c2-convex
-				basic_os=bsd
-				;;
-			convex-c32)
-				basic_machine=c32-convex
-				basic_os=bsd
-				;;
-			convex-c34)
-				basic_machine=c34-convex
-				basic_os=bsd
-				;;
-			convex-c38)
-				basic_machine=c38-convex
-				basic_os=bsd
-				;;
 			cray)
 				basic_machine=j90-cray
 				basic_os=unicos
@@ -698,15 +767,26 @@ case $basic_machine in
 		vendor=dec
 		basic_os=tops20
 		;;
-	delta | 3300 | motorola-3300 | motorola-delta \
-	      | 3300-motorola | delta-motorola)
+	delta | 3300 | delta-motorola | 3300-motorola | motorola-delta | motorola-3300)
 		cpu=m68k
 		vendor=motorola
 		;;
-	dpx2*)
+	# This used to be dpx2*, but that gets the RS6000-based
+	# DPX/20 and the x86-based DPX/2-100 wrong.  See
+	# https://oldskool.silicium.org/stations/bull_dpx20.htm
+	# https://www.feb-patrimoine.com/english/bull_dpx2.htm
+	# https://www.feb-patrimoine.com/english/unix_and_bull.htm
+	dpx2 | dpx2[23]00 | dpx2[23]xx)
 		cpu=m68k
 		vendor=bull
-		basic_os=sysv3
+		;;
+	dpx2100 | dpx21xx)
+		cpu=i386
+		vendor=bull
+		;;
+	dpx20)
+		cpu=rs6000
+		vendor=bull
 		;;
 	encore | umax | mmax)
 		cpu=ns32k
@@ -769,22 +849,22 @@ case $basic_machine in
 		vendor=hp
 		;;
 	i*86v32)
-		cpu=$(echo "$1" | sed -e 's/86.*/86/')
+		cpu=`echo "$1" | sed -e 's/86.*/86/'`
 		vendor=pc
 		basic_os=sysv32
 		;;
 	i*86v4*)
-		cpu=$(echo "$1" | sed -e 's/86.*/86/')
+		cpu=`echo "$1" | sed -e 's/86.*/86/'`
 		vendor=pc
 		basic_os=sysv4
 		;;
 	i*86v)
-		cpu=$(echo "$1" | sed -e 's/86.*/86/')
+		cpu=`echo "$1" | sed -e 's/86.*/86/'`
 		vendor=pc
 		basic_os=sysv
 		;;
 	i*86sol2)
-		cpu=$(echo "$1" | sed -e 's/86.*/86/')
+		cpu=`echo "$1" | sed -e 's/86.*/86/'`
 		vendor=pc
 		basic_os=solaris2
 		;;
@@ -821,18 +901,6 @@ case $basic_machine in
 	next | m*-next)
 		cpu=m68k
 		vendor=next
-		case $basic_os in
-		    openstep*)
-		        ;;
-		    nextstep*)
-			;;
-		    ns2*)
-		      basic_os=nextstep2
-			;;
-		    *)
-		      basic_os=nextstep3
-			;;
-		esac
 		;;
 	np1)
 		cpu=np1
@@ -917,16 +985,17 @@ case $basic_machine in
 		;;
 	leon-*|leon[3-9]-*)
 		cpu=sparc
-		vendor=$(echo "$basic_machine" | sed 's/-.*//')
+		vendor=`echo "$basic_machine" | sed 's/-.*//'`
 		;;
 
 	*-*)
-		# shellcheck disable=SC2162
+		saved_IFS=$IFS
 		IFS="-" read cpu vendor <<EOF
 $basic_machine
 EOF
+		IFS=$saved_IFS
 		;;
-	# We use `pc' rather than `unknown'
+	# We use 'pc' rather than 'unknown'
 	# because (1) that's what they normally are, and
 	# (2) the word "unknown" tends to confuse beginning users.
 	i*86 | x86_64)
@@ -954,15 +1023,19 @@ unset -v basic_machine
 
 # Decode basic machines in the full and proper CPU-Company form.
 case $cpu-$vendor in
-	# Here we handle the default manufacturer of certain CPU types in canonical form. It is in
-	# some cases the only manufacturer, in others, it is the most popular.
+	# Here we handle the default manufacturer of certain CPU types in canonical form.
+	# It is in some cases the only manufacturer, in others, it is the most popular.
+	c[12]-convex | c[12]-unknown | c3[248]-convex | c3[248]-unknown)
+		vendor=convex
+		basic_os=${basic_os:-bsd}
+		;;
 	craynv-unknown)
 		vendor=cray
 		basic_os=${basic_os:-unicosmp}
 		;;
 	c90-unknown | c90-cray)
 		vendor=cray
-		basic_os=${Basic_os:-unicos}
+		basic_os=${basic_os:-unicos}
 		;;
 	fx80-unknown)
 		vendor=alliant
@@ -1003,11 +1076,34 @@ case $cpu-$vendor in
 		;;
 
 	# Here we normalize CPU types with a missing or matching vendor
-	dpx20-unknown | dpx20-bull)
-		cpu=rs6000
-		vendor=bull
+	armh-unknown | armh-alt)
+		cpu=armv7l
+		vendor=alt
+		basic_os=${basic_os:-linux-gnueabihf}
+		;;
+
+	# Normalized CPU+vendor pairs that imply an OS, if not otherwise specified
+	m68k-isi)
+		basic_os=${basic_os:-sysv}
+		;;
+	m68k-sony)
+		basic_os=${basic_os:-newsos}
+		;;
+	m68k-tektronix)
+		basic_os=${basic_os:-bsd}
+		;;
+	m88k-harris)
+		basic_os=${basic_os:-sysv3}
+		;;
+	i386-bull | m68k-bull)
+		basic_os=${basic_os:-sysv3}
+		;;
+	rs6000-bull)
 		basic_os=${basic_os:-bosx}
 		;;
+	mips-sni)
+		basic_os=${basic_os:-sysv4}
+		;;
 
 	# Here we normalize CPU types irrespective of the vendor
 	amd64-*)
@@ -1015,7 +1111,7 @@ case $cpu-$vendor in
 		;;
 	blackfin-*)
 		cpu=bfin
-		basic_os=linux
+		basic_os=${basic_os:-linux}
 		;;
 	c54x-*)
 		cpu=tic54x
@@ -1038,7 +1134,7 @@ case $cpu-$vendor in
 		;;
 	m68knommu-*)
 		cpu=m68k
-		basic_os=linux
+		basic_os=${basic_os:-linux}
 		;;
 	m9s12z-* | m68hcs12z-* | hcs12z-* | s12z-*)
 		cpu=s12z
@@ -1048,12 +1144,12 @@ case $cpu-$vendor in
 		;;
 	parisc-*)
 		cpu=hppa
-		basic_os=linux
+		basic_os=${basic_os:-linux}
 		;;
 	pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
 		cpu=i586
 		;;
-	pentiumpro-* | p6-* | 6x86-* | athlon-* | athalon_*-*)
+	pentiumpro-* | p6-* | 6x86-* | athlon-* | athlon_*-*)
 		cpu=i686
 		;;
 	pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
@@ -1062,9 +1158,6 @@ case $cpu-$vendor in
 	pentium4-*)
 		cpu=i786
 		;;
-	pc98-*)
-		cpu=i386
-		;;
 	ppc-* | ppcbe-*)
 		cpu=powerpc
 		;;
@@ -1084,7 +1177,7 @@ case $cpu-$vendor in
 		cpu=mipsisa64sb1el
 		;;
 	sh5e[lb]-*)
-		cpu=$(echo "$cpu" | sed 's/^\(sh.\)e\(.\)$/\1\2e/')
+		cpu=`echo "$cpu" | sed 's/^\(sh.\)e\(.\)$/\1\2e/'`
 		;;
 	spur-*)
 		cpu=spur
@@ -1098,13 +1191,10 @@ case $cpu-$vendor in
 	tx39el-*)
 		cpu=mipstx39el
 		;;
-	x64-*)
-		cpu=x86_64
-		;;
 	xscale-* | xscalee[bl]-*)
-		cpu=$(echo "$cpu" | sed 's/^xscale/arm/')
+		cpu=`echo "$cpu" | sed 's/^xscale/arm/'`
 		;;
-	arm64-*)
+	arm64-* | aarch64le-* | arm64_32-*)
 		cpu=aarch64
 		;;
 
@@ -1156,110 +1246,232 @@ case $cpu-$vendor in
 		# Recognize the canonical CPU types that are allowed with any
 		# company name.
 		case $cpu in
-			1750a | 580 \
+			  1750a \
+			| 580 \
+			| [cjt]90 \
 			| a29k \
-			| aarch64 | aarch64_be \
+			| aarch64 \
+			| aarch64_be \
+			| aarch64c \
 			| abacus \
-			| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] \
-			| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] \
-			| alphapca5[67] | alpha64pca5[67] \
+			| alpha \
+			| alpha64 \
+			| alpha64ev56 \
+			| alpha64ev6[78] \
+			| alpha64ev[4-8] \
+			| alpha64pca5[67] \
+			| alphaev56 \
+			| alphaev6[78] \
+			| alphaev[4-8] \
+			| alphapca5[67] \
 			| am33_2.0 \
 			| amdgcn \
-			| arc | arceb \
-			| arm | arm[lb]e | arme[lb] | armv* \
-			| avr | avr32 \
+			| arc \
+			| arc32 \
+			| arc64 \
+			| arceb \
+			| arm \
+			| arm64e \
+			| arm64ec \
+			| arm[lb]e \
+			| arme[lb] \
+			| armv* \
 			| asmjs \
+			| avr \
+			| avr32 \
 			| ba \
-			| be32 | be64 \
-			| bfin | bpf | bs2000 \
-			| c[123]* | c30 | [cjt]90 | c4x \
-			| c8051 | clipper | craynv | csky | cydra \
-			| d10v | d30v | dlx | dsp16xx \
-			| e2k | elxsi | epiphany \
-			| f30[01] | f700 | fido | fr30 | frv | ft32 | fx80 \
-			| h8300 | h8500 \
-			| hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
+			| be32 \
+			| be64 \
+			| bfin \
+			| bpf \
+			| bs2000 \
+			| c30 \
+			| c4x \
+			| c8051 \
+			| c[123]* \
+			| clipper \
+			| craynv \
+			| csky \
+			| cydra \
+			| d10v \
+			| d30v \
+			| dlx \
+			| dsp16xx \
+			| e2k \
+			| elxsi \
+			| epiphany \
+			| f30[01] \
+			| f700 \
+			| fido \
+			| fr30 \
+			| frv \
+			| ft32 \
+			| fx80 \
+			| h8300 \
+			| h8500 \
 			| hexagon \
-			| i370 | i*86 | i860 | i960 | ia16 | ia64 \
-			| ip2k | iq2000 \
+			| hppa \
+			| hppa1.[01] \
+			| hppa2.0 \
+			| hppa2.0[nw] \
+			| hppa64 \
+			| i*86 \
+			| i370 \
+			| i860 \
+			| i960 \
+			| ia16 \
+			| ia64 \
+			| intelgt \
+			| ip2k \
+			| iq2000 \
+			| javascript \
 			| k1om \
-			| le32 | le64 \
+			| kvx \
+			| le32 \
+			| le64 \
 			| lm32 \
-			| loongarch32 | loongarch64 | loongarchx32 \
-			| m32c | m32r | m32rle \
-			| m5200 | m68000 | m680[012346]0 | m68360 | m683?2 | m68k \
-			| m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x \
-			| m88110 | m88k | maxq | mb | mcore | mep | metag \
-			| microblaze | microblazeel \
-			| mips | mipsbe | mipseb | mipsel | mipsle \
-			| mips16 \
-			| mips64 | mips64eb | mips64el \
-			| mips64octeon | mips64octeonel \
-			| mips64orion | mips64orionel \
-			| mips64r5900 | mips64r5900el \
-			| mips64vr | mips64vrel \
-			| mips64vr4100 | mips64vr4100el \
-			| mips64vr4300 | mips64vr4300el \
-			| mips64vr5000 | mips64vr5000el \
-			| mips64vr5900 | mips64vr5900el \
-			| mipsisa32 | mipsisa32el \
-			| mipsisa32r2 | mipsisa32r2el \
-			| mipsisa32r6 | mipsisa32r6el \
-			| mipsisa64 | mipsisa64el \
-			| mipsisa64r2 | mipsisa64r2el \
-			| mipsisa64r6 | mipsisa64r6el \
-			| mipsisa64sb1 | mipsisa64sb1el \
-			| mipsisa64sr71k | mipsisa64sr71kel \
-			| mipsr5900 | mipsr5900el \
-			| mipstx39 | mipstx39el \
+			| loongarch32 \
+			| loongarch64 \
+			| m32c \
+			| m32r \
+			| m32rle \
+			| m5200 \
+			| m68000 \
+			| m680[012346]0 \
+			| m6811 \
+			| m6812 \
+			| m68360 \
+			| m683?2 \
+			| m68hc11 \
+			| m68hc12 \
+			| m68hcs12x \
+			| m68k \
+			| m88110 \
+			| m88k \
+			| maxq \
+			| mb \
+			| mcore \
+			| mep \
+			| metag \
+			| microblaze \
+			| microblazeel \
+			| mips* \
 			| mmix \
-			| mn10200 | mn10300 \
+			| mn10200 \
+			| mn10300 \
 			| moxie \
-			| mt \
 			| msp430 \
-			| nds32 | nds32le | nds32be \
+			| mt \
+			| nanomips* \
+			| nds32 \
+			| nds32be \
+			| nds32le \
 			| nfp \
-			| nios | nios2 | nios2eb | nios2el \
-			| none | np1 | ns16k | ns32k | nvptx \
+			| nios \
+			| nios2 \
+			| nios2eb \
+			| nios2el \
+			| none \
+			| np1 \
+			| ns16k \
+			| ns32k \
+			| nvptx \
 			| open8 \
 			| or1k* \
 			| or32 \
 			| orion \
+			| pdp10 \
+			| pdp11 \
 			| picochip \
-			| pdp10 | pdp11 | pj | pjl | pn | power \
-			| powerpc | powerpc64 | powerpc64le | powerpcle | powerpcspe \
+			| pj \
+			| pjl \
+			| pn \
+			| power \
+			| powerpc \
+			| powerpc64 \
+			| powerpc64le \
+			| powerpcle \
+			| powerpcspe \
 			| pru \
 			| pyramid \
-			| riscv | riscv32 | riscv32be | riscv64 | riscv64be \
-			| rl78 | romp | rs6000 | rx \
-			| s390 | s390x \
+			| riscv \
+			| riscv32 \
+			| riscv32be \
+			| riscv64 \
+			| riscv64be \
+			| rl78 \
+			| romp \
+			| rs6000 \
+			| rx \
+			| s390 \
+			| s390x \
 			| score \
-			| sh | shl \
-			| sh[1234] | sh[24]a | sh[24]ae[lb] | sh[23]e | she[lb] | sh[lb]e \
-			| sh[1234]e[lb] |  sh[12345][lb]e | sh[23]ele | sh64 | sh64le \
-			| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet \
+			| sh \
+			| sh64 \
+			| sh64le \
+			| sh[12345][lb]e \
+			| sh[1234] \
+			| sh[1234]e[lb] \
+			| sh[23]e \
+			| sh[23]ele \
+			| sh[24]a \
+			| sh[24]ae[lb] \
+			| sh[lb]e \
+			| she[lb] \
+			| shl \
+			| sparc \
+			| sparc64 \
+			| sparc64b \
+			| sparc64v \
+			| sparc86x \
+			| sparclet \
 			| sparclite \
-			| sparcv8 | sparcv9 | sparcv9b | sparcv9v | sv1 | sx* \
+			| sparcv8 \
+			| sparcv9 \
+			| sparcv9b \
+			| sparcv9v \
 			| spu \
+			| sv1 \
+			| sx* \
 			| tahoe \
 			| thumbv7* \
-			| tic30 | tic4x | tic54x | tic55x | tic6x | tic80 \
+			| tic30 \
+			| tic4x \
+			| tic54x \
+			| tic55x \
+			| tic6x \
+			| tic80 \
 			| tron \
 			| ubicom32 \
-			| v70 | v850 | v850e | v850e1 | v850es | v850e2 | v850e2v3 \
+			| v70 \
+			| v810 \
+			| v850 \
+			| v850e \
+			| v850e1 \
+			| v850e2 \
+			| v850e2v3 \
+			| v850es \
 			| vax \
+			| vc4 \
 			| visium \
 			| w65 \
-			| wasm32 | wasm64 \
+			| wasm32 \
+			| wasm64 \
 			| we32k \
-			| x86 | x86_64 | xc16x | xgate | xps100 \
-			| xstormy16 | xtensa* \
+			| x86 \
+			| x86_64 \
+			| xc16x \
+			| xgate \
+			| xps100 \
+			| xstormy16 \
+			| xtensa* \
 			| ymp \
-			| z8k | z80)
+			| z80 \
+			| z8k)
 				;;
 
 			*)
-				echo Invalid configuration \`"$1"\': machine \`"$cpu-$vendor"\' not recognized 1>&2
+				echo "Invalid configuration '$1': machine '$cpu-$vendor' not recognized" 1>&2
 				exit 1
 				;;
 		esac
@@ -1280,38 +1492,48 @@ esac
 
 # Decode manufacturer-specific aliases for certain operating systems.
 
-if test x$basic_os != x
+if test x"$basic_os" != x
 then
 
-# First recognize some ad-hoc caes, or perhaps split kernel-os, or else just
+# First recognize some ad-hoc cases, or perhaps split kernel-os, or else just
 # set os.
+obj=
 case $basic_os in
 	gnu/linux*)
 		kernel=linux
-		os=$(echo $basic_os | sed -e 's|gnu/linux|gnu|')
+		os=`echo "$basic_os" | sed -e 's|gnu/linux|gnu|'`
 		;;
 	os2-emx)
 		kernel=os2
-		os=$(echo $basic_os | sed -e 's|os2-emx|emx|')
+		os=`echo "$basic_os" | sed -e 's|os2-emx|emx|'`
 		;;
 	nto-qnx*)
 		kernel=nto
-		os=$(echo $basic_os | sed -e 's|nto-qnx|qnx|')
+		os=`echo "$basic_os" | sed -e 's|nto-qnx|qnx|'`
 		;;
 	*-*)
-		# shellcheck disable=SC2162
+		saved_IFS=$IFS
 		IFS="-" read kernel os <<EOF
 $basic_os
 EOF
+		IFS=$saved_IFS
 		;;
 	# Default OS when just kernel was specified
 	nto*)
 		kernel=nto
-		os=$(echo $basic_os | sed -e 's|nto|qnx|')
+		os=`echo "$basic_os" | sed -e 's|nto|qnx|'`
+		;;
+	ironclad*)
+		kernel=ironclad
+		os=`echo "$basic_os" | sed -e 's|ironclad|mlibc|'`
 		;;
 	linux*)
 		kernel=linux
-		os=$(echo $basic_os | sed -e 's|linux|gnu|')
+		os=`echo "$basic_os" | sed -e 's|linux|gnu|'`
+		;;
+	managarm*)
+		kernel=managarm
+		os=`echo "$basic_os" | sed -e 's|managarm|mlibc|'`
 		;;
 	*)
 		kernel=
@@ -1332,7 +1554,7 @@ case $os in
 		os=cnk
 		;;
 	solaris1 | solaris1.*)
-		os=$(echo $os | sed -e 's|solaris1|sunos4|')
+		os=`echo "$os" | sed -e 's|solaris1|sunos4|'`
 		;;
 	solaris)
 		os=solaris2
@@ -1340,6 +1562,23 @@ case $os in
 	unixware*)
 		os=sysv4.2uw
 		;;
+	# The marketing names for NeXT's operating systems were
+	# NeXTSTEP, NeXTSTEP 2, OpenSTEP 3, OpenSTEP 4.  'openstep' is
+	# mapped to 'openstep3', but 'openstep1' and 'openstep2' are
+	# mapped to 'nextstep' and 'nextstep2', consistent with the
+	# treatment of SunOS/Solaris.
+	ns | ns1 | nextstep | nextstep1 | openstep1)
+		os=nextstep
+		;;
+	ns2 | nextstep2 | openstep2)
+		os=nextstep2
+		;;
+	ns3 | nextstep3 | openstep | openstep3)
+		os=openstep3
+		;;
+	ns4 | nextstep4 | openstep4)
+		os=openstep4
+		;;
 	# es1800 is here to avoid being matched by es* (a different OS)
 	es1800*)
 		os=ose
@@ -1361,7 +1600,7 @@ case $os in
 		os=sco3.2v4
 		;;
 	sco3.2.[4-9]*)
-		os=$(echo $os | sed -e 's/sco3.2./sco3.2v/')
+		os=`echo "$os" | sed -e 's/sco3.2./sco3.2v/'`
 		;;
 	sco*v* | scout)
 		# Don't match below
@@ -1391,7 +1630,7 @@ case $os in
 		os=lynxos
 		;;
 	mac[0-9]*)
-		os=$(echo "$os" | sed -e 's|mac|macos|')
+		os=`echo "$os" | sed -e 's|mac|macos|'`
 		;;
 	opened*)
 		os=openedition
@@ -1400,16 +1639,17 @@ case $os in
 		os=os400
 		;;
 	sunos5*)
-		os=$(echo "$os" | sed -e 's|sunos5|solaris2|')
+		os=`echo "$os" | sed -e 's|sunos5|solaris2|'`
 		;;
 	sunos6*)
-		os=$(echo "$os" | sed -e 's|sunos6|solaris3|')
+		os=`echo "$os" | sed -e 's|sunos6|solaris3|'`
 		;;
 	wince*)
 		os=wince
 		;;
 	utek*)
 		os=bsd
+		vendor=`echo "$vendor" | sed -e 's|^unknown$|tektronix|'`
 		;;
 	dynix*)
 		os=bsd
@@ -1426,21 +1666,25 @@ case $os in
 	386bsd)
 		os=bsd
 		;;
-	ctix* | uts*)
+	ctix*)
+		os=sysv
+		vendor=`echo "$vendor" | sed -e 's|^unknown$|convergent|'`
+		;;
+	uts*)
 		os=sysv
 		;;
 	nova*)
-		os=rtmk-nova
-		;;
-	ns2)
-		os=nextstep2
+		kernel=rtmk
+		os=nova
 		;;
 	# Preserve the version number of sinix5.
 	sinix5.*)
-		os=$(echo $os | sed -e 's|sinix|sysv|')
+		os=`echo "$os" | sed -e 's|sinix|sysv|'`
+		vendor=`echo "$vendor" | sed -e 's|^unknown$|sni|'`
 		;;
 	sinix*)
 		os=sysv4
+		vendor=`echo "$vendor" | sed -e 's|^unknown$|sni|'`
 		;;
 	tpf*)
 		os=tpf
@@ -1478,10 +1722,16 @@ case $os in
 			os=eabi
 			;;
 		    *)
-			os=elf
+			os=
+			obj=elf
 			;;
 		esac
 		;;
+	aout* | coff* | elf* | pe*)
+		# These are machine code file formats, not OSes
+		obj=$os
+		os=
+		;;
 	*)
 		# No normalization, but not necessarily accepted, that comes below.
 		;;
@@ -1500,12 +1750,15 @@ else
 # system, and we'll never get to this point.
 
 kernel=
+obj=
 case $cpu-$vendor in
 	score-*)
-		os=elf
+		os=
+		obj=elf
 		;;
 	spu-*)
-		os=elf
+		os=
+		obj=elf
 		;;
 	*-acorn)
 		os=riscix1.2
@@ -1515,28 +1768,35 @@ case $cpu-$vendor in
 		os=gnu
 		;;
 	arm*-semi)
-		os=aout
+		os=
+		obj=aout
 		;;
 	c4x-* | tic4x-*)
-		os=coff
+		os=
+		obj=coff
 		;;
 	c8051-*)
-		os=elf
+		os=
+		obj=elf
 		;;
 	clipper-intergraph)
 		os=clix
 		;;
 	hexagon-*)
-		os=elf
+		os=
+		obj=elf
 		;;
 	tic54x-*)
-		os=coff
+		os=
+		obj=coff
 		;;
 	tic55x-*)
-		os=coff
+		os=
+		obj=coff
 		;;
 	tic6x-*)
-		os=coff
+		os=
+		obj=coff
 		;;
 	# This must come before the *-dec entry.
 	pdp10-*)
@@ -1558,28 +1818,43 @@ case $cpu-$vendor in
 		os=sunos3
 		;;
 	m68*-cisco)
-		os=aout
+		os=
+		obj=aout
 		;;
 	mep-*)
-		os=elf
+		os=
+		obj=elf
+		;;
+	# The -sgi and -siemens entries must be before the mips- entry
+	# or we get the wrong os.
+	*-sgi)
+		os=irix
+		;;
+	*-siemens)
+		os=sysv4
 		;;
 	mips*-cisco)
-		os=elf
+		os=
+		obj=elf
 		;;
-	mips*-*)
-		os=elf
+	mips*-*|nanomips*-*)
+		os=
+		obj=elf
 		;;
 	or32-*)
-		os=coff
+		os=
+		obj=coff
 		;;
-	*-tti)	# must be before sparc entry or we get the wrong os.
+	# This must be before the sparc-* entry or we get the wrong os.
+	*-tti)
 		os=sysv3
 		;;
 	sparc-* | *-sun)
 		os=sunos4.1.1
 		;;
 	pru-*)
-		os=elf
+		os=
+		obj=elf
 		;;
 	*-be)
 		os=beos
@@ -1603,7 +1878,7 @@ case $cpu-$vendor in
 		os=hpux
 		;;
 	*-hitachi)
-		os=hiux
+		os=hiuxwe2
 		;;
 	i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)
 		os=sysv
@@ -1647,12 +1922,6 @@ case $cpu-$vendor in
 	*-encore)
 		os=bsd
 		;;
-	*-sgi)
-		os=irix
-		;;
-	*-siemens)
-		os=sysv4
-		;;
 	*-masscomp)
 		os=rtu
 		;;
@@ -1660,10 +1929,12 @@ case $cpu-$vendor in
 		os=uxpv
 		;;
 	*-rom68k)
-		os=coff
+		os=
+		obj=coff
 		;;
 	*-*bug)
-		os=coff
+		os=
+		obj=coff
 		;;
 	*-apple)
 		os=macos
@@ -1681,87 +1952,325 @@ esac
 
 fi
 
-# Now, validate our (potentially fixed-up) OS.
+# Now, validate our (potentially fixed-up) individual pieces (OS, OBJ).
+
 case $os in
-	# Sometimes we do "kernel-abi", so those need to count as OSes.
-	musl* | newlib* | uclibc*)
+	# Sometimes we do "kernel-libc", so those need to count as OSes.
+	llvm* | musl* | newlib* | relibc* | uclibc*)
 		;;
-	# Likewise for "kernel-libc"
+	# Likewise for "kernel-abi"
 	eabi* | gnueabi*)
 		;;
+	# VxWorks passes extra cpu info in the 4th filed.
+	simlinux | simwindows | spe)
+		;;
+	# See `case $cpu-$os` validation below
+	ghcjs)
+		;;
 	# Now accept the basic system types.
-	# The portable systems comes first.
 	# Each alternative MUST end in a * to match a version number.
-	gnu* | android* | bsd* | mach* | minix* | genix* | ultrix* | irix* \
-	     | *vms* | esix* | aix* | cnk* | sunos | sunos[34]* \
-	     | hpux* | unos* | osf* | luna* | dgux* | auroraux* | solaris* \
-	     | sym* |  plan9* | psp* | sim* | xray* | os68k* | v88r* \
-	     | hiux* | abug | nacl* | netware* | windows* \
-	     | os9* | macos* | osx* | ios* \
-	     | mpw* | magic* | mmixware* | mon960* | lnews* \
-	     | amigaos* | amigados* | msdos* | newsos* | unicos* | aof* \
-	     | aos* | aros* | cloudabi* | sortix* | twizzler* \
-	     | nindy* | vxsim* | vxworks* | ebmon* | hms* | mvs* \
-	     | clix* | riscos* | uniplus* | iris* | isc* | rtu* | xenix* \
-	     | mirbsd* | netbsd* | dicos* | openedition* | ose* \
-	     | bitrig* | openbsd* | solidbsd* | libertybsd* | os108* \
-	     | ekkobsd* | freebsd* | riscix* | lynxos* | os400* \
-	     | bosx* | nextstep* | cxux* | aout* | elf* | oabi* \
-	     | ptx* | coff* | ecoff* | winnt* | domain* | vsta* \
-	     | udi* | lites* | ieee* | go32* | aux* | hcos* \
-	     | chorusrdb* | cegcc* | glidix* \
-	     | cygwin* | msys* | pe* | moss* | proelf* | rtems* \
-	     | midipix* | mingw32* | mingw64* | mint* \
-	     | uxpv* | beos* | mpeix* | udk* | moxiebox* \
-	     | interix* | uwin* | mks* | rhapsody* | darwin* \
-	     | openstep* | oskit* | conix* | pw32* | nonstopux* \
-	     | storm-chaos* | tops10* | tenex* | tops20* | its* \
-	     | os2* | vos* | palmos* | uclinux* | nucleus* | morphos* \
-	     | scout* | superux* | sysv* | rtmk* | tpf* | windiss* \
-	     | powermax* | dnix* | nx6 | nx7 | sei* | dragonfly* \
-	     | skyos* | haiku* | rdos* | toppers* | drops* | es* \
-	     | onefs* | tirtos* | phoenix* | fuchsia* | redox* | bme* \
-	     | midnightbsd* | amdhsa* | unleashed* | emscripten* | wasi* \
-	     | nsk* | powerunix* | genode* | zvmoe* | qnx* | emx*)
+	  abug \
+	| aix* \
+	| amdhsa* \
+	| amigados* \
+	| amigaos* \
+	| android* \
+	| aof* \
+	| aos* \
+	| aros* \
+	| atheos* \
+	| auroraux* \
+	| aux* \
+	| banan_os* \
+	| beos* \
+	| bitrig* \
+	| bme* \
+	| bosx* \
+	| bsd* \
+	| cegcc* \
+	| chorusos* \
+	| chorusrdb* \
+	| clix* \
+	| cloudabi* \
+	| cnk* \
+	| conix* \
+	| cos* \
+	| cxux* \
+	| cygwin* \
+	| darwin* \
+	| dgux* \
+	| dicos* \
+	| dnix* \
+	| domain* \
+	| dragonfly* \
+	| drops* \
+	| ebmon* \
+	| ecoff* \
+	| ekkobsd* \
+	| emscripten* \
+	| emx* \
+	| es* \
+	| fiwix* \
+	| freebsd* \
+	| fuchsia* \
+	| genix* \
+	| genode* \
+	| glidix* \
+	| gnu* \
+	| go32* \
+	| haiku* \
+	| hcos* \
+	| hiux* \
+	| hms* \
+	| hpux* \
+	| ieee* \
+	| interix* \
+	| ios* \
+	| iris* \
+	| irix* \
+	| isc* \
+	| its* \
+	| l4re* \
+	| libertybsd* \
+	| lites* \
+	| lnews* \
+	| luna* \
+	| lynxos* \
+	| mach* \
+	| macos* \
+	| magic* \
+	| mbr* \
+	| midipix* \
+	| midnightbsd* \
+	| mingw32* \
+	| mingw64* \
+	| minix* \
+	| mint* \
+	| mirbsd* \
+	| mks* \
+	| mlibc* \
+	| mmixware* \
+	| mon960* \
+	| morphos* \
+	| moss* \
+	| moxiebox* \
+	| mpeix* \
+	| mpw* \
+	| msdos* \
+	| msys* \
+	| mvs* \
+	| nacl* \
+	| netbsd* \
+	| netware* \
+	| newsos* \
+	| nextstep* \
+	| nindy* \
+	| nonstopux* \
+	| nova* \
+	| nsk* \
+	| nucleus* \
+	| nx6 \
+	| nx7 \
+	| oabi* \
+	| ohos* \
+	| onefs* \
+	| openbsd* \
+	| openedition* \
+	| openstep* \
+	| os108* \
+	| os2* \
+	| os400* \
+	| os68k* \
+	| os9* \
+	| ose* \
+	| osf* \
+	| oskit* \
+	| osx* \
+	| palmos* \
+	| phoenix* \
+	| plan9* \
+	| powermax* \
+	| powerunix* \
+	| proelf* \
+	| psos* \
+	| psp* \
+	| ptx* \
+	| pw32* \
+	| qnx* \
+	| rdos* \
+	| redox* \
+	| rhapsody* \
+	| riscix* \
+	| riscos* \
+	| rtems* \
+	| rtmk* \
+	| rtu* \
+	| scout* \
+	| secbsd* \
+	| sei* \
+	| serenity* \
+	| sim* \
+	| skyos* \
+	| solaris* \
+	| solidbsd* \
+	| sortix* \
+	| storm-chaos* \
+	| sunos \
+	| sunos[34]* \
+	| superux* \
+	| syllable* \
+	| sym* \
+	| sysv* \
+	| tenex* \
+	| tirtos* \
+	| tock* \
+	| toppers* \
+	| tops10* \
+	| tops20* \
+	| tpf* \
+	| tvos* \
+	| twizzler* \
+	| uclinux* \
+	| udi* \
+	| udk* \
+	| ultrix* \
+	| unicos* \
+	| uniplus* \
+	| unleashed* \
+	| unos* \
+	| uwin* \
+	| uxpv* \
+	| v88r* \
+	|*vms* \
+	| vos* \
+	| vsta* \
+	| vxsim* \
+	| vxworks* \
+	| wasi* \
+	| watchos* \
+	| wince* \
+	| windiss* \
+	| windows* \
+	| winnt* \
+	| xenix* \
+	| xray* \
+	| zephyr* \
+	| zvmoe* )
 		;;
 	# This one is extra strict with allowed versions
 	sco3.2v2 | sco3.2v[4-9]* | sco5v6*)
 		# Don't forget version if it is 3.2v4 or newer.
 		;;
+	# This refers to builds using the UEFI calling convention
+	# (which depends on the architecture) and PE file format.
+	# Note that this is both a different calling convention and
+	# different file format than that of GNU-EFI
+	# (x86_64-w64-mingw32).
+	uefi)
+		;;
 	none)
 		;;
+	kernel* | msvc* )
+		# Restricted further below
+		;;
+	'')
+		if test x"$obj" = x
+		then
+			echo "Invalid configuration '$1': Blank OS only allowed with explicit machine code file format" 1>&2
+		fi
+		;;
 	*)
-		echo Invalid configuration \`"$1"\': OS \`"$os"\' not recognized 1>&2
+		echo "Invalid configuration '$1': OS '$os' not recognized" 1>&2
+		exit 1
+		;;
+esac
+
+case $obj in
+	aout* | coff* | elf* | pe*)
+		;;
+	'')
+		# empty is fine
+		;;
+	*)
+		echo "Invalid configuration '$1': Machine code format '$obj' not recognized" 1>&2
+		exit 1
+		;;
+esac
+
+# Here we handle the constraint that a (synthetic) cpu and os are
+# valid only in combination with each other and nowhere else.
+case $cpu-$os in
+	# The "javascript-unknown-ghcjs" triple is used by GHC; we
+	# accept it here in order to tolerate that, but reject any
+	# variations.
+	javascript-ghcjs)
+		;;
+	javascript-* | *-ghcjs)
+		echo "Invalid configuration '$1': cpu '$cpu' is not valid with os '$os$obj'" 1>&2
 		exit 1
 		;;
 esac
 
 # As a final step for OS-related things, validate the OS-kernel combination
 # (given a valid OS), if there is a kernel.
-case $kernel-$os in
-	linux-gnu* | linux-dietlibc* | linux-android* | linux-newlib* | linux-musl* | linux-uclibc* )
+case $kernel-$os-$obj in
+	linux-gnu*- | linux-android*- | linux-dietlibc*- | linux-llvm*- \
+		    | linux-mlibc*- | linux-musl*- | linux-newlib*- \
+		    | linux-relibc*- | linux-uclibc*- | linux-ohos*- )
 		;;
-	uclinux-uclibc* )
+	uclinux-uclibc*- | uclinux-gnu*- )
 		;;
-	-dietlibc* | -newlib* | -musl* | -uclibc* )
+	ironclad-mlibc*-)
+		;;
+	managarm-mlibc*- | managarm-kernel*- )
+		;;
+	windows*-msvc*-)
+		;;
+	-dietlibc*- | -llvm*- | -mlibc*- | -musl*- | -newlib*- | -relibc*- \
+		    | -uclibc*- )
 		# These are just libc implementations, not actual OSes, and thus
 		# require a kernel.
-		echo "Invalid configuration \`$1': libc \`$os' needs explicit kernel." 1>&2
+		echo "Invalid configuration '$1': libc '$os' needs explicit kernel." 1>&2
 		exit 1
 		;;
-	kfreebsd*-gnu* | kopensolaris*-gnu*)
+	-kernel*- )
+		echo "Invalid configuration '$1': '$os' needs explicit kernel." 1>&2
+		exit 1
 		;;
-	nto-qnx*)
+	*-kernel*- )
+		echo "Invalid configuration '$1': '$kernel' does not support '$os'." 1>&2
+		exit 1
 		;;
-	os2-emx)
+	*-msvc*- )
+		echo "Invalid configuration '$1': '$os' needs 'windows'." 1>&2
+		exit 1
 		;;
-	*-eabi* | *-gnueabi*)
+	kfreebsd*-gnu*- | knetbsd*-gnu*- | netbsd*-gnu*- | kopensolaris*-gnu*-)
 		;;
-	-*)
+	vxworks-simlinux- | vxworks-simwindows- | vxworks-spe-)
+		;;
+	nto-qnx*-)
+		;;
+	os2-emx-)
+		;;
+	rtmk-nova-)
+		;;
+	*-eabi*- | *-gnueabi*-)
+		;;
+	ios*-simulator- | tvos*-simulator- | watchos*-simulator- )
+		;;
+	none--*)
+		# None (no kernel, i.e. freestanding / bare metal),
+		# can be paired with an machine code file format
+		;;
+	-*-)
 		# Blank kernel with real OS is always fine.
 		;;
-	*-*)
-		echo "Invalid configuration \`$1': Kernel \`$kernel' not known to work with OS \`$os'." 1>&2
+	--*)
+		# Blank kernel and OS with real machine code file format is always fine.
+		;;
+	*-*-*)
+		echo "Invalid configuration '$1': Kernel '$kernel' not known to work with OS '$os'." 1>&2
 		exit 1
 		;;
 esac
@@ -1774,7 +2283,7 @@ case $vendor in
 			*-riscix*)
 				vendor=acorn
 				;;
-			*-sunos*)
+			*-sunos* | *-solaris*)
 				vendor=sun
 				;;
 			*-cnk* | *-aix*)
@@ -1844,12 +2353,12 @@ case $vendor in
 		;;
 esac
 
-echo "$cpu-$vendor-${kernel:+$kernel-}$os"
+echo "$cpu-$vendor${kernel:+-$kernel}${os:+-$os}${obj:+-$obj}"
 exit
 
 # Local variables:
-# eval: (add-hook 'before-save-hook 'time-stamp)
+# eval: (add-hook 'before-save-hook 'time-stamp nil t)
 # time-stamp-start: "timestamp='"
-# time-stamp-format: "%:y-%02m-%02d"
+# time-stamp-format: "%Y-%02m-%02d"
 # time-stamp-end: "'"
 # End:
diff --git a/build-aux/install-sh b/build-aux/install-sh
index ebc66913..b44de098 100755
--- a/build-aux/install-sh
+++ b/build-aux/install-sh
@@ -115,7 +115,7 @@ fi
 if [ x"$dir_arg" != x ]; then
 	dst=$src
 	src=""
-	
+
 	if [ -d $dst ]; then
 		instcmd=:
 	else
@@ -124,7 +124,7 @@ if [ x"$dir_arg" != x ]; then
 else
 
 # Waiting for this to be detected by the "$instcmd $src $dsttmp" command
-# might cause directories to be created, which would be especially bad 
+# might cause directories to be created, which would be especially bad
 # if $src (and thus $dsttmp) contains '*'.
 
 	if [ -f $src -o -d $src ]
@@ -134,7 +134,7 @@ else
 		echo "install:  $src does not exist"
 		exit 1
 	fi
-	
+
 	if [ x"$dst" = x ]
 	then
 		echo "install:	no destination specified"
@@ -201,17 +201,17 @@ else
 
 # If we're going to rename the final executable, determine the name now.
 
-	if [ x"$transformarg" = x ] 
+	if [ x"$transformarg" = x ]
 	then
 		dstfile=`basename $dst`
 	else
-		dstfile=`basename $dst $transformbasename | 
+		dstfile=`basename $dst $transformbasename |
 			sed $transformarg`$transformbasename
 	fi
 
 # don't allow the sed command to completely eliminate the filename
 
-	if [ x"$dstfile" = x ] 
+	if [ x"$dstfile" = x ]
 	then
 		dstfile=`basename $dst`
 	else
@@ -242,7 +242,7 @@ else
 # Now rename the file to the real destination.
 
 	$doit $rmcmd -f $dstdir/$dstfile &&
-	$doit $mvcmd $dsttmp $dstdir/$dstfile 
+	$doit $mvcmd $dsttmp $dstdir/$dstfile
 
 fi &&
 
diff --git a/configure.ac b/configure.ac
index f6d25f33..e57d0667 100644
--- a/configure.ac
+++ b/configure.ac
@@ -92,6 +92,32 @@ AC_LANG_POP([C++])
 JE_CONCAT_VVV(CXXFLAGS, CONFIGURE_CXXFLAGS, SPECIFIED_CXXFLAGS)
 ])
 
+CONFIGURE_LDFLAGS=
+SPECIFIED_LDFLAGS="${LDFLAGS}"
+dnl JE_LDFLAGS_ADD(ldflag)
+dnl
+dnl LDFLAGS is the concatenation of CONFIGURE_LDFLAGS and SPECIFIED_LDFLAGS
+dnl This macro appends to CONFIGURE_LDFLAGS and regenerates LDFLAGS.
+AC_DEFUN([JE_LDFLAGS_ADD],
+[
+AC_MSG_CHECKING([whether linker supports $1])
+T_CONFIGURE_LDFLAGS="${CONFIGURE_LDFLAGS}"
+JE_APPEND_VS(CONFIGURE_LDFLAGS, $1)
+JE_CONCAT_VVV(LDFLAGS, CONFIGURE_LDFLAGS, SPECIFIED_LDFLAGS)
+AC_LINK_IFELSE([AC_LANG_PROGRAM(
+[[
+]], [[
+    return 0;
+]])],
+              [je_cv_ldflags_added=$1]
+              AC_MSG_RESULT([yes]),
+              [je_cv_ldflags_added=]
+              AC_MSG_RESULT([no])
+              [CONFIGURE_LDFLAGS="${T_CONFIGURE_LDFLAGS}"]
+)
+JE_CONCAT_VVV(LDFLAGS, CONFIGURE_LDFLAGS, SPECIFIED_LDFLAGS)
+])
+
 dnl JE_COMPILABLE(label, hcode, mcode, rvar)
 dnl
 dnl Use AC_LINK_IFELSE() rather than AC_COMPILE_IFELSE() so that linker errors
@@ -298,6 +324,15 @@ fi
 ,
 enable_cxx="1"
 )
+AC_ARG_WITH([cxx_stdlib],
+  [AS_HELP_STRING([--with-cxx-stdlib=<libstdc++|libcxx>],
+  [Specify the C++ standard library to link (default: probe for libstdc++)])],
+  [case "${with_cxx_stdlib}" in
+    libstdc++|libcxx) ;;
+    *) AC_MSG_ERROR([bad value ${with_cxx_stdlib} for --with-cxx-stdlib]) ;;
+  esac],
+  [with_cxx_stdlib=""]
+)
 if test "x$enable_cxx" = "x1" ; then
   dnl Require at least c++14, which is the first version to support sized
   dnl deallocation.  C++ support is not compiled otherwise.
@@ -312,17 +347,28 @@ if test "x$enable_cxx" = "x1" ; then
     JE_CXXFLAGS_ADD([-g3])
 
     SAVED_LIBS="${LIBS}"
-    JE_APPEND_VS(LIBS, -lstdc++)
-    JE_COMPILABLE([libstdc++ linkage], [
+    case "${with_cxx_stdlib}" in
+      libstdc++)
+        JE_APPEND_VS(LIBS, -lstdc++)
+        ;;
+      libcxx)
+        JE_APPEND_VS(LIBS, -lc++)
+        ;;
+      *)
+        dnl Probe for libstdc++ (the default when --with-cxx-stdlib is not given).
+        JE_APPEND_VS(LIBS, -lstdc++)
+        JE_COMPILABLE([libstdc++ linkage], [
 #include <stdlib.h>
 ], [[
 	int *arr = (int *)malloc(sizeof(int) * 42);
 	if (arr == NULL)
 		return 1;
 ]], [je_cv_libstdcxx])
-    if test "x${je_cv_libstdcxx}" = "xno" ; then
-      LIBS="${SAVED_LIBS}"
-    fi
+        if test "x${je_cv_libstdcxx}" = "xno" ; then
+          LIBS="${SAVED_LIBS}"
+        fi
+        ;;
+    esac
   else
     enable_cxx="0"
   fi
@@ -510,6 +556,23 @@ typedef unsigned __int32 uint32_t;
       else
         AC_MSG_ERROR([cannot determine number of significant virtual address bits])
       fi
+      AC_CACHE_CHECK([rdtscp support],
+		     [je_cv_rdtscp],
+		     AC_RUN_IFELSE([AC_LANG_PROGRAM(
+[[
+#include <stdint.h>
+]],
+[[
+      unsigned int dx;
+      asm volatile("rdtscp" : "=d"(dx) ::);
+      return 0;
+]])],
+      [je_cv_rdtscp=yes],
+      [je_cv_rdtscp=no],
+      [je_cv_rdtscp=no]))
+      if test "x${je_cv_rdtscp}" = "xyes"; then
+        AC_DEFINE([JEMALLOC_HAVE_RDTSCP], [ ], [ ])
+      fi
     fi
     ;;
   *)
@@ -529,6 +592,37 @@ typedef unsigned __int32 uint32_t;
     ;;
 esac
 AC_DEFINE_UNQUOTED([LG_VADDR], [$LG_VADDR], [ ])
+AC_CACHE_CHECK([asm volatile support],
+               [je_cv_asm_volatile],
+               AC_RUN_IFELSE([AC_LANG_PROGRAM(
+[[
+]],
+[[
+      void* ptr;
+      asm volatile("" : "+r"(ptr));
+      return 0;
+]])],
+[je_cv_asm_volatile=yes],
+[je_cv_asm_volatile=no],
+[je_cv_asm_volatile=no]))
+if test "x${je_cv_asm_volatile}" = "xyes"; then
+  AC_DEFINE([JEMALLOC_HAVE_ASM_VOLATILE], [ ], [ ])
+fi
+AC_CACHE_CHECK([__int128 support],
+               [je_cv_int128],
+               AC_RUN_IFELSE([AC_LANG_PROGRAM(
+[[
+]],
+[[
+      __int128 temp = 0;
+      return temp;
+]])],
+[je_cv_int128=yes],
+[je_cv_int128=no],
+[je_cv_int128=no]))
+if test "x${je_cv_int128}" = "xyes"; then
+  AC_DEFINE([JEMALLOC_HAVE_INT128], [ ], [ ])
+fi
 
 LD_PRELOAD_VAR="LD_PRELOAD"
 so="so"
@@ -578,7 +672,7 @@ AC_ARG_WITH([version],
   [AS_HELP_STRING([--with-version=<major>.<minor>.<bugfix>-<nrev>-g<gid>],
    [Version string])],
   [
-    echo "${with_version}" | grep ['^[0-9]\+\.[0-9]\+\.[0-9]\+-[0-9]\+-g[0-9a-f]\+$'] 2>&1 1>/dev/null
+    echo "${with_version}" | grep ['^[0-9]\{1,\}\.[0-9]\{1,\}\.[0-9]\{1,\}-[0-9]\{1,\}-g[0-9a-f]\{1,\}$'] 2>&1 1>/dev/null
     if test $? -eq 0 ; then
       echo "$with_version" > "${objroot}VERSION"
     else
@@ -654,6 +748,9 @@ case "${host}" in
 	SOREV="${rev}.${so}"
 	sbrk_deprecated="1"
 	SYM_PREFIX="_"
+	if test "${LG_SIZEOF_PTR}" = "3"; then
+	  default_retain="1"
+	fi
 	;;
   *-*-freebsd*)
 	JE_APPEND_VS(CPPFLAGS, -D_BSD_SOURCE)
@@ -687,6 +784,19 @@ case "${host}" in
 	fi
 	zero_realloc_default_free="1"
 	;;
+  *-*-linux-musl*)
+	dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE.
+	JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE)
+	abi="elf"
+	AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS], [ ], [ ])
+	AC_DEFINE([JEMALLOC_HAS_ALLOCA_H], [ ], [ ])
+	AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ], [ ])
+	AC_DEFINE([JEMALLOC_THREADED_INIT], [ ], [ ])
+	if test "${LG_SIZEOF_PTR}" = "3"; then
+	  default_retain="1"
+	fi
+	zero_realloc_default_free="1"
+	;;
   *-*-linux*)
 	dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE.
 	JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE)
@@ -829,11 +939,26 @@ AC_SUBST([DUMP_SYMS])
 AC_SUBST([CC_MM])
 
 dnl Determine whether libm must be linked to use e.g. log(3).
-AC_SEARCH_LIBS([log], [m], , [AC_MSG_ERROR([Missing math functions])])
-if test "x$ac_cv_search_log" != "xnone required" ; then
-  LM="$ac_cv_search_log"
-else
+
+# On MSVC, log is an intrinsic that doesn't require libm. However,
+# AC_SEARCH_LIBS does not successfully detect this, as it will try to compile
+# a program using the wrong signature for log. Newer versions of MSVC CL detects
+# this and rejects the program with the following messages.
+#
+# conftest.c(40): warning C4391: 'char log()': incorrect return type for intrinsic function, expected 'double'
+# conftest.c(44): error C2168: 'log': too few actual parameters for intrinsic function
+#
+# Since log is always available on MSVC (it's been around since the dawn of
+# time), we simply always assume it's there if MSVC is detected.
+if test "x$je_cv_msvc" = "xyes" ; then
   LM=
+else
+  AC_SEARCH_LIBS([log], [m], , [AC_MSG_ERROR([Missing math functions])])
+    if test "x$ac_cv_search_log" != "xnone required" ; then
+      LM="$ac_cv_search_log"
+    else
+      LM=
+    fi
 fi
 AC_SUBST(LM)
 
@@ -939,6 +1064,30 @@ if test "x${je_cv_cold}" = "xyes" ; then
   AC_DEFINE([JEMALLOC_HAVE_ATTR_COLD], [ ], [ ])
 fi
 
+dnl Check for deprecated attribute support.
+JE_CFLAGS_SAVE()
+JE_CFLAGS_ADD([-Wdeprecated-declarations])
+JE_COMPILABLE([deprecated attribute],
+              [#if !__has_attribute(deprecated)
+               #error "deprecated attribute not supported"
+               #endif
+               struct has_deprecated_field {
+                   int good;
+                   int __attribute__((deprecated("Do not use"))) bad;
+               };
+              ],
+              [struct has_deprecated_field instance;
+               instance.good = 0;
+               instance.bad = 1;
+              ],
+              [je_cv_deprecated])
+JE_CFLAGS_RESTORE()
+if test "x${je_cv_deprecated}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_HAVE_ATTR_DEPRECATED], [ ], [ ])
+  JE_CFLAGS_ADD([-Wdeprecated-declarations])
+  JE_CXXFLAGS_ADD([-Wdeprecated-declarations])
+fi
+
 dnl Check for VM_MAKE_TAG for mmap support.
 JE_COMPILABLE([vm_make_tag],
 	      [#include <sys/mman.h>
@@ -1052,11 +1201,11 @@ AC_SUBST([JEMALLOC_CPREFIX])
 AC_ARG_WITH([export],
   [AS_HELP_STRING([--without-export], [disable exporting jemalloc public APIs])],
   [if test "x$with_export" = "xno"; then
-  AC_DEFINE([JEMALLOC_EXPORT],[], [ ])
+  AC_DEFINE([JEMALLOC_EXPORT], [ ], [ ])
 fi]
 )
 
-public_syms="aligned_alloc calloc dallocx free mallctl mallctlbymib mallctlnametomib malloc malloc_conf malloc_conf_2_conf_harder malloc_message malloc_stats_print malloc_usable_size mallocx smallocx_${jemalloc_version_gid} nallocx posix_memalign rallocx realloc sallocx sdallocx xallocx"
+public_syms="aligned_alloc calloc dallocx free free_sized free_aligned_sized mallctl mallctlbymib mallctlnametomib malloc malloc_conf malloc_conf_2_conf_harder malloc_message malloc_stats_print malloc_usable_size mallocx smallocx_${jemalloc_version_gid} nallocx posix_memalign rallocx realloc sallocx sdallocx xallocx"
 dnl Check for additional platform-specific public API functions.
 AC_CHECK_FUNC([memalign],
 	      [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN], [ ], [ ])
@@ -1064,6 +1213,9 @@ AC_CHECK_FUNC([memalign],
 AC_CHECK_FUNC([valloc],
 	      [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC], [ ], [ ])
 	       public_syms="${public_syms} valloc"])
+AC_CHECK_FUNC([pvalloc],
+	      [AC_DEFINE([JEMALLOC_OVERRIDE_PVALLOC], [ ], [ ])
+	       public_syms="${public_syms} pvalloc"])
 AC_CHECK_FUNC([malloc_size],
 	      [AC_DEFINE([JEMALLOC_HAVE_MALLOC_SIZE], [ ], [ ])
 	       public_syms="${public_syms} malloc_size"])
@@ -1077,6 +1229,16 @@ if test "x${JEMALLOC_PREFIX}" = "x" ; then
   AC_CHECK_FUNC([__libc_free],
 		[AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_FREE], [ ], [ ])
 		 wrap_syms="${wrap_syms} __libc_free"])
+  dnl __libc_free_sized and __libc_free_aligned_sized are here speculatively
+  dnl under the assumption that glibc will eventually define symbols with these
+  dnl names. In the event glibc chooses different names for these symbols,
+  dnl these will need to be amended to match.
+  AC_CHECK_FUNC([__libc_free_sized],
+		[AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_FREE_SIZED], [ ], [ ])
+		 wrap_syms="${wrap_syms} __libc_free_sized"])
+  AC_CHECK_FUNC([__libc_free_aligned_sized],
+		[AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_FREE_ALIGNED_SIZED], [ ], [ ])
+		 wrap_syms="${wrap_syms} __libc_free_aligned_sized"])
   AC_CHECK_FUNC([__libc_malloc],
 		[AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_MALLOC], [ ], [ ])
 		 wrap_syms="${wrap_syms} __libc_malloc"])
@@ -1089,6 +1251,9 @@ if test "x${JEMALLOC_PREFIX}" = "x" ; then
   AC_CHECK_FUNC([__libc_valloc],
 		[AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_VALLOC], [ ], [ ])
 		 wrap_syms="${wrap_syms} __libc_valloc"])
+  AC_CHECK_FUNC([__libc_pvalloc],
+		[AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_PVALLOC], [ ], [ ])
+		 wrap_syms="${wrap_syms} __libc_pvalloc"])
   AC_CHECK_FUNC([__posix_memalign],
 		[AC_DEFINE([JEMALLOC_OVERRIDE___POSIX_MEMALIGN], [ ], [ ])
 		 wrap_syms="${wrap_syms} __posix_memalign"])
@@ -1256,6 +1421,23 @@ if test "x$enable_stats" = "x1" ; then
 fi
 AC_SUBST([enable_stats])
 
+dnl Disable reading configuration from file and environment variable
+AC_ARG_ENABLE([user_config],
+  [AS_HELP_STRING([--disable-user-config],
+  [Do not read malloc config from /etc/malloc.conf or MALLOC_CONF])],
+[if test "x$enable_user_config" = "xno" ; then
+  enable_user_config="0"
+else
+  enable_user_config="1"
+fi
+],
+[enable_user_config="1"]
+)
+if test "x$enable_user_config" = "x1" ; then
+  AC_DEFINE([JEMALLOC_CONFIG_ENV], [ ], [ ])
+  AC_DEFINE([JEMALLOC_CONFIG_FILE], [ ], [ ])
+fi
+
 dnl Do not enable smallocx by default.
 AC_ARG_ENABLE([experimental_smallocx],
   [AS_HELP_STRING([--enable-experimental-smallocx], [Enable experimental smallocx API])],
@@ -1329,6 +1511,33 @@ if test "x$backtrace_method" = "x" -a "x$enable_prof_libunwind" = "x1" ; then
   fi
 fi
 
+if test `uname -s` = "Linux"
+then
+  AC_ARG_ENABLE([prof-frameptr],
+    [AS_HELP_STRING([--enable-prof-frameptr], [Use optimized frame pointer unwinder for backtracing (Linux only)])],
+  [if test "x$enable_prof_frameptr" = "xno" ; then
+    enable_prof_frameptr="0"
+  else
+    enable_prof_frameptr="1"
+    if test "x$enable_prof" = "x0" ; then
+      AC_MSG_ERROR([--enable-prof-frameptr should only be used with --enable-prof])
+    fi
+  fi
+  ],
+  [enable_prof_frameptr="0"]
+  )
+  if test "x$backtrace_method" = "x" -a "x$enable_prof_frameptr" = "x1" \
+      -a "x$GCC" = "xyes" ; then
+    JE_CFLAGS_ADD([-fno-omit-frame-pointer])
+    backtrace_method="frame pointer linux"
+    AC_DEFINE([JEMALLOC_PROF_FRAME_POINTER], [ ], [ ])
+  else
+    enable_prof_frameptr="0"
+  fi
+else
+  enable_prof_frameptr="0"
+fi
+
 AC_ARG_ENABLE([prof-libgcc],
   [AS_HELP_STRING([--disable-prof-libgcc],
   [Do not use libgcc for backtracing])],
@@ -1404,6 +1613,18 @@ if test "x$zero_realloc_default_free" = "x1" ; then
   AC_DEFINE([JEMALLOC_ZERO_REALLOC_DEFAULT_FREE], [ ], [ ])
 fi
 
+dnl Support allocation from DSS by default
+AC_ARG_ENABLE([dss],
+  [AS_HELP_STRING([--disable-dss], [Disable usage of sbrk(2)])],
+[if test "x$enable_dss" = "xno" ; then
+  enable_dss="0"
+else
+  enable_dss="1"
+fi
+],
+[enable_dss="1"]
+)
+
 dnl Enable allocation from DSS if supported by the OS.
 have_dss="1"
 dnl Check whether the BSD/SUSv1 sbrk() exists.  If not, disable DSS support.
@@ -1417,7 +1638,7 @@ else
   have_dss="0"
 fi
 
-if test "x$have_dss" = "x1" ; then
+if test "x$have_dss" = "x1" -a "x$enable_dss" = "x1" ; then
   AC_DEFINE([JEMALLOC_DSS], [ ], [ ])
 fi
 
@@ -1480,6 +1701,55 @@ else
 fi
 AC_SUBST([enable_utrace])
 
+dnl Disable experimental sdt tracing by default.
+AC_ARG_ENABLE([experimental-sdt],
+  [AS_HELP_STRING([--enable-experimental-sdt], [Enable systemtap USDT probes])],
+[if test "x$enable_experimental_sdt" = "xno" ; then
+  enable_experimental_sdt="0"
+else
+	JE_COMPILABLE([systemtap sdt], [
+#include <sys/sdt.h>
+	], [
+void foo(int i, void *p) { STAP_PROBE2(jemalloc, test, i, p); }
+  	],
+	[je_cv_stap_sdt])
+
+	if test "x${je_cv_stap_sdt}" = "xyes" ; then
+	   enable_experimental_sdt="1"
+	elif test "x${abi}" = "xelf" ; then
+	     case "${host}" in
+	     	  *-*-linux-android*)
+			case "${host_cpu}" in aarch64|x86_64)
+			     enable_experimental_sdt="2"
+			     ;;
+			esac
+			;;
+		  *-*-linux*)
+			case "${host_cpu}" in x86_64|aarch64|arm*)
+			      enable_experimental_sdt="2"
+			      ;;
+			esac
+		        ;;
+		  *)
+			enable_experimental_sdt="0"
+			AC_MSG_ERROR([Unsupported sdt on this platform])
+			;;
+	     esac
+	else
+	   AC_MSG_ERROR([Unsupported sdt on this platform])
+   	fi
+fi
+],
+[enable_experimental_sdt="0"]
+)
+
+if test "x$enable_experimental_sdt" = "x1" ; then
+    AC_DEFINE([JEMALLOC_EXPERIMENTAL_USDT_STAP], [ ], [ ])
+elif test "x$enable_experimental_sdt" = "x2"; then
+    AC_DEFINE([JEMALLOC_EXPERIMENTAL_USDT_CUSTOM], [ ], [ ])
+fi
+AC_SUBST([enable_experimental_sdt])
+
 dnl Do not support the xmalloc option by default.
 AC_ARG_ENABLE([xmalloc],
   [AS_HELP_STRING([--enable-xmalloc], [Support xmalloc option])],
@@ -1545,6 +1815,22 @@ if test "x$enable_readlinkat" = "x1" ; then
 fi
 AC_SUBST([enable_readlinkat])
 
+dnl Do not force getenv by default
+AC_ARG_ENABLE([force-getenv],
+  [AS_HELP_STRING([--enable-force-getenv], [Use getenv over secure_getenv])],
+[if test "x$enable_force_getenv" = "xno" ; then
+  enable_force_getenv="0"
+else
+  enable_force_getenv="1"
+fi
+],
+[enable_force_getenv="0"]
+)
+if test "x$enable_force_getenv" = "x1" ; then
+  AC_DEFINE([JEMALLOC_FORCE_GETENV], [ ], [ ])
+fi
+AC_SUBST([force_getenv])
+
 dnl Avoid extra safety checks by default
 AC_ARG_ENABLE([opt-safety-checks],
   [AS_HELP_STRING([--enable-opt-safety-checks],
@@ -1592,7 +1878,7 @@ fi
 [enable_uaf_detection="0"]
 )
 if test "x$enable_uaf_detection" = "x1" ; then
-  AC_DEFINE([JEMALLOC_UAF_DETECTION], [ ])
+  AC_DEFINE([JEMALLOC_UAF_DETECTION], [ ], [ ])
 fi
 AC_SUBST([enable_uaf_detection])
 
@@ -1694,6 +1980,16 @@ case "${host}" in
         LG_PAGE=14
       fi
       ;;
+  *-*-linux-android)
+      if test "x$LG_PAGE" = "xdetect"; then
+	AC_CHECK_DECLS([PAGE_SIZE], [LG_PAGE=12], [LG_PAGE=14], [#include <sys/user.h>])
+      fi
+      ;;
+  aarch64-unknown-linux-*)
+      if test "x$LG_PAGE" = "xdetect"; then
+        LG_PAGE=16
+      fi
+      ;;
 esac
 if test "x$LG_PAGE" = "xdetect"; then
   AC_CACHE_CHECK([LG_PAGE],
@@ -1758,7 +2054,7 @@ if test "x${je_cv_lg_hugepage}" = "x" ; then
   dnl   Hugepagesize:       2048 kB
   if test -e "/proc/meminfo" ; then
     hpsk=[`cat /proc/meminfo 2>/dev/null | \
-          grep -e '^Hugepagesize:[[:space:]]\+[0-9]\+[[:space:]]kB$' | \
+          grep '^Hugepagesize:[[:space:]]\{1,\}[0-9]\{1,\}[[:space:]]kB$' | \
           awk '{print $2}'`]
     if test "x${hpsk}" != "x" ; then
       je_cv_lg_hugepage=10
@@ -1855,6 +2151,16 @@ dnl Check if we have dlsym support.
   if test "x${je_cv_pthread_getname_np}" = "xyes" ; then
     AC_DEFINE([JEMALLOC_HAVE_PTHREAD_GETNAME_NP], [ ], [ ])
   fi
+  dnl Check if pthread_set_name_np is available with the expected API.
+  JE_COMPILABLE([pthread_set_name_np(3)], [
+#include <pthread.h>
+#include <pthread_np.h>
+], [
+  pthread_set_name_np(pthread_self(), "set_name_test");
+], [je_cv_pthread_set_name_np])
+  if test "x${je_cv_pthread_set_name_np}" = "xyes" ; then
+    AC_DEFINE([JEMALLOC_HAVE_PTHREAD_SET_NAME_NP], [ ], [ ])
+  fi
   dnl Check if pthread_get_name_np is not necessarily present despite
   dnl the pthread_set_name_np counterpart
   JE_COMPILABLE([pthread_get_name_np(3)], [
@@ -1942,6 +2248,16 @@ if test "x${je_cv_clock_realtime}" = "xyes" ; then
   AC_DEFINE([JEMALLOC_HAVE_CLOCK_REALTIME], [ ], [ ])
 fi
 
+dnl Check for clock_gettime_nsec_np().
+JE_COMPILABLE([clock_gettime_nsec_np()], [
+#include <time.h>
+], [
+	clock_gettime_nsec_np(CLOCK_UPTIME_RAW);
+], [je_cv_clock_gettime_nsec_np])
+if test "x${je_cv_clock_gettime_nsec_np}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_HAVE_CLOCK_GETTIME_NSEC_NP], [ ], [ ])
+fi
+
 dnl Use syscall(2) (if available) by default.
 AC_ARG_ENABLE([syscall],
   [AS_HELP_STRING([--disable-syscall], [Disable use of syscall(2)])],
@@ -1998,6 +2314,15 @@ if test "x$have_sched_setaffinity" = "x1" ; then
   AC_DEFINE([JEMALLOC_HAVE_SCHED_SETAFFINITY], [ ], [ ])
 fi
 
+dnl Check if the pthread_setaffinity_np function exists.
+AC_CHECK_FUNC([pthread_setaffinity_np],
+              [have_pthread_setaffinity_np="1"],
+              [have_pthread_setaffinity_np="0"]
+             )
+if test "x$have_pthread_setaffinity_np" = "x1" ; then
+  AC_DEFINE([JEMALLOC_HAVE_PTHREAD_SETAFFINITY_NP], [ ], [ ])
+fi
+
 dnl Check if the Solaris/BSD issetugid function exists.
 AC_CHECK_FUNC([issetugid],
               [have_issetugid="1"],
@@ -2041,6 +2366,14 @@ if test "x$have_memcntl" = "x1" ; then
   AC_DEFINE([JEMALLOC_HAVE_MEMCNTL], [ ], [ ])
 fi
 
+AC_CHECK_FUNC([prctl],
+	      [have_prctl="1"],
+	      [have_prctl="0"],
+	      )
+if test "x$have_prctl" = "x1" ; then
+  AC_DEFINE([JEMALLOC_HAVE_PRCTL], [ ], [ ])
+fi
+
 dnl Disable lazy locking by default.
 AC_ARG_ENABLE([lazy_lock],
   [AS_HELP_STRING([--enable-lazy-lock],
@@ -2203,6 +2536,13 @@ if test "x${je_cv_osatomic}" = "xyes" ; then
 fi
 
 dnl ============================================================================
+
+AC_ARG_WITH([experimental_sys_process_madvise],
+  [AS_HELP_STRING([--with-experimental-sys-process-madvise=<experimental-sys-process-madvise>],
+   [Force process_madvise and use experimental-sys-process-madvise number when making syscall])],
+  [je_cv_sys_pmadv_nr="${with_experimental_sys_process_madvise}"],
+  [je_cv_sys_pmadv_nr=""])
+
 dnl Check for madvise(2).
 
 JE_COMPILABLE([madvise(2)], [
@@ -2260,6 +2600,16 @@ if test "x${je_cv_madvise}" = "xyes" ; then
 	madvise((void *)0, 0, MADV_HUGEPAGE);
 	madvise((void *)0, 0, MADV_NOHUGEPAGE);
 ], [je_cv_thp])
+  case "${host_cpu}" in
+    arm*)
+      ;;
+    *)
+    if test "x${je_cv_thp}" = "xyes" ; then
+      AC_DEFINE([JEMALLOC_HAVE_MADVISE_HUGE], [ ], [ ])
+    fi
+    ;;
+  esac
+
   dnl Check for madvise(..., MADV_[NO]CORE).
   JE_COMPILABLE([madvise(..., MADV_[[NO]]CORE)], [
 #include <sys/mman.h>
@@ -2270,15 +2620,35 @@ if test "x${je_cv_madvise}" = "xyes" ; then
   if test "x${je_cv_madv_nocore}" = "xyes" ; then
     AC_DEFINE([JEMALLOC_MADVISE_NOCORE], [ ], [ ])
   fi
-case "${host_cpu}" in
-  arm*)
-    ;;
-  *)
-  if test "x${je_cv_thp}" = "xyes" ; then
-    AC_DEFINE([JEMALLOC_HAVE_MADVISE_HUGE], [ ], [ ])
+
+  dnl Check for madvise(..., MADV_COLLAPSE).
+  JE_COMPILABLE([madvise(..., MADV_COLLAPSE)], [
+#include <sys/mman.h>
+], [
+	madvise((void *)0, 0, MADV_COLLAPSE);
+], [je_cv_madv_collapse])
+  if test "x${je_cv_madv_collapse}" = "xyes" ; then
+    AC_DEFINE([JEMALLOC_HAVE_MADVISE_COLLAPSE], [ ], [ ])
+  fi
+
+  dnl Check for process_madvise
+  JE_COMPILABLE([process_madvise(2)], [
+#include <sys/pidfd.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+], [
+	syscall(SYS_process_madvise, PIDFD_SELF, (void *)0, 0, 0, 0);
+], [je_cv_process_madvise])
+  if test "x${je_cv_process_madvise}" = "xyes" ; then
+    AC_DEFINE([JEMALLOC_HAVE_PROCESS_MADVISE], [ ], [ ])
+  else
+    if test "x${je_cv_sys_pmadv_nr}" != "x" ; then
+      dnl Forcing experimental usage of process_madvise
+      AC_MSG_RESULT([Forcing usage of process_madvise with syscall nr=${je_cv_sys_pmadv_nr}])
+      AC_DEFINE([JEMALLOC_HAVE_PROCESS_MADVISE], [ ], [ ])
+      AC_DEFINE_UNQUOTED([EXPERIMENTAL_SYS_PROCESS_MADVISE_NR], [${je_cv_sys_pmadv_nr}], [ ])
+    fi
   fi
-  ;;
-esac
 else
   dnl Check for posix_madvise.
   JE_COMPILABLE([posix_madvise], [
@@ -2403,12 +2773,62 @@ AC_SUBST([enable_initial_exec_tls])
 if test "x${je_cv_tls_model}" = "xyes" -a \
        "x${enable_initial_exec_tls}" = "x1" ; then
   AC_DEFINE([JEMALLOC_TLS_MODEL],
-            [__attribute__((tls_model("initial-exec")))], 
+            [__attribute__((tls_model("initial-exec")))],
             [ ])
 else
   AC_DEFINE([JEMALLOC_TLS_MODEL], [ ], [ ])
 fi
 
+dnl Do not compile with debugging by default.
+AC_ARG_ENABLE([pageid],
+  [AS_HELP_STRING([--enable-pageid],
+                  [Enable named pages])],
+[if test "x$enable_pageid" = "xno" ; then
+  enable_pageid="0"
+else
+  enable_pageid="1"
+fi
+],
+[enable_pageid="0"]
+)
+if test "x$enable_pageid" = "x1" ; then
+  AC_DEFINE([JEMALLOC_PAGEID], [ ], [ ])
+fi
+
+AC_ARG_ENABLE([tsan],
+  [AS_HELP_STRING([--enable-tsan],
+                  [Enable thread sanitizer])],
+[if test "x$enable_tsan" = "xno" ; then
+  enable_tsan="0"
+else
+  enable_tsan="1"
+fi
+],
+[enable_tsan="0"]
+)
+if test "x$enable_tsan" = "x1" ; then
+  JE_CFLAGS_ADD([-fsanitize=thread])
+  JE_CXXFLAGS_ADD([-fsanitize=thread])
+  JE_LDFLAGS_ADD([-fsanitize=thread])
+fi
+
+AC_ARG_ENABLE([ubsan],
+  [AS_HELP_STRING([--enable-ubsan],
+                  [Enable undefined behavior sanitizer])],
+[if test "x$enable_ubsan" = "xno" ; then
+  enable_ubsan="0"
+else
+  enable_ubsan="1"
+fi
+],
+[enable_ubsan="0"]
+)
+if test "x$enable_ubsan" = "x1" ; then
+  JE_CFLAGS_ADD([-fsanitize=undefined])
+  JE_CXXFLAGS_ADD([-fsanitize=undefined])
+  JE_LDFLAGS_ADD([-fsanitize=undefined])
+fi
+
 dnl ============================================================================
 dnl Enable background threads if possible.
 
@@ -2468,6 +2888,15 @@ if test "x${je_cv_pthread_mutex_adaptive_np}" = "xyes" ; then
   AC_DEFINE([JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP], [ ], [ ])
 fi
 
+JE_COMPILABLE([gettid], [
+#include <unistd.h>
+], [
+  int tid = gettid();
+], [je_cv_gettid])
+if test "x${je_cv_gettid}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_HAVE_GETTID], [ ], [ ])
+fi
+
 JE_CFLAGS_SAVE()
 JE_CFLAGS_ADD([-D_GNU_SOURCE])
 JE_CFLAGS_ADD([-Werror])
@@ -2482,9 +2911,19 @@ JE_COMPILABLE([strerror_r returns char with gnu source], [
   char *error = strerror_r(EINVAL, buffer, 100);
   printf("%s\n", error);
 ], [je_cv_strerror_r_returns_char_with_gnu_source])
+if test "x${je_cv_strerror_r_returns_char_with_gnu_source}" = "xno" ; then
+  JE_COMPILABLE([strerror_r header only], [
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+], [], [je_cv_strerror_r_header_pass])
+fi
 JE_CFLAGS_RESTORE()
 if test "x${je_cv_strerror_r_returns_char_with_gnu_source}" = "xyes" ; then
   AC_DEFINE([JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE], [ ], [ ])
+elif test "x${je_cv_strerror_r_header_pass}" = "xno" ; then
+  AC_MSG_ERROR([cannot determine return type of strerror_r])
 fi
 
 dnl ============================================================================
@@ -2622,7 +3061,8 @@ AC_MSG_RESULT([CXX                : ${CXX}])
 AC_MSG_RESULT([CONFIGURE_CXXFLAGS : ${CONFIGURE_CXXFLAGS}])
 AC_MSG_RESULT([SPECIFIED_CXXFLAGS : ${SPECIFIED_CXXFLAGS}])
 AC_MSG_RESULT([EXTRA_CXXFLAGS     : ${EXTRA_CXXFLAGS}])
-AC_MSG_RESULT([LDFLAGS            : ${LDFLAGS}])
+AC_MSG_RESULT([CONFIGURE_LDFLAGS  : ${CONFIGURE_LDFLAGS}])
+AC_MSG_RESULT([SPECIFIED_LDFLAGS  : ${SPECIFIED_LDFLAGS}])
 AC_MSG_RESULT([EXTRA_LDFLAGS      : ${EXTRA_LDFLAGS}])
 AC_MSG_RESULT([DSO_LDFLAGS        : ${DSO_LDFLAGS}])
 AC_MSG_RESULT([LIBS               : ${LIBS}])
@@ -2638,6 +3078,8 @@ AC_MSG_RESULT([INCLUDEDIR         : ${INCLUDEDIR}])
 AC_MSG_RESULT([LIBDIR             : ${LIBDIR}])
 AC_MSG_RESULT([MANDIR             : ${MANDIR}])
 AC_MSG_RESULT([])
+AC_MSG_RESULT([LG_PAGE            : ${LG_PAGE}])
+AC_MSG_RESULT([])
 AC_MSG_RESULT([srcroot            : ${srcroot}])
 AC_MSG_RESULT([abs_srcroot        : ${abs_srcroot}])
 AC_MSG_RESULT([objroot            : ${objroot}])
@@ -2654,9 +3096,11 @@ AC_MSG_RESULT([static libs        : ${enable_static}])
 AC_MSG_RESULT([autogen            : ${enable_autogen}])
 AC_MSG_RESULT([debug              : ${enable_debug}])
 AC_MSG_RESULT([stats              : ${enable_stats}])
+AC_MSG_RESULT([user_config        : ${enable_user_config}])
 AC_MSG_RESULT([experimental_smallocx : ${enable_experimental_smallocx}])
 AC_MSG_RESULT([prof               : ${enable_prof}])
 AC_MSG_RESULT([prof-libunwind     : ${enable_prof_libunwind}])
+AC_MSG_RESULT([prof-frameptr      : ${enable_prof_frameptr}])
 AC_MSG_RESULT([prof-libgcc        : ${enable_prof_libgcc}])
 AC_MSG_RESULT([prof-gcc           : ${enable_prof_gcc}])
 AC_MSG_RESULT([fill               : ${enable_fill}])
@@ -2665,5 +3109,9 @@ AC_MSG_RESULT([xmalloc            : ${enable_xmalloc}])
 AC_MSG_RESULT([log                : ${enable_log}])
 AC_MSG_RESULT([lazy_lock          : ${enable_lazy_lock}])
 AC_MSG_RESULT([cache-oblivious    : ${enable_cache_oblivious}])
+AC_MSG_RESULT([pageid             : ${enable_pageid}])
 AC_MSG_RESULT([cxx                : ${enable_cxx}])
+AC_MSG_RESULT([dss                : ${enable_dss}])
+AC_MSG_RESULT([tsan               : ${enable_tsan}])
+AC_MSG_RESULT([ubsan              : ${enable_ubsan}])
 AC_MSG_RESULT([===============================================================================])
diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index e28e8f38..8bbe8120 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -33,6 +33,8 @@
     <refname>aligned_alloc</refname>
     <refname>realloc</refname>
     <refname>free</refname>
+    <refname>free_sized</refname>
+    <refname>free_aligned_sized</refname>
     <refname>mallocx</refname>
     <refname>rallocx</refname>
     <refname>xallocx</refname>
@@ -89,6 +91,17 @@
           <funcdef>void <function>free</function></funcdef>
           <paramdef>void *<parameter>ptr</parameter></paramdef>
         </funcprototype>
+        <funcprototype>
+          <funcdef>void <function>free_sized</function></funcdef>
+          <paramdef>void *<parameter>ptr</parameter></paramdef>
+          <paramdef>size_t <parameter>size</parameter></paramdef>
+        </funcprototype>
+        <funcprototype>
+          <funcdef>void <function>free_aligned_sized</function></funcdef>
+          <paramdef>void *<parameter>ptr</parameter></paramdef>
+          <paramdef>size_t <parameter>alignment</parameter></paramdef>
+          <paramdef>size_t <parameter>size</parameter></paramdef>
+        </funcprototype>
       </refsect2>
       <refsect2>
         <title>Non-standard API</title>
@@ -227,6 +240,17 @@
       allocated memory referenced by <parameter>ptr</parameter> to be made
       available for future allocations.  If <parameter>ptr</parameter> is
       <constant>NULL</constant>, no action occurs.</para>
+
+      <para>The <function>free_sized()</function> function is an extension of
+      <function>free()</function> with a <parameter>size</parameter> parameter
+      to allow the caller to pass in the allocation size as an optimization.
+      </para>
+
+      <para>The <function>free_aligned_sized()</function> function accepts a
+      <parameter>ptr</parameter> which was allocated with a requested
+      <parameter>size</parameter> and <parameter>alignment</parameter>, causing
+      the allocated memory referenced by <parameter>ptr</parameter> to be made
+      available for future allocations.</para>
     </refsect2>
     <refsect2>
       <title>Non-standard API</title>
@@ -451,6 +475,24 @@ for (i = 0; i < nbins; i++) {
       depended on, since such behavior is entirely implementation-dependent.
       </para>
     </refsect2>
+    <refsect2>
+      <title>Interactions Between the Standard and Non-standard APIs</title>
+      <para>Generally speaking it is permissible to pass pointers obtained from
+      the standard API to the non-standard API and vice versa (e.g. calling
+      <function>free()</function> with a pointer returned by a call to
+      <function>mallocx()</function>, calling <function>sdallocx()</function>
+      with a pointer returned by a call to <function>calloc()</function>).
+      There are however a few exceptions. In keeping with the C23 standard –
+      which forbids calling <function>free_sized()</function> on a pointer
+      returned by <function>aligned_alloc()</function>, mandating that either
+      <function>free_aligned_sized()</function> or <function>free()</function>
+      be used instead – using any combination of the standard and non-standard
+      APIs in an equivalent fashion (i.e. taking a pointer which was allocated
+      with an explicitly requested alignment and attempting to free it via an
+      API that accepts a size hint, without also providing the alignment hint)
+      is likewise forbidden.
+      </para>
+    </refsect2>
   </refsect1>
   <refsect1 id="tuning">
     <title>TUNING</title>
@@ -1095,7 +1137,7 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay",
         </term>
         <listitem><para>Maximum number of background threads that will be created
         if <link linkend="background_thread">background_thread</link> is set.
-        Defaults to number of cpus.</para></listitem>
+        Defaults to 4.</para></listitem>
       </varlistentry>
 
       <varlistentry id="opt.dirty_decay_ms">
@@ -1121,9 +1163,7 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay",
         linkend="arena.i.dirty_decay_ms"><mallctl>arena.&lt;i&gt;.dirty_decay_ms</mallctl></link>
         for related dynamic control options.  See <link
         linkend="opt.muzzy_decay_ms"><mallctl>opt.muzzy_decay_ms</mallctl></link>
-        for a description of muzzy pages.for a description of muzzy pages.  Note
-        that when the <link
-        linkend="opt.oversize_threshold"><mallctl>oversize_threshold</mallctl></link>
+        for a description of muzzy pages.  Note that when the <link linkend="opt.oversize_threshold"><mallctl>oversize_threshold</mallctl></link>
         feature is enabled, the arenas reserved for oversize requests may have
         its own default decay settings.</para></listitem>
       </varlistentry>
@@ -1145,7 +1185,7 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay",
         purged according to a sigmoidal decay curve that starts and ends with
         zero purge rate.  A decay time of 0 causes all unused muzzy pages to be
         purged immediately upon creation.  A decay time of -1 disables purging.
-        The default decay time is 10 seconds.  See <link
+        Muzzy decay is disabled by default (with decay time 0).  See <link
         linkend="arenas.muzzy_decay_ms"><mallctl>arenas.muzzy_decay_ms</mallctl></link>
         and <link
         linkend="arena.i.muzzy_decay_ms"><mallctl>arena.&lt;i&gt;.muzzy_decay_ms</mallctl></link>
@@ -1369,6 +1409,17 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         extent hooks.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="opt.prof_bt_max">
+        <term>
+          <mallctl>opt.prof_bt_max</mallctl>
+          (<type>unsigned</type>)
+          <literal>r-</literal>
+          [<option>--enable-prof</option>]
+        </term>
+        <listitem><para>Maximum number of stack frames to record in profiling
+        backtraces.  The default is 128.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="opt.prof">
         <term>
           <mallctl>opt.prof</mallctl>
@@ -1474,6 +1525,23 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         by default.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="opt.prof_pid_namespace">
+        <term>
+          <mallctl>opt.prof_pid_namespace</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+          [<option>--enable-prof</option>]
+        </term>
+        <listitem><para>Enable adding the pid namespace to the profile
+        filename. Profiles are dumped to files named according to the pattern
+        <filename>&lt;prefix&gt;.&lt;pid_namespace&gt;.&lt;pid&gt;.&lt;seq&gt;.i&lt;iseq&gt;.heap</filename>,
+        where <literal>&lt;prefix&gt;</literal> is controlled by the <link
+        linkend="opt.prof_prefix"><mallctl>opt.prof_prefix</mallctl></link> and
+        <link linkend="prof.prefix"><mallctl>prof.prefix</mallctl></link>
+        options.
+        </para></listitem>
+      </varlistentry>
+
       <varlistentry id="opt.lg_prof_interval">
         <term>
           <mallctl>opt.lg_prof_interval</mallctl>
@@ -1599,6 +1667,53 @@ malloc_conf = "xmalloc:true";]]></programlisting>
 	testing this behavior.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="opt.debug_double_free_max_scan">
+        <term>
+          <mallctl>opt.debug_double_free_max_scan</mallctl>
+          (<type>unsigned</type>)
+          <literal>r-</literal>
+          [<option>--enable-debug</option>]
+        </term>
+        <listitem><para>Maximum number of cached pointers to scan in the
+        thread cache when checking for double-free errors on deallocation.
+        When debug is enabled, each deallocation into the tcache scans up to
+        this many recently cached pointers to detect whether the same pointer
+        is being freed twice.  Setting this to 0 disables the check.  This
+        option is set to 0 and has no effect when debug is not enabled.  The
+        default is 32.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.disable_large_size_classes">
+        <term>
+          <mallctl>opt.disable_large_size_classes</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>When enabled (the default), large allocations
+        (i.e. allocations of size &gt;= <constant>SC_LARGE_MINCLASS</constant>)
+        are rounded up to the nearest page boundary rather than the nearest
+        large size class.  This minimizes memory overhead, especially when
+        using hugepages, at the cost of disabling the standard large size
+        class hierarchy.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.process_madvise_max_batch">
+        <term>
+          <mallctl>opt.process_madvise_max_batch</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Maximum number of memory regions to include in each
+        <citerefentry><refentrytitle>process_madvise</refentrytitle>
+        <manvolnum>2</manvolnum></citerefentry> batch call.  When set to 0
+        (the default), process_madvise is not used, and the standard
+        <citerefentry><refentrytitle>madvise</refentrytitle>
+        <manvolnum>2</manvolnum></citerefentry> is used instead.  Setting this
+        to a positive value enables batched purging via process_madvise, which
+        can reduce the number of system calls needed for
+        purging.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="thread.arena">
         <term>
           <mallctl>thread.arena</mallctl>
@@ -1735,6 +1850,47 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         the developer may find manual flushing useful.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="thread.tcache.max">
+        <term>
+          <mallctl>thread.tcache.max</mallctl>
+          (<type>size_t</type>)
+          <literal>rw</literal>
+        </term>
+        <listitem><para>Get or set the maximum cached size class
+        (<varname>tcache_max</varname>) for the calling thread's tcache.  The
+        value is clamped to the maximum allowed limit and rounded up to the
+        nearest size class boundary.  Changing this value will resize the
+        thread cache accordingly.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="thread.tcache.ncached_max.read_sizeclass">
+        <term>
+          <mallctl>thread.tcache.ncached_max.read_sizeclass</mallctl>
+          (<type>size_t</type>)
+          <literal>rw</literal>
+        </term>
+        <listitem><para>Query the maximum number of cached objects
+        (<varname>ncached_max</varname>) for a given size class in the calling
+        thread's tcache.  The size class is passed in via
+        <parameter>newp</parameter>, and the corresponding
+        <varname>ncached_max</varname> is returned via
+        <parameter>oldp</parameter>.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="thread.tcache.ncached_max.write">
+        <term>
+          <mallctl>thread.tcache.ncached_max.write</mallctl>
+          (<type>char *</type>)
+          <literal>-w</literal>
+        </term>
+        <listitem><para>Set the maximum number of cached objects
+        (<varname>ncached_max</varname>) for size classes in the calling
+        thread's tcache.  The input is a string of pipe-separated settings,
+        where each setting specifies a size range and a count, in the same
+        format as the <mallctl>opt.tcache_ncached_max</mallctl> runtime
+        option.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="thread.prof.name">
         <term>
           <mallctl>thread.prof.name</mallctl>
@@ -1918,6 +2074,24 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         linkend="thread.arena"><mallctl>thread.arena</mallctl></link>.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="arena.i.name">
+        <term>
+          <mallctl>arena.&lt;i&gt;.name</mallctl>
+          (<type>char *</type>)
+          <literal>rw</literal>
+        </term>
+        <listitem><para>Get or set a descriptive name for arena &lt;i&gt;.
+        Arena names can be up to 32 characters long (including the null
+        terminator); longer names are truncated.  When reading, the caller
+        passes a pointer to a pre-allocated buffer (of at least 32 bytes) via
+        <parameter>oldp</parameter>, and
+        <parameter>*oldlenp</parameter> must be
+        <code language="C">sizeof(<type>char *</type>)</code>.
+        Arena names are also included in the output of <link
+        linkend="stats_print"><function>malloc_stats_print()</function></link>.
+        </para></listitem>
+      </varlistentry>
+
       <varlistentry id="arena.i.dss">
         <term>
           <mallctl>arena.&lt;i&gt;.dss</mallctl>
@@ -2275,6 +2449,18 @@ struct extent_hooks_s {
         <listitem><para>Page size.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="arenas.hugepage">
+        <term>
+          <mallctl>arenas.hugepage</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Hugepage size.  This value is also reported in the
+        output of <link
+        linkend="stats_print"><function>malloc_stats_print()</function></link>.
+        </para></listitem>
+      </varlistentry>
+
       <varlistentry id="arenas.tcache_max">
         <term>
           <mallctl>arenas.tcache_max</mallctl>
@@ -2494,6 +2680,24 @@ struct extent_hooks_s {
         option for additional information.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="approximate_stats.active">
+        <term>
+          <mallctl>approximate_stats.active</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Return the total number of bytes in active pages
+        collected in an unsynchronized manner, without requiring an
+        <link linkend="epoch"><mallctl>epoch</mallctl></link> update.
+        As a result, this value should NOT be compared with other
+        stats.  For example, the relative ordering between
+        <mallctl>approximate_stats.active</mallctl> and <link
+        linkend="stats.active"><mallctl>stats.active</mallctl></link> or <link
+        linkend="stats.resident"><mallctl>stats.resident</mallctl></link> is
+        not guaranteed.  This interface is intended for lightweight monitoring
+        where an approximate value is sufficient.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="stats.allocated">
         <term>
           <mallctl>stats.allocated</mallctl>
@@ -3267,7 +3471,7 @@ struct extent_hooks_s {
         <listitem><para>Current number of nonfull slabs.</para></listitem>
       </varlistentry>
 
-      <varlistentry id="stats.arenas.i.bins.mutex">
+      <varlistentry id="stats.arenas.i.bins.j.mutex">
         <term>
           <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.mutex.{counter}</mallctl>
           (<type>counter specific type</type>) <literal>r-</literal>
diff --git a/doc_internal/PROFILING_INTERNALS.md b/doc_internal/PROFILING_INTERNALS.md
index 0a9f31c0..f337fb88 100644
--- a/doc_internal/PROFILING_INTERNALS.md
+++ b/doc_internal/PROFILING_INTERNALS.md
@@ -99,7 +99,25 @@ Using this approach means that there are a few things users need to be aware of.
 If one stack appears twice as often as another, this by itself does not imply that it allocates twice as often. Consider the case in which there are only two types of allocating call stacks in a program. Stack A allocates 8 bytes, and occurs a million times in a program. Stack B allocates 8 MB, and occurs just once in a program. If our sampling rate $R$ is about 1MB, we expect stack A to show up about 8 times, and stack B to show up once. Stack A isn't 8 times more frequent than stack B, though; it's a million times more frequent.
 
 ### Aggregation must be done after unbiasing samples
-Some tools manually parse heap dump output, and aggregate across stacks (or across program runs) to provide wider-scale data analyses. When doing this aggregation, though, it's important to unbias-and-then-sum, rather than sum-and-then-unbias. Reusing our example from the previous section: suppose we collect heap dumps of the program from a million machines. We then have 8 million occurs of stack A (each of 8 bytes), and a million occurrences of stack B (each of 8 MB). If we sum first, we'll attribute 64 MB to stack A, and 8 TB to stack B. Unbiasing changes these numbers by an infinitesimal amount, so that sum-then-unbias dramatically underreports the amount of memory allocated by stack A.
+Some tools manually parse heap dump output, and aggregate across stacks (or across program runs) to provide wider-scale data analyses. When doing this aggregation, though, it's important to unbias-and-then-sum, rather than sum-and-then-unbias. Reusing our example from the previous section: suppose we collect heap dumps of the program from 1 million machines. We then have 8 million samples of stack A (8 per machine, each of 8 bytes), and 1 million samples of stack B (1 per machine, each of 8 MB).
+
+If we sum first then unbias based on this formula: $1 - e^{-Z/R}$ we get:
+
+$$Z = 8,000,000 * 8 bytes = 64MB$$
+$$64MB / (1 - e^{-64MB/1MB}) \approx 64MB (Stack A)$$
+
+$$Z = 1,000,000 * 8MB = 8TB$$
+$$8TB / (1 - e^{-1TB/1MB}) \approx 8TB (Stack B)$$
+
+Clearly we are unbiasing by an infinitesimal amount, which dramatically underreports the amount of memory allocated by stack A. Whereas if we unbias first and then sum:
+
+$$Z = 8 bytes$$
+$$8 bytes / (1 - e^{-8 bytes/1MB}) \approx 1MB$$
+$$1MB * 8,000,000 = 8TB (Stack A)$$
+
+$$Z = 8MB$$
+$$8MB / (1 - e^{-8MB/1MB})  \approx 8MB$$
+$$8MB * 1,000,000 = 8TB (Stack B)$$
 
 ## An avenue for future exploration
 While the framework we laid out above is pretty general, as an engineering decision we're only interested in fairly simple approaches (i.e. ones for which the chance of an allocation being sampled depends only on its size). Our job is then: for each size class $Z$, pick a probability $p_Z$ that an allocation of that size will be sampled. We made some handwave-y references to statistical distributions to justify our choices, but there's no reason we need to pick them that way. Any set of non-zero probabilities is a valid choice.
diff --git a/include/jemalloc/internal/activity_callback.h b/include/jemalloc/internal/activity_callback.h
deleted file mode 100644
index 6c2e84e3..00000000
--- a/include/jemalloc/internal/activity_callback.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef JEMALLOC_INTERNAL_ACTIVITY_CALLBACK_H
-#define JEMALLOC_INTERNAL_ACTIVITY_CALLBACK_H
-
-/*
- * The callback to be executed "periodically", in response to some amount of
- * allocator activity.
- *
- * This callback need not be computing any sort of peak (although that's the
- * intended first use case), but we drive it from the peak counter, so it's
- * keeps things tidy to keep it here.
- *
- * The calls to this thunk get driven by the peak_event module.
- */
-#define ACTIVITY_CALLBACK_THUNK_INITIALIZER {NULL, NULL}
-typedef void (*activity_callback_t)(void *uctx, uint64_t allocated,
-    uint64_t deallocated);
-typedef struct activity_callback_thunk_s activity_callback_thunk_t;
-struct activity_callback_thunk_s {
-	activity_callback_t callback;
-	void *uctx;
-};
-
-#endif /* JEMALLOC_INTERNAL_ACTIVITY_CALLBACK_H */
diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h
index e6fceaaf..39794b3e 100644
--- a/include/jemalloc/internal/arena_externs.h
+++ b/include/jemalloc/internal/arena_externs.h
@@ -1,8 +1,11 @@
 #ifndef JEMALLOC_INTERNAL_ARENA_EXTERNS_H
 #define JEMALLOC_INTERNAL_ARENA_EXTERNS_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/arena_stats.h"
 #include "jemalloc/internal/bin.h"
 #include "jemalloc/internal/div.h"
+#include "jemalloc/internal/emap.h"
 #include "jemalloc/internal/extent_dss.h"
 #include "jemalloc/internal/hook.h"
 #include "jemalloc/internal/pages.h"
@@ -18,104 +21,105 @@ extern ssize_t opt_dirty_decay_ms;
 extern ssize_t opt_muzzy_decay_ms;
 
 extern percpu_arena_mode_t opt_percpu_arena;
-extern const char *percpu_arena_mode_names[];
+extern const char *const   percpu_arena_mode_names[];
 
 extern div_info_t arena_binind_div_info[SC_NBINS];
 
-extern malloc_mutex_t arenas_lock;
 extern emap_t arena_emap_global;
 
 extern size_t opt_oversize_threshold;
 extern size_t oversize_threshold;
 
+extern bool      opt_huge_arena_pac_thp;
+extern pac_thp_t huge_arena_pac_thp;
+
 /*
  * arena_bin_offsets[binind] is the offset of the first bin shard for size class
  * binind.
  */
 extern uint32_t arena_bin_offsets[SC_NBINS];
 
-void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena,
-    unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms,
-    ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy);
+void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
+    const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms,
+    size_t *nactive, size_t *ndirty, size_t *nmuzzy);
 void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
     const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms,
     size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats,
-    bin_stats_data_t *bstats, arena_stats_large_t *lstats,
-    pac_estats_t *estats, hpa_shard_stats_t *hpastats, sec_stats_t *secstats);
+    bin_stats_data_t *bstats, arena_stats_large_t *lstats, pac_estats_t *estats,
+    hpa_shard_stats_t *hpastats);
 void arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena);
-edata_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena,
-    size_t usize, size_t alignment, bool zero);
-void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena,
-    edata_t *edata);
-void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena,
-    edata_t *edata, size_t oldsize);
-void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena,
-    edata_t *edata, size_t oldsize);
-bool arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, extent_state_t state,
-    ssize_t decay_ms);
+edata_t *arena_extent_alloc_large(
+    tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero);
+void arena_extent_dalloc_large_prep(
+    tsdn_t *tsdn, arena_t *arena, edata_t *edata);
+void arena_extent_ralloc_large_shrink(
+    tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldusize);
+void arena_extent_ralloc_large_expand(
+    tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldusize);
+bool arena_decay_ms_set(
+    tsdn_t *tsdn, arena_t *arena, extent_state_t state, ssize_t decay_ms);
 ssize_t arena_decay_ms_get(arena_t *arena, extent_state_t state);
-void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread,
-    bool all);
-uint64_t arena_time_until_deferred(tsdn_t *tsdn, arena_t *arena);
-void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena);
-void arena_reset(tsd_t *tsd, arena_t *arena);
-void arena_destroy(tsd_t *tsd, arena_t *arena);
-void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena,
-    cache_bin_t *cache_bin, cache_bin_info_t *cache_bin_info, szind_t binind,
-    const unsigned nfill);
+void    arena_decay(
+       tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all);
+uint64_t       arena_time_until_deferred(tsdn_t *tsdn, arena_t *arena);
+void           arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena);
+void           arena_reset(tsd_t *tsd, arena_t *arena);
+void           arena_destroy(tsd_t *tsd, arena_t *arena);
+cache_bin_sz_t arena_ptr_array_fill_small(tsdn_t *tsdn, arena_t *arena,
+    szind_t binind, cache_bin_ptr_array_t *arr, const cache_bin_sz_t nfill_min,
+    const cache_bin_sz_t nfill_max, cache_bin_stats_t merge_stats);
 
-void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size,
-    szind_t ind, bool zero);
-void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize,
-    size_t alignment, bool zero, tcache_t *tcache);
-void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize);
-void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
-    bool slow_path);
+void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind,
+    bool zero, bool slab);
+void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
+    bool zero, bool slab, tcache_t *tcache);
+void  arena_prof_promote(
+     tsdn_t *tsdn, void *ptr, size_t usize, size_t bumped_usize);
+void arena_dalloc_promoted(
+    tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path);
 void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab);
 
-void arena_dalloc_bin_locked_handle_newly_empty(tsdn_t *tsdn, arena_t *arena,
-    edata_t *slab, bin_t *bin);
-void arena_dalloc_bin_locked_handle_newly_nonempty(tsdn_t *tsdn, arena_t *arena,
-    edata_t *slab, bin_t *bin);
-void arena_dalloc_small(tsdn_t *tsdn, void *ptr);
-bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
-    size_t extra, bool zero, size_t *newsize);
+void  arena_dalloc_small(tsdn_t *tsdn, void *ptr);
+void  arena_ptr_array_flush(tsd_t *tsd, szind_t binind,
+     cache_bin_ptr_array_t *arr, unsigned nflush, bool small,
+     arena_t *stats_arena, cache_bin_stats_t merge_stats);
+bool  arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
+     size_t extra, bool zero, size_t *newsize);
 void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize,
-    size_t size, size_t alignment, bool zero, tcache_t *tcache,
+    size_t size, size_t alignment, bool zero, bool slab, tcache_t *tcache,
     hook_ralloc_args_t *hook_args);
-dss_prec_t arena_dss_prec_get(arena_t *arena);
-ehooks_t *arena_get_ehooks(arena_t *arena);
-extent_hooks_t *arena_set_extent_hooks(tsd_t *tsd, arena_t *arena,
-    extent_hooks_t *extent_hooks);
-bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec);
+dss_prec_t      arena_dss_prec_get(arena_t *arena);
+ehooks_t       *arena_get_ehooks(arena_t *arena);
+extent_hooks_t *arena_set_extent_hooks(
+    tsd_t *tsd, arena_t *arena, extent_hooks_t *extent_hooks);
+bool    arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec);
+void    arena_name_get(arena_t *arena, char *name);
+void    arena_name_set(arena_t *arena, const char *name);
 ssize_t arena_dirty_decay_ms_default_get(void);
-bool arena_dirty_decay_ms_default_set(ssize_t decay_ms);
+bool    arena_dirty_decay_ms_default_set(ssize_t decay_ms);
 ssize_t arena_muzzy_decay_ms_default_get(void);
-bool arena_muzzy_decay_ms_default_set(ssize_t decay_ms);
-bool arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena,
-    size_t *old_limit, size_t *new_limit);
+bool    arena_muzzy_decay_ms_default_set(ssize_t decay_ms);
+bool    arena_retain_grow_limit_get_set(
+       tsd_t *tsd, arena_t *arena, size_t *old_limit, size_t *new_limit);
 unsigned arena_nthreads_get(arena_t *arena, bool internal);
-void arena_nthreads_inc(arena_t *arena, bool internal);
-void arena_nthreads_dec(arena_t *arena, bool internal);
+void     arena_nthreads_inc(arena_t *arena, bool internal);
+void     arena_nthreads_dec(arena_t *arena, bool internal);
 arena_t *arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config);
-bool arena_init_huge(void);
-bool arena_is_huge(unsigned arena_ind);
+bool     arena_init_huge(tsdn_t *tsdn, arena_t *a0);
 arena_t *arena_choose_huge(tsd_t *tsd);
-bin_t *arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind,
-    unsigned *binshard);
 size_t arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind,
     void **ptrs, size_t nfill, bool zero);
-bool arena_boot(sc_data_t *sc_data, base_t *base, bool hpa);
-void arena_prefork0(tsdn_t *tsdn, arena_t *arena);
-void arena_prefork1(tsdn_t *tsdn, arena_t *arena);
-void arena_prefork2(tsdn_t *tsdn, arena_t *arena);
-void arena_prefork3(tsdn_t *tsdn, arena_t *arena);
-void arena_prefork4(tsdn_t *tsdn, arena_t *arena);
-void arena_prefork5(tsdn_t *tsdn, arena_t *arena);
-void arena_prefork6(tsdn_t *tsdn, arena_t *arena);
-void arena_prefork7(tsdn_t *tsdn, arena_t *arena);
-void arena_prefork8(tsdn_t *tsdn, arena_t *arena);
-void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena);
-void arena_postfork_child(tsdn_t *tsdn, arena_t *arena);
+bool   arena_boot(sc_data_t *sc_data, base_t *base, bool hpa);
+void   arena_prefork0(tsdn_t *tsdn, arena_t *arena);
+void   arena_prefork1(tsdn_t *tsdn, arena_t *arena);
+void   arena_prefork2(tsdn_t *tsdn, arena_t *arena);
+void   arena_prefork3(tsdn_t *tsdn, arena_t *arena);
+void   arena_prefork4(tsdn_t *tsdn, arena_t *arena);
+void   arena_prefork5(tsdn_t *tsdn, arena_t *arena);
+void   arena_prefork6(tsdn_t *tsdn, arena_t *arena);
+void   arena_prefork7(tsdn_t *tsdn, arena_t *arena);
+void   arena_prefork8(tsdn_t *tsdn, arena_t *arena);
+void   arena_postfork_parent(tsdn_t *tsdn, arena_t *arena);
+void   arena_postfork_child(tsdn_t *tsdn, arena_t *arena);
 
 #endif /* JEMALLOC_INTERNAL_ARENA_EXTERNS_H */
diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h
index 8568358c..214ce80b 100644
--- a/include/jemalloc/internal/arena_inlines_a.h
+++ b/include/jemalloc/internal/arena_inlines_a.h
@@ -1,6 +1,9 @@
 #ifndef JEMALLOC_INTERNAL_ARENA_INLINES_A_H
 #define JEMALLOC_INTERNAL_ARENA_INLINES_A_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/arena_structs.h"
+
 static inline unsigned
 arena_ind_get(const arena_t *arena) {
 	return arena->ind;
diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h
index fa81537c..bda256b9 100644
--- a/include/jemalloc/internal/arena_inlines_b.h
+++ b/include/jemalloc/internal/arena_inlines_b.h
@@ -1,20 +1,29 @@
 #ifndef JEMALLOC_INTERNAL_ARENA_INLINES_B_H
 #define JEMALLOC_INTERNAL_ARENA_INLINES_B_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/arena_externs.h"
+#include "jemalloc/internal/arena_structs.h"
+#include "jemalloc/internal/bin_inlines.h"
 #include "jemalloc/internal/div.h"
 #include "jemalloc/internal/emap.h"
+#include "jemalloc/internal/jemalloc_internal_inlines_b.h"
 #include "jemalloc/internal/jemalloc_internal_types.h"
+#include "jemalloc/internal/large_externs.h"
 #include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/prof_externs.h"
+#include "jemalloc/internal/prof_structs.h"
 #include "jemalloc/internal/rtree.h"
 #include "jemalloc/internal/safety_check.h"
 #include "jemalloc/internal/sc.h"
 #include "jemalloc/internal/sz.h"
+#include "jemalloc/internal/tcache_inlines.h"
 #include "jemalloc/internal/ticker.h"
 
 static inline arena_t *
 arena_get_from_edata(edata_t *edata) {
-	return (arena_t *)atomic_load_p(&arenas[edata_arena_ind_get(edata)],
-	    ATOMIC_RELAXED);
+	return (arena_t *)atomic_load_p(
+	    &arenas[edata_arena_ind_get(edata)], ATOMIC_RELAXED);
 }
 
 JEMALLOC_ALWAYS_INLINE arena_t *
@@ -28,14 +37,48 @@ arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) {
 	 * 1) is using auto arena selection (i.e. arena == NULL), and 2) the
 	 * thread is not assigned to a manual arena.
 	 */
-	if (unlikely(size >= oversize_threshold)) {
-		arena_t *tsd_arena = tsd_arena_get(tsd);
-		if (tsd_arena == NULL || arena_is_auto(tsd_arena)) {
-			return arena_choose_huge(tsd);
-		}
+	arena_t *tsd_arena = tsd_arena_get(tsd);
+	if (tsd_arena == NULL) {
+		tsd_arena = arena_choose(tsd, NULL);
 	}
 
-	return arena_choose(tsd, NULL);
+	size_t threshold = atomic_load_zu(
+	    &tsd_arena->pa_shard.pac.oversize_threshold, ATOMIC_RELAXED);
+	if (unlikely(size >= threshold) && arena_is_auto(tsd_arena)) {
+		return arena_choose_huge(tsd);
+	}
+
+	return tsd_arena;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+large_dalloc_safety_checks(edata_t *edata, const void *ptr, size_t input_size) {
+	if (!config_opt_safety_checks) {
+		return false;
+	}
+
+	/*
+	 * Eagerly detect double free and sized dealloc bugs for large sizes.
+	 * The cost is low enough (as edata will be accessed anyway) to be
+	 * enabled all the time.
+	 */
+	if (unlikely(edata == NULL
+	        || edata_state_get(edata) != extent_state_active)) {
+		safety_check_fail(
+		    "Invalid deallocation detected: "
+		    "pages being freed (%p) not currently active, "
+		    "possibly caused by double free bugs.",
+		    ptr);
+		return true;
+	}
+	if (unlikely(input_size != edata_usize_get(edata)
+	        || input_size > SC_LARGE_MAXCLASS)) {
+		safety_check_fail_sized_dealloc(/* current_dealloc */ true, ptr,
+		    /* true_size */ edata_usize_get(edata), input_size);
+		return true;
+	}
+
+	return false;
 }
 
 JEMALLOC_ALWAYS_INLINE void
@@ -46,48 +89,56 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx,
 	assert(prof_info != NULL);
 
 	edata_t *edata = NULL;
-	bool is_slab;
+	bool     is_slab;
 
 	/* Static check. */
 	if (alloc_ctx == NULL) {
-		edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global,
-		    ptr);
+		edata = emap_edata_lookup(
+		    tsd_tsdn(tsd), &arena_emap_global, ptr);
 		is_slab = edata_slab_get(edata);
 	} else if (unlikely(!(is_slab = alloc_ctx->slab))) {
-		edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global,
-		    ptr);
+		edata = emap_edata_lookup(
+		    tsd_tsdn(tsd), &arena_emap_global, ptr);
 	}
 
 	if (unlikely(!is_slab)) {
 		/* edata must have been initialized at this point. */
 		assert(edata != NULL);
+		size_t usize = (alloc_ctx == NULL)
+		    ? edata_usize_get(edata)
+		    : emap_alloc_ctx_usize_get(alloc_ctx);
+		if (reset_recent
+		    && large_dalloc_safety_checks(edata, ptr, usize)) {
+			prof_info->alloc_tctx = PROF_TCTX_SENTINEL;
+			return;
+		}
 		large_prof_info_get(tsd, edata, prof_info, reset_recent);
 	} else {
-		prof_info->alloc_tctx = (prof_tctx_t *)(uintptr_t)1U;
+		prof_info->alloc_tctx = PROF_TCTX_SENTINEL;
 		/*
 		 * No need to set other fields in prof_info; they will never be
-		 * accessed if (uintptr_t)alloc_tctx == (uintptr_t)1U.
+		 * accessed if alloc_tctx == PROF_TCTX_SENTINEL.
 		 */
 	}
 }
 
 JEMALLOC_ALWAYS_INLINE void
-arena_prof_tctx_reset(tsd_t *tsd, const void *ptr,
-    emap_alloc_ctx_t *alloc_ctx) {
+arena_prof_tctx_reset(
+    tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx) {
 	cassert(config_prof);
 	assert(ptr != NULL);
 
 	/* Static check. */
 	if (alloc_ctx == NULL) {
-		edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd),
-		    &arena_emap_global, ptr);
+		edata_t *edata = emap_edata_lookup(
+		    tsd_tsdn(tsd), &arena_emap_global, ptr);
 		if (unlikely(!edata_slab_get(edata))) {
 			large_prof_tctx_reset(edata);
 		}
 	} else {
 		if (unlikely(!alloc_ctx->slab)) {
-			edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd),
-			    &arena_emap_global, ptr);
+			edata_t *edata = emap_edata_lookup(
+			    tsd_tsdn(tsd), &arena_emap_global, ptr);
 			large_prof_tctx_reset(edata);
 		}
 	}
@@ -98,16 +149,16 @@ arena_prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) {
 	cassert(config_prof);
 	assert(ptr != NULL);
 
-	edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global,
-	    ptr);
+	edata_t *edata = emap_edata_lookup(
+	    tsd_tsdn(tsd), &arena_emap_global, ptr);
 	assert(!edata_slab_get(edata));
 
 	large_prof_tctx_reset(edata);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-arena_prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx,
-    size_t size) {
+arena_prof_info_set(
+    tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx, size_t size) {
 	cassert(config_prof);
 
 	assert(!edata_slab_get(edata));
@@ -130,8 +181,9 @@ arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) {
 	 * use a single ticker for all of them.
 	 */
 	ticker_geom_t *decay_ticker = tsd_arena_decay_tickerp_get(tsd);
-	uint64_t *prng_state = tsd_prng_statep_get(tsd);
-	if (unlikely(ticker_geom_ticks(decay_ticker, prng_state, nticks))) {
+	uint64_t      *prng_state = tsd_prng_statep_get(tsd);
+	if (unlikely(ticker_geom_ticks(decay_ticker, prng_state, nticks,
+	        tsd_reentrancy_level_get(tsd) > 0))) {
 		arena_decay(tsdn, arena, false, false);
 	}
 }
@@ -143,23 +195,24 @@ arena_decay_tick(tsdn_t *tsdn, arena_t *arena) {
 
 JEMALLOC_ALWAYS_INLINE void *
 arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero,
-    tcache_t *tcache, bool slow_path) {
+    bool slab, tcache_t *tcache, bool slow_path) {
 	assert(!tsdn_null(tsdn) || tcache == NULL);
 
 	if (likely(tcache != NULL)) {
-		if (likely(size <= SC_SMALL_MAXCLASS)) {
-			return tcache_alloc_small(tsdn_tsd(tsdn), arena,
-			    tcache, size, ind, zero, slow_path);
+		if (likely(slab)) {
+			assert(sz_can_use_slab(size));
+			return tcache_alloc_small(tsdn_tsd(tsdn), arena, tcache,
+			    size, ind, zero, slow_path);
+		} else if (likely(ind < tcache_nbins_get(tcache->tcache_slow)
+		               && !tcache_bin_disabled(ind, &tcache->bins[ind],
+		                   tcache->tcache_slow))) {
+			return tcache_alloc_large(tsdn_tsd(tsdn), arena, tcache,
+			    size, ind, zero, slow_path);
 		}
-		if (likely(size <= tcache_maxclass)) {
-			return tcache_alloc_large(tsdn_tsd(tsdn), arena,
-			    tcache, size, ind, zero, slow_path);
-		}
-		/* (size > tcache_maxclass) case falls through. */
-		assert(size > tcache_maxclass);
+		/* (size > tcache_max) case falls through. */
 	}
 
-	return arena_malloc_hard(tsdn, arena, size, ind, zero);
+	return arena_malloc_hard(tsdn, arena, size, ind, zero, slab);
 }
 
 JEMALLOC_ALWAYS_INLINE arena_t *
@@ -176,7 +229,7 @@ arena_salloc(tsdn_t *tsdn, const void *ptr) {
 	emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx);
 	assert(alloc_ctx.szind != SC_NSIZES);
 
-	return sz_index2size(alloc_ctx.szind);
+	return emap_alloc_ctx_usize_get(&alloc_ctx);
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
@@ -191,8 +244,8 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) {
 	 */
 
 	emap_full_alloc_ctx_t full_alloc_ctx;
-	bool missing = emap_full_alloc_ctx_try_lookup(tsdn, &arena_emap_global,
-	    ptr, &full_alloc_ctx);
+	bool                  missing = emap_full_alloc_ctx_try_lookup(
+            tsdn, &arena_emap_global, ptr, &full_alloc_ctx);
 	if (missing) {
 		return 0;
 	}
@@ -207,46 +260,24 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) {
 
 	assert(full_alloc_ctx.szind != SC_NSIZES);
 
-	return sz_index2size(full_alloc_ctx.szind);
-}
-
-JEMALLOC_ALWAYS_INLINE bool
-large_dalloc_safety_checks(edata_t *edata, void *ptr, szind_t szind) {
-	if (!config_opt_safety_checks) {
-		return false;
-	}
-
-	/*
-	 * Eagerly detect double free and sized dealloc bugs for large sizes.
-	 * The cost is low enough (as edata will be accessed anyway) to be
-	 * enabled all the time.
-	 */
-	if (unlikely(edata == NULL ||
-	    edata_state_get(edata) != extent_state_active)) {
-		safety_check_fail("Invalid deallocation detected: "
-		    "pages being freed (%p) not currently active, "
-		    "possibly caused by double free bugs.",
-		    (uintptr_t)edata_addr_get(edata));
-		return true;
-	}
-	size_t input_size = sz_index2size(szind);
-	if (unlikely(input_size != edata_usize_get(edata))) {
-		safety_check_fail_sized_dealloc(/* current_dealloc */ true, ptr,
-		    /* true_size */ edata_usize_get(edata), input_size);
-		return true;
-	}
-
-	return false;
+	return edata_usize_get(full_alloc_ctx.edata);
 }
 
 static inline void
-arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) {
+arena_dalloc_large_no_tcache(
+    tsdn_t *tsdn, void *ptr, szind_t szind, size_t usize) {
+	/*
+	 * szind is still needed in this function mainly becuase
+	 * szind < SC_NBINS determines not only if this is a small alloc,
+	 * but also if szind is valid (an inactive extent would have
+	 * szind == SC_NSIZES).
+	 */
 	if (config_prof && unlikely(szind < SC_NBINS)) {
 		arena_dalloc_promoted(tsdn, ptr, NULL, true);
 	} else {
-		edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global,
-		    ptr);
-		if (large_dalloc_safety_checks(edata, ptr, szind)) {
+		edata_t *edata = emap_edata_lookup(
+		    tsdn, &arena_emap_global, ptr);
+		if (large_dalloc_safety_checks(edata, ptr, usize)) {
 			/* See the comment in isfree. */
 			return;
 		}
@@ -262,42 +293,76 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) {
 	emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx);
 
 	if (config_debug) {
-		edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global,
-		    ptr);
+		edata_t *edata = emap_edata_lookup(
+		    tsdn, &arena_emap_global, ptr);
 		assert(alloc_ctx.szind == edata_szind_get(edata));
 		assert(alloc_ctx.szind < SC_NSIZES);
 		assert(alloc_ctx.slab == edata_slab_get(edata));
+		assert(emap_alloc_ctx_usize_get(&alloc_ctx)
+		    == edata_usize_get(edata));
 	}
 
 	if (likely(alloc_ctx.slab)) {
 		/* Small allocation. */
 		arena_dalloc_small(tsdn, ptr);
 	} else {
-		arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind);
+		arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind,
+		    emap_alloc_ctx_usize_get(&alloc_ctx));
 	}
 }
 
 JEMALLOC_ALWAYS_INLINE void
 arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind,
-    bool slow_path) {
-	if (szind < nhbins) {
-		if (config_prof && unlikely(szind < SC_NBINS)) {
-			arena_dalloc_promoted(tsdn, ptr, tcache, slow_path);
-		} else {
-			tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, szind,
-			    slow_path);
-		}
+    size_t usize, bool slow_path) {
+	assert(!tsdn_null(tsdn) && tcache != NULL);
+	bool is_sample_promoted = config_prof && szind < SC_NBINS;
+	if (unlikely(is_sample_promoted)) {
+		arena_dalloc_promoted(tsdn, ptr, tcache, slow_path);
 	} else {
-		edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global,
-		    ptr);
-		if (large_dalloc_safety_checks(edata, ptr, szind)) {
-			/* See the comment in isfree. */
-			return;
+		if (szind < tcache_nbins_get(tcache->tcache_slow)
+		    && !tcache_bin_disabled(
+		        szind, &tcache->bins[szind], tcache->tcache_slow)) {
+			tcache_dalloc_large(
+			    tsdn_tsd(tsdn), tcache, ptr, szind, slow_path);
+		} else {
+			edata_t *edata = emap_edata_lookup(
+			    tsdn, &arena_emap_global, ptr);
+			if (large_dalloc_safety_checks(edata, ptr, usize)) {
+				/* See the comment in isfree. */
+				return;
+			}
+			large_dalloc(tsdn, edata);
 		}
-		large_dalloc(tsdn, edata);
 	}
 }
 
+JEMALLOC_ALWAYS_INLINE bool
+arena_tcache_dalloc_small_safety_check(tsdn_t *tsdn, void *ptr) {
+	if (!config_debug) {
+		return false;
+	}
+	edata_t   *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr);
+	szind_t    binind = edata_szind_get(edata);
+	div_info_t div_info = arena_binind_div_info[binind];
+	/*
+	 * Calls the internal function bin_slab_regind_impl because the
+	 * safety check does not require a lock.
+	 */
+	size_t regind = bin_slab_regind_impl(&div_info, binind, edata, ptr);
+	slab_data_t      *slab_data = edata_slab_data_get(edata);
+	const bin_info_t *bin_info = &bin_infos[binind];
+	assert(edata_nfree_get(edata) < bin_info->nregs);
+	if (unlikely(!bitmap_get(
+	        slab_data->bitmap, &bin_info->bitmap_info, regind))) {
+		safety_check_fail(
+		    "Invalid deallocation detected: the pointer being freed (%p) not "
+		    "currently active, possibly caused by double free bugs.\n",
+		    ptr);
+		return true;
+	}
+	return false;
+}
+
 JEMALLOC_ALWAYS_INLINE void
 arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
     emap_alloc_ctx_t *caller_alloc_ctx, bool slow_path) {
@@ -313,26 +378,31 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
 	if (caller_alloc_ctx != NULL) {
 		alloc_ctx = *caller_alloc_ctx;
 	} else {
-		util_assume(!tsdn_null(tsdn));
-		emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr,
-		    &alloc_ctx);
+		util_assume(tsdn != NULL);
+		emap_alloc_ctx_lookup(
+		    tsdn, &arena_emap_global, ptr, &alloc_ctx);
 	}
 
 	if (config_debug) {
-		edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global,
-		    ptr);
+		edata_t *edata = emap_edata_lookup(
+		    tsdn, &arena_emap_global, ptr);
 		assert(alloc_ctx.szind == edata_szind_get(edata));
 		assert(alloc_ctx.szind < SC_NSIZES);
 		assert(alloc_ctx.slab == edata_slab_get(edata));
+		assert(emap_alloc_ctx_usize_get(&alloc_ctx)
+		    == edata_usize_get(edata));
 	}
 
 	if (likely(alloc_ctx.slab)) {
 		/* Small allocation. */
-		tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr,
-		    alloc_ctx.szind, slow_path);
+		if (arena_tcache_dalloc_small_safety_check(tsdn, ptr)) {
+			return;
+		}
+		tcache_dalloc_small(
+		    tsdn_tsd(tsdn), tcache, ptr, alloc_ctx.szind, slow_path);
 	} else {
 		arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind,
-		    slow_path);
+		    emap_alloc_ctx_usize_get(&alloc_ctx), slow_path);
 	}
 }
 
@@ -347,21 +417,22 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) {
 		 * There is no risk of being confused by a promoted sampled
 		 * object, so base szind and slab on the given size.
 		 */
-		alloc_ctx.szind = sz_size2index(size);
-		alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS);
+		szind_t szind = sz_size2index(size);
+		emap_alloc_ctx_init(
+		    &alloc_ctx, szind, (szind < SC_NBINS), size);
 	}
 
 	if ((config_prof && opt_prof) || config_debug) {
-		emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr,
-		    &alloc_ctx);
+		emap_alloc_ctx_lookup(
+		    tsdn, &arena_emap_global, ptr, &alloc_ctx);
 
 		assert(alloc_ctx.szind == sz_size2index(size));
 		assert((config_prof && opt_prof)
 		    || alloc_ctx.slab == (alloc_ctx.szind < SC_NBINS));
 
 		if (config_debug) {
-			edata_t *edata = emap_edata_lookup(tsdn,
-			    &arena_emap_global, ptr);
+			edata_t *edata = emap_edata_lookup(
+			    tsdn, &arena_emap_global, ptr);
 			assert(alloc_ctx.szind == edata_szind_get(edata));
 			assert(alloc_ctx.slab == edata_slab_get(edata));
 		}
@@ -371,7 +442,8 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) {
 		/* Small allocation. */
 		arena_dalloc_small(tsdn, ptr);
 	} else {
-		arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind);
+		arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind,
+		    emap_alloc_ctx_usize_get(&alloc_ctx));
 	}
 }
 
@@ -391,9 +463,10 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
 	if (config_prof && opt_prof) {
 		if (caller_alloc_ctx == NULL) {
 			/* Uncommon case and should be a static check. */
-			emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr,
-			    &alloc_ctx);
+			emap_alloc_ctx_lookup(
+			    tsdn, &arena_emap_global, ptr, &alloc_ctx);
 			assert(alloc_ctx.szind == sz_size2index(size));
+			assert(emap_alloc_ctx_usize_get(&alloc_ctx) == size);
 		} else {
 			alloc_ctx = *caller_alloc_ctx;
 		}
@@ -407,30 +480,37 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
 	}
 
 	if (config_debug) {
-		edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global,
-		    ptr);
+		edata_t *edata = emap_edata_lookup(
+		    tsdn, &arena_emap_global, ptr);
 		assert(alloc_ctx.szind == edata_szind_get(edata));
 		assert(alloc_ctx.slab == edata_slab_get(edata));
+		emap_alloc_ctx_init(
+		    &alloc_ctx, alloc_ctx.szind, alloc_ctx.slab, sz_s2u(size));
+		assert(emap_alloc_ctx_usize_get(&alloc_ctx)
+		    == edata_usize_get(edata));
 	}
 
 	if (likely(alloc_ctx.slab)) {
 		/* Small allocation. */
-		tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr,
-		    alloc_ctx.szind, slow_path);
+		if (arena_tcache_dalloc_small_safety_check(tsdn, ptr)) {
+			return;
+		}
+		tcache_dalloc_small(
+		    tsdn_tsd(tsdn), tcache, ptr, alloc_ctx.szind, slow_path);
 	} else {
 		arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind,
-		    slow_path);
+		    sz_s2u(size), slow_path);
 	}
 }
 
 static inline void
-arena_cache_oblivious_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata,
-    size_t alignment) {
+arena_cache_oblivious_randomize(
+    tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t alignment) {
 	assert(edata_base_get(edata) == edata_addr_get(edata));
 
 	if (alignment < PAGE) {
-		unsigned lg_range = LG_PAGE -
-		    lg_floor(CACHELINE_CEILING(alignment));
+		unsigned lg_range = LG_PAGE
+		    - lg_floor(CACHELINE_CEILING(alignment));
 		size_t r;
 		if (!tsdn_null(tsdn)) {
 			tsd_t *tsd = tsdn_tsd(tsdn);
@@ -440,110 +520,18 @@ arena_cache_oblivious_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata,
 			uint64_t stack_value = (uint64_t)(uintptr_t)&r;
 			r = (size_t)prng_lg_range_u64(&stack_value, lg_range);
 		}
-		uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE -
-		    lg_range);
-		edata->e_addr = (void *)((uintptr_t)edata->e_addr +
-		    random_offset);
-		assert(ALIGNMENT_ADDR2BASE(edata->e_addr, alignment) ==
-		    edata->e_addr);
-	}
-}
-
-/*
- * The dalloc bin info contains just the information that the common paths need
- * during tcache flushes.  By force-inlining these paths, and using local copies
- * of data (so that the compiler knows it's constant), we avoid a whole bunch of
- * redundant loads and stores by leaving this information in registers.
- */
-typedef struct arena_dalloc_bin_locked_info_s arena_dalloc_bin_locked_info_t;
-struct arena_dalloc_bin_locked_info_s {
-	div_info_t div_info;
-	uint32_t nregs;
-	uint64_t ndalloc;
-};
-
-JEMALLOC_ALWAYS_INLINE size_t
-arena_slab_regind(arena_dalloc_bin_locked_info_t *info, szind_t binind,
-    edata_t *slab, const void *ptr) {
-	size_t diff, regind;
-
-	/* Freeing a pointer outside the slab can cause assertion failure. */
-	assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab));
-	assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab));
-	/* Freeing an interior pointer can cause assertion failure. */
-	assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) %
-	    (uintptr_t)bin_infos[binind].reg_size == 0);
-
-	diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab));
-
-	/* Avoid doing division with a variable divisor. */
-	regind = div_compute(&info->div_info, diff);
-
-	assert(regind < bin_infos[binind].nregs);
-
-	return regind;
-}
-
-JEMALLOC_ALWAYS_INLINE void
-arena_dalloc_bin_locked_begin(arena_dalloc_bin_locked_info_t *info,
-    szind_t binind) {
-	info->div_info = arena_binind_div_info[binind];
-	info->nregs = bin_infos[binind].nregs;
-	info->ndalloc = 0;
-}
-
-/*
- * Does the deallocation work associated with freeing a single pointer (a
- * "step") in between a arena_dalloc_bin_locked begin and end call.
- *
- * Returns true if arena_slab_dalloc must be called on slab.  Doesn't do
- * stats updates, which happen during finish (this lets running counts get left
- * in a register).
- */
-JEMALLOC_ALWAYS_INLINE bool
-arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
-    arena_dalloc_bin_locked_info_t *info, szind_t binind, edata_t *slab,
-    void *ptr) {
-	const bin_info_t *bin_info = &bin_infos[binind];
-	size_t regind = arena_slab_regind(info, binind, slab, ptr);
-	slab_data_t *slab_data = edata_slab_data_get(slab);
-
-	assert(edata_nfree_get(slab) < bin_info->nregs);
-	/* Freeing an unallocated pointer can cause assertion failure. */
-	assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind));
-
-	bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind);
-	edata_nfree_inc(slab);
-
-	if (config_stats) {
-		info->ndalloc++;
-	}
-
-	unsigned nfree = edata_nfree_get(slab);
-	if (nfree == bin_info->nregs) {
-		arena_dalloc_bin_locked_handle_newly_empty(tsdn, arena, slab,
-		    bin);
-		return true;
-	} else if (nfree == 1 && slab != bin->slabcur) {
-		arena_dalloc_bin_locked_handle_newly_nonempty(tsdn, arena, slab,
-		    bin);
-	}
-	return false;
-}
-
-JEMALLOC_ALWAYS_INLINE void
-arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
-    arena_dalloc_bin_locked_info_t *info) {
-	if (config_stats) {
-		bin->stats.ndalloc += info->ndalloc;
-		assert(bin->stats.curregs >= (size_t)info->ndalloc);
-		bin->stats.curregs -= (size_t)info->ndalloc;
+		uintptr_t random_offset = ((uintptr_t)r)
+		    << (LG_PAGE - lg_range);
+		edata->e_addr = (void *)((byte_t *)edata->e_addr
+		    + random_offset);
+		assert(ALIGNMENT_ADDR2BASE(edata->e_addr, alignment)
+		    == edata->e_addr);
 	}
 }
 
 static inline bin_t *
 arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) {
-	bin_t *shard0 = (bin_t *)((uintptr_t)arena + arena_bin_offsets[binind]);
+	bin_t *shard0 = (bin_t *)((byte_t *)arena + arena_bin_offsets[binind]);
 	return shard0 + binshard;
 }
 
diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h
index 15f1d345..01012f68 100644
--- a/include/jemalloc/internal/arena_stats.h
+++ b/include/jemalloc/internal/arena_stats.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_ARENA_STATS_H
 #define JEMALLOC_INTERNAL_ARENA_STATS_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/atomic.h"
 #include "jemalloc/internal/lockedint.h"
 #include "jemalloc/internal/mutex.h"
@@ -13,28 +14,34 @@ JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS
 typedef struct arena_stats_large_s arena_stats_large_t;
 struct arena_stats_large_s {
 	/*
-	 * Total number of allocation/deallocation requests served directly by
-	 * the arena.
+	 * Total number of large allocation/deallocation requests served directly
+	 * by the arena.
 	 */
-	locked_u64_t	nmalloc;
-	locked_u64_t	ndalloc;
+	locked_u64_t nmalloc;
+	locked_u64_t ndalloc;
+
+	/*
+	 * Total large active bytes (allocated - deallocated) served directly
+	 * by the arena.
+	 */
+	locked_u64_t active_bytes;
 
 	/*
 	 * Number of allocation requests that correspond to this size class.
 	 * This includes requests served by tcache, though tcache only
 	 * periodically merges into this counter.
 	 */
-	locked_u64_t	nrequests; /* Partially derived. */
+	locked_u64_t nrequests; /* Partially derived. */
 	/*
 	 * Number of tcache fills / flushes for large (similarly, periodically
 	 * merged).  Note that there is no large tcache batch-fill currently
 	 * (i.e. only fill 1 at a time); however flush may be batched.
 	 */
-	locked_u64_t	nfills; /* Partially derived. */
-	locked_u64_t	nflushes; /* Partially derived. */
+	locked_u64_t nfills;   /* Partially derived. */
+	locked_u64_t nflushes; /* Partially derived. */
 
 	/* Current number of allocations of this size class. */
-	size_t		curlextents; /* Derived. */
+	size_t curlextents; /* Derived. */
 };
 
 /*
@@ -50,38 +57,40 @@ struct arena_stats_s {
 	 * resident includes the base stats -- that's why it lives here and not
 	 * in pa_shard_stats_t.
 	 */
-	size_t			base; /* Derived. */
-	size_t			resident; /* Derived. */
-	size_t			metadata_thp; /* Derived. */
-	size_t			mapped; /* Derived. */
+	size_t base;           /* Derived. */
+	size_t metadata_edata; /* Derived. */
+	size_t metadata_rtree; /* Derived. */
+	size_t resident;       /* Derived. */
+	size_t metadata_thp;   /* Derived. */
+	size_t mapped;         /* Derived. */
 
-	atomic_zu_t		internal;
+	atomic_zu_t internal;
 
-	size_t			allocated_large; /* Derived. */
-	uint64_t		nmalloc_large; /* Derived. */
-	uint64_t		ndalloc_large; /* Derived. */
-	uint64_t		nfills_large; /* Derived. */
-	uint64_t		nflushes_large; /* Derived. */
-	uint64_t		nrequests_large; /* Derived. */
+	size_t   allocated_large; /* Derived. */
+	uint64_t nmalloc_large;   /* Derived. */
+	uint64_t ndalloc_large;   /* Derived. */
+	uint64_t nfills_large;    /* Derived. */
+	uint64_t nflushes_large;  /* Derived. */
+	uint64_t nrequests_large; /* Derived. */
 
 	/*
 	 * The stats logically owned by the pa_shard in the same arena.  This
 	 * lives here only because it's convenient for the purposes of the ctl
 	 * module -- it only knows about the single arena_stats.
 	 */
-	pa_shard_stats_t	pa_shard_stats;
+	pa_shard_stats_t pa_shard_stats;
 
 	/* Number of bytes cached in tcache associated with this arena. */
-	size_t			tcache_bytes; /* Derived. */
-	size_t			tcache_stashed_bytes; /* Derived. */
+	size_t tcache_bytes;         /* Derived. */
+	size_t tcache_stashed_bytes; /* Derived. */
 
 	mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes];
 
 	/* One element for each large size class. */
-	arena_stats_large_t	lstats[SC_NSIZES - SC_NBINS];
+	arena_stats_large_t lstats[SC_NSIZES - SC_NBINS];
 
 	/* Arena uptime. */
-	nstime_t		uptime;
+	nstime_t uptime;
 };
 
 static inline bool
@@ -92,7 +101,7 @@ arena_stats_init(tsdn_t *tsdn, arena_stats_t *arena_stats) {
 		}
 	}
 	if (LOCKEDINT_MTX_INIT(arena_stats->mtx, "arena_stats",
-	    WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) {
+	        WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) {
 		return true;
 	}
 	/* Memory is zeroed, so there is no need to clear stats. */
@@ -106,8 +115,8 @@ arena_stats_large_flush_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats,
 	arena_stats_large_t *lstats = &arena_stats->lstats[szind - SC_NBINS];
 	locked_inc_u64(tsdn, LOCKEDINT_MTX(arena_stats->mtx),
 	    &lstats->nrequests, nrequests);
-	locked_inc_u64(tsdn, LOCKEDINT_MTX(arena_stats->mtx),
-	    &lstats->nflushes, 1);
+	locked_inc_u64(
+	    tsdn, LOCKEDINT_MTX(arena_stats->mtx), &lstats->nflushes, 1);
 	LOCKEDINT_MTX_UNLOCK(tsdn, arena_stats->mtx);
 }
 
diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h
index e2a5a408..471f7692 100644
--- a/include/jemalloc/internal/arena_structs.h
+++ b/include/jemalloc/internal/arena_structs.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_H
 #define JEMALLOC_INTERNAL_ARENA_STRUCTS_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/arena_stats.h"
 #include "jemalloc/internal/atomic.h"
 #include "jemalloc/internal/bin.h"
@@ -31,20 +32,20 @@ struct arena_s {
 	 *
 	 * Synchronization: atomic.
 	 */
-	atomic_u_t		nthreads[2];
+	atomic_u_t nthreads[2];
 
 	/* Next bin shard for binding new threads. Synchronization: atomic. */
-	atomic_u_t		binshard_next;
+	atomic_u_t binshard_next;
 
 	/*
 	 * When percpu_arena is enabled, to amortize the cost of reading /
 	 * updating the current CPU id, track the most recent thread accessing
 	 * this arena, and only read CPU if there is a mismatch.
 	 */
-	tsdn_t		*last_thd;
+	tsdn_t *last_thd;
 
 	/* Synchronization: internal. */
-	arena_stats_t		stats;
+	arena_stats_t stats;
 
 	/*
 	 * Lists of tcaches and cache_bin_array_descriptors for extant threads
@@ -53,28 +54,28 @@ struct arena_s {
 	 *
 	 * Synchronization: tcache_ql_mtx.
 	 */
-	ql_head(tcache_slow_t)			tcache_ql;
-	ql_head(cache_bin_array_descriptor_t)	cache_bin_array_descriptor_ql;
-	malloc_mutex_t				tcache_ql_mtx;
+	ql_head(tcache_slow_t) tcache_ql;
+	ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql;
+	malloc_mutex_t tcache_ql_mtx;
 
 	/*
 	 * Represents a dss_prec_t, but atomically.
 	 *
 	 * Synchronization: atomic.
 	 */
-	atomic_u_t		dss_prec;
+	atomic_u_t dss_prec;
 
 	/*
 	 * Extant large allocations.
 	 *
 	 * Synchronization: large_mtx.
 	 */
-	edata_list_active_t	large;
+	edata_list_active_t large;
 	/* Synchronizes all large allocation/update/deallocation. */
-	malloc_mutex_t		large_mtx;
+	malloc_mutex_t large_mtx;
 
 	/* The page-level allocator shard this arena uses. */
-	pa_shard_t		pa_shard;
+	pa_shard_t pa_shard;
 
 	/*
 	 * A cached copy of base->ind.  This can get accessed on hot paths;
@@ -87,15 +88,24 @@ struct arena_s {
 	 *
 	 * Synchronization: internal.
 	 */
-	base_t			*base;
+	base_t *base;
 	/* Used to determine uptime.  Read-only after initialization. */
-	nstime_t		create_time;
+	nstime_t create_time;
+
+	/* The name of the arena. */
+	char name[ARENA_NAME_LEN];
 
 	/*
 	 * The arena is allocated alongside its bins; really this is a
 	 * dynamically sized array determined by the binshard settings.
+	 * Enforcing cacheline-alignment to minimize the number of cachelines
+	 * touched on the hot paths.
 	 */
-	bin_t			bins[0];
+	JEMALLOC_WARN_ON_USAGE(
+	    "Do not use this field directly. "
+	    "Use `arena_get_bin` instead.")
+	JEMALLOC_ALIGNED(CACHELINE)
+	bin_t all_bins[0];
 };
 
 #endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_H */
diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h
index d0e12917..c586164f 100644
--- a/include/jemalloc/internal/arena_types.h
+++ b/include/jemalloc/internal/arena_types.h
@@ -1,39 +1,41 @@
 #ifndef JEMALLOC_INTERNAL_ARENA_TYPES_H
 #define JEMALLOC_INTERNAL_ARENA_TYPES_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/sc.h"
 
 /* Default decay times in milliseconds. */
-#define DIRTY_DECAY_MS_DEFAULT	ZD(10 * 1000)
-#define MUZZY_DECAY_MS_DEFAULT	(0)
+#define DIRTY_DECAY_MS_DEFAULT ZD(10 * 1000)
+#define MUZZY_DECAY_MS_DEFAULT (0)
 /* Number of event ticks between time checks. */
-#define ARENA_DECAY_NTICKS_PER_UPDATE	1000
+#define ARENA_DECAY_NTICKS_PER_UPDATE 1000
+/* Maximum length of the arena name. */
+#define ARENA_NAME_LEN 32
 
-typedef struct arena_decay_s arena_decay_t;
 typedef struct arena_s arena_t;
 
 typedef enum {
-	percpu_arena_mode_names_base   = 0, /* Used for options processing. */
+	percpu_arena_mode_names_base = 0, /* Used for options processing. */
 
 	/*
 	 * *_uninit are used only during bootstrapping, and must correspond
 	 * to initialized variant plus percpu_arena_mode_enabled_base.
 	 */
-	percpu_arena_uninit            = 0,
-	per_phycpu_arena_uninit        = 1,
+	percpu_arena_uninit = 0,
+	per_phycpu_arena_uninit = 1,
 
 	/* All non-disabled modes must come after percpu_arena_disabled. */
-	percpu_arena_disabled          = 2,
+	percpu_arena_disabled = 2,
 
-	percpu_arena_mode_names_limit  = 3, /* Used for options processing. */
+	percpu_arena_mode_names_limit = 3, /* Used for options processing. */
 	percpu_arena_mode_enabled_base = 3,
 
-	percpu_arena                   = 3,
-	per_phycpu_arena               = 4  /* Hyper threads share arena. */
+	percpu_arena = 3,
+	per_phycpu_arena = 4 /* Hyper threads share arena. */
 } percpu_arena_mode_t;
 
-#define PERCPU_ARENA_ENABLED(m)	((m) >= percpu_arena_mode_enabled_base)
-#define PERCPU_ARENA_DEFAULT	percpu_arena_disabled
+#define PERCPU_ARENA_ENABLED(m) ((m) >= percpu_arena_mode_enabled_base)
+#define PERCPU_ARENA_DEFAULT percpu_arena_disabled
 
 /*
  * When allocation_size >= oversize_threshold, use the dedicated huge arena
diff --git a/include/jemalloc/internal/assert.h b/include/jemalloc/internal/assert.h
index be4d45b3..1b5da72f 100644
--- a/include/jemalloc/internal/assert.h
+++ b/include/jemalloc/internal/assert.h
@@ -1,3 +1,4 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/malloc_io.h"
 #include "jemalloc/internal/util.h"
 
@@ -6,51 +7,57 @@
  * assertion failure.
  */
 #ifndef assert
-#define assert(e) do {							\
-	if (unlikely(config_debug && !(e))) {				\
-		malloc_printf(						\
-		    "<jemalloc>: %s:%d: Failed assertion: \"%s\"\n",	\
-		    __FILE__, __LINE__, #e);				\
-		abort();						\
-	}								\
-} while (0)
+#	define assert(e)                                                            \
+		do {                                                                 \
+			if (unlikely(config_debug && !(e))) {                        \
+				malloc_printf(                                       \
+				    "<jemalloc>: %s:%d: Failed assertion: \"%s\"\n", \
+				    __FILE__, __LINE__, #e);                         \
+				abort();                                             \
+			}                                                            \
+		} while (0)
 #endif
 
 #ifndef not_reached
-#define not_reached() do {						\
-	if (config_debug) {						\
-		malloc_printf(						\
-		    "<jemalloc>: %s:%d: Unreachable code reached\n",	\
-		    __FILE__, __LINE__);				\
-		abort();						\
-	}								\
-	unreachable();							\
-} while (0)
+#	define not_reached()                                                        \
+		do {                                                                 \
+			if (config_debug) {                                          \
+				malloc_printf(                                       \
+				    "<jemalloc>: %s:%d: Unreachable code reached\n", \
+				    __FILE__, __LINE__);                             \
+				abort();                                             \
+			}                                                            \
+			unreachable();                                               \
+		} while (0)
 #endif
 
 #ifndef not_implemented
-#define not_implemented() do {						\
-	if (config_debug) {						\
-		malloc_printf("<jemalloc>: %s:%d: Not implemented\n",	\
-		    __FILE__, __LINE__);				\
-		abort();						\
-	}								\
-} while (0)
+#	define not_implemented()                                              \
+		do {                                                           \
+			if (config_debug) {                                    \
+				malloc_printf(                                 \
+				    "<jemalloc>: %s:%d: Not implemented\n",    \
+				    __FILE__, __LINE__);                       \
+				abort();                                       \
+			}                                                      \
+		} while (0)
 #endif
 
 #ifndef assert_not_implemented
-#define assert_not_implemented(e) do {					\
-	if (unlikely(config_debug && !(e))) {				\
-		not_implemented();					\
-	}								\
-} while (0)
+#	define assert_not_implemented(e)                                      \
+		do {                                                           \
+			if (unlikely(config_debug && !(e))) {                  \
+				not_implemented();                             \
+			}                                                      \
+		} while (0)
 #endif
 
 /* Use to assert a particular configuration, e.g., cassert(config_debug). */
 #ifndef cassert
-#define cassert(c) do {							\
-	if (unlikely(!(c))) {						\
-		not_reached();						\
-	}								\
-} while (0)
+#	define cassert(c)                                                     \
+		do {                                                           \
+			if (unlikely(!(c))) {                                  \
+				not_reached();                                 \
+			}                                                      \
+		} while (0)
 #endif
diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h
index c0f73122..f80e5640 100644
--- a/include/jemalloc/internal/atomic.h
+++ b/include/jemalloc/internal/atomic.h
@@ -1,27 +1,29 @@
 #ifndef JEMALLOC_INTERNAL_ATOMIC_H
 #define JEMALLOC_INTERNAL_ATOMIC_H
 
-#define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE
+#include "jemalloc/internal/jemalloc_preamble.h"
 
 #define JEMALLOC_U8_ATOMICS
 #if defined(JEMALLOC_GCC_ATOMIC_ATOMICS)
-#  include "jemalloc/internal/atomic_gcc_atomic.h"
-#  if !defined(JEMALLOC_GCC_U8_ATOMIC_ATOMICS)
-#    undef JEMALLOC_U8_ATOMICS
-#  endif
+#	include "jemalloc/internal/atomic_gcc_atomic.h"
+#	if !defined(JEMALLOC_GCC_U8_ATOMIC_ATOMICS)
+#		undef JEMALLOC_U8_ATOMICS
+#	endif
 #elif defined(JEMALLOC_GCC_SYNC_ATOMICS)
-#  include "jemalloc/internal/atomic_gcc_sync.h"
-#  if !defined(JEMALLOC_GCC_U8_SYNC_ATOMICS)
-#    undef JEMALLOC_U8_ATOMICS
-#  endif
+#	include "jemalloc/internal/atomic_gcc_sync.h"
+#	if !defined(JEMALLOC_GCC_U8_SYNC_ATOMICS)
+#		undef JEMALLOC_U8_ATOMICS
+#	endif
 #elif defined(_MSC_VER)
-#  include "jemalloc/internal/atomic_msvc.h"
+#	include "jemalloc/internal/atomic_msvc.h"
 #elif defined(JEMALLOC_C11_ATOMICS)
-#  include "jemalloc/internal/atomic_c11.h"
+#	include "jemalloc/internal/atomic_c11.h"
 #else
-#  error "Don't have atomics implemented on this platform."
+#	error "Don't have atomics implemented on this platform."
 #endif
 
+#define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE
+
 /*
  * This header gives more or less a backport of C11 atomics. The user can write
  * JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_sizeof_type); to generate
@@ -54,22 +56,19 @@
 /*
  * Another convenience -- simple atomic helper functions.
  */
-#define JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(type, short_type,	\
-    lg_size)								\
-    JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size)		\
-    ATOMIC_INLINE void							\
-    atomic_load_add_store_##short_type(atomic_##short_type##_t *a,	\
-	type inc) {							\
-	    type oldval = atomic_load_##short_type(a, ATOMIC_RELAXED);	\
-	    type newval = oldval + inc;					\
-	    atomic_store_##short_type(a, newval, ATOMIC_RELAXED);	\
-	}								\
-    ATOMIC_INLINE void							\
-    atomic_load_sub_store_##short_type(atomic_##short_type##_t *a,	\
-	type inc) {							\
-	    type oldval = atomic_load_##short_type(a, ATOMIC_RELAXED);	\
-	    type newval = oldval - inc;					\
-	    atomic_store_##short_type(a, newval, ATOMIC_RELAXED);	\
+#define JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(type, short_type, lg_size)      \
+	JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size)               \
+	ATOMIC_INLINE void atomic_load_add_store_##short_type(                 \
+	    atomic_##short_type##_t *a, type inc) {                            \
+		type oldval = atomic_load_##short_type(a, ATOMIC_RELAXED);     \
+		type newval = oldval + inc;                                    \
+		atomic_store_##short_type(a, newval, ATOMIC_RELAXED);          \
+	}                                                                      \
+	ATOMIC_INLINE void atomic_load_sub_store_##short_type(                 \
+	    atomic_##short_type##_t *a, type inc) {                            \
+		type oldval = atomic_load_##short_type(a, ATOMIC_RELAXED);     \
+		type newval = oldval - inc;                                    \
+		atomic_store_##short_type(a, newval, ATOMIC_RELAXED);          \
 	}
 
 /*
@@ -77,7 +76,7 @@
  * fact.
  */
 #if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
-#  define JEMALLOC_ATOMIC_U64
+#	define JEMALLOC_ATOMIC_U64
 #endif
 
 JEMALLOC_GENERATE_ATOMICS(void *, p, LG_SIZEOF_PTR)
@@ -90,6 +89,8 @@ JEMALLOC_GENERATE_ATOMICS(bool, b, 0)
 
 JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(unsigned, u, LG_SIZEOF_INT)
 
+JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(int, i, LG_SIZEOF_INT)
+
 JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(size_t, zu, LG_SIZEOF_PTR)
 
 JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(ssize_t, zd, LG_SIZEOF_PTR)
diff --git a/include/jemalloc/internal/atomic_c11.h b/include/jemalloc/internal/atomic_c11.h
index a5f9313a..1e86e2a0 100644
--- a/include/jemalloc/internal/atomic_c11.h
+++ b/include/jemalloc/internal/atomic_c11.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_ATOMIC_C11_H
 #define JEMALLOC_INTERNAL_ATOMIC_C11_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include <stdatomic.h>
 
 #define ATOMIC_INIT(...) ATOMIC_VAR_INIT(__VA_ARGS__)
@@ -14,6 +15,7 @@
 
 #define atomic_fence atomic_thread_fence
 
+/* clang-format off */
 #define JEMALLOC_GENERATE_ATOMICS(type, short_type,			\
     /* unused */ lg_size)						\
 typedef _Atomic(type) atomic_##short_type##_t;				\
@@ -58,40 +60,35 @@ atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a,	\
 	return atomic_compare_exchange_strong_explicit(a, expected,	\
 	    desired, success_mo, failure_mo);				\
 }
+/* clang-format on */
 
 /*
  * Integral types have some special operations available that non-integral ones
  * lack.
  */
-#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, 		\
-    /* unused */ lg_size)						\
-JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size)	\
-									\
-ATOMIC_INLINE type							\
-atomic_fetch_add_##short_type(atomic_##short_type##_t *a,		\
-    type val, atomic_memory_order_t mo) {				\
-	return atomic_fetch_add_explicit(a, val, mo);			\
-}									\
-									\
-ATOMIC_INLINE type							\
-atomic_fetch_sub_##short_type(atomic_##short_type##_t *a,		\
-    type val, atomic_memory_order_t mo) {				\
-	return atomic_fetch_sub_explicit(a, val, mo);			\
-}									\
-ATOMIC_INLINE type							\
-atomic_fetch_and_##short_type(atomic_##short_type##_t *a,		\
-    type val, atomic_memory_order_t mo) {				\
-	return atomic_fetch_and_explicit(a, val, mo);			\
-}									\
-ATOMIC_INLINE type							\
-atomic_fetch_or_##short_type(atomic_##short_type##_t *a,		\
-    type val, atomic_memory_order_t mo) {				\
-	return atomic_fetch_or_explicit(a, val, mo);			\
-}									\
-ATOMIC_INLINE type							\
-atomic_fetch_xor_##short_type(atomic_##short_type##_t *a,		\
-    type val, atomic_memory_order_t mo) {				\
-	return atomic_fetch_xor_explicit(a, val, mo);			\
-}
+#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, /* unused */ lg_size)  \
+	JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size)      \
+                                                                               \
+	ATOMIC_INLINE type atomic_fetch_add_##short_type(                      \
+	    atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) {  \
+		return atomic_fetch_add_explicit(a, val, mo);                  \
+	}                                                                      \
+                                                                               \
+	ATOMIC_INLINE type atomic_fetch_sub_##short_type(                      \
+	    atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) {  \
+		return atomic_fetch_sub_explicit(a, val, mo);                  \
+	}                                                                      \
+	ATOMIC_INLINE type atomic_fetch_and_##short_type(                      \
+	    atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) {  \
+		return atomic_fetch_and_explicit(a, val, mo);                  \
+	}                                                                      \
+	ATOMIC_INLINE type atomic_fetch_or_##short_type(                       \
+	    atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) {  \
+		return atomic_fetch_or_explicit(a, val, mo);                   \
+	}                                                                      \
+	ATOMIC_INLINE type atomic_fetch_xor_##short_type(                      \
+	    atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) {  \
+		return atomic_fetch_xor_explicit(a, val, mo);                  \
+	}
 
 #endif /* JEMALLOC_INTERNAL_ATOMIC_C11_H */
diff --git a/include/jemalloc/internal/atomic_gcc_atomic.h b/include/jemalloc/internal/atomic_gcc_atomic.h
index 471515e8..a828a6b0 100644
--- a/include/jemalloc/internal/atomic_gcc_atomic.h
+++ b/include/jemalloc/internal/atomic_gcc_atomic.h
@@ -1,9 +1,13 @@
 #ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H
 #define JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/assert.h"
 
-#define ATOMIC_INIT(...) {__VA_ARGS__}
+#define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE
+
+#define ATOMIC_INIT(...)                                                       \
+	{ __VA_ARGS__ }
 
 typedef enum {
 	atomic_memory_order_relaxed,
@@ -36,94 +40,82 @@ atomic_fence(atomic_memory_order_t mo) {
 	__atomic_thread_fence(atomic_enum_to_builtin(mo));
 }
 
-#define JEMALLOC_GENERATE_ATOMICS(type, short_type,			\
-    /* unused */ lg_size)						\
-typedef struct {							\
-	type repr;							\
-} atomic_##short_type##_t;						\
-									\
-ATOMIC_INLINE type							\
-atomic_load_##short_type(const atomic_##short_type##_t *a,		\
-    atomic_memory_order_t mo) {						\
-	type result;							\
-	__atomic_load(&a->repr, &result, atomic_enum_to_builtin(mo));	\
-	return result;							\
-}									\
-									\
-ATOMIC_INLINE void							\
-atomic_store_##short_type(atomic_##short_type##_t *a, type val,		\
-    atomic_memory_order_t mo) {						\
-	__atomic_store(&a->repr, &val, atomic_enum_to_builtin(mo));	\
-}									\
-									\
-ATOMIC_INLINE type							\
-atomic_exchange_##short_type(atomic_##short_type##_t *a, type val,	\
-    atomic_memory_order_t mo) {						\
-	type result;							\
-	__atomic_exchange(&a->repr, &val, &result,			\
-	    atomic_enum_to_builtin(mo));				\
-	return result;							\
-}									\
-									\
-ATOMIC_INLINE bool							\
-atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a,	\
-    UNUSED type *expected, type desired,				\
-    atomic_memory_order_t success_mo,					\
-    atomic_memory_order_t failure_mo) {					\
-	return __atomic_compare_exchange(&a->repr, expected, &desired,	\
-	    true, atomic_enum_to_builtin(success_mo),			\
-	    atomic_enum_to_builtin(failure_mo));			\
-}									\
-									\
-ATOMIC_INLINE bool							\
-atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a,	\
-    UNUSED type *expected, type desired,				\
-    atomic_memory_order_t success_mo,					\
-    atomic_memory_order_t failure_mo) {					\
-	return __atomic_compare_exchange(&a->repr, expected, &desired,	\
-	    false,							\
-	    atomic_enum_to_builtin(success_mo),				\
-	    atomic_enum_to_builtin(failure_mo));			\
-}
+#define JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size)      \
+	typedef struct {                                                       \
+		type repr;                                                     \
+	} atomic_##short_type##_t;                                             \
+                                                                               \
+	ATOMIC_INLINE type atomic_load_##short_type(                           \
+	    const atomic_##short_type##_t *a, atomic_memory_order_t mo) {      \
+		type result;                                                   \
+		__atomic_load(&a->repr, &result, atomic_enum_to_builtin(mo));  \
+		return result;                                                 \
+	}                                                                      \
+                                                                               \
+	ATOMIC_INLINE void atomic_store_##short_type(                          \
+	    atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) {  \
+		__atomic_store(&a->repr, &val, atomic_enum_to_builtin(mo));    \
+	}                                                                      \
+                                                                               \
+	ATOMIC_INLINE type atomic_exchange_##short_type(                       \
+	    atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) {  \
+		type result;                                                   \
+		__atomic_exchange(                                             \
+		    &a->repr, &val, &result, atomic_enum_to_builtin(mo));      \
+		return result;                                                 \
+	}                                                                      \
+                                                                               \
+	ATOMIC_INLINE bool atomic_compare_exchange_weak_##short_type(          \
+	    atomic_##short_type##_t *a, UNUSED type *expected, type desired,   \
+	    atomic_memory_order_t success_mo,                                  \
+	    atomic_memory_order_t failure_mo) {                                \
+		return __atomic_compare_exchange(&a->repr, expected, &desired, \
+		    true, atomic_enum_to_builtin(success_mo),                  \
+		    atomic_enum_to_builtin(failure_mo));                       \
+	}                                                                      \
+                                                                               \
+	ATOMIC_INLINE bool atomic_compare_exchange_strong_##short_type(        \
+	    atomic_##short_type##_t *a, UNUSED type *expected, type desired,   \
+	    atomic_memory_order_t success_mo,                                  \
+	    atomic_memory_order_t failure_mo) {                                \
+		return __atomic_compare_exchange(&a->repr, expected, &desired, \
+		    false, atomic_enum_to_builtin(success_mo),                 \
+		    atomic_enum_to_builtin(failure_mo));                       \
+	}
 
+#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, /* unused */ lg_size)  \
+	JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size)      \
+                                                                               \
+	ATOMIC_INLINE type atomic_fetch_add_##short_type(                      \
+	    atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) {  \
+		return __atomic_fetch_add(                                     \
+		    &a->repr, val, atomic_enum_to_builtin(mo));                \
+	}                                                                      \
+                                                                               \
+	ATOMIC_INLINE type atomic_fetch_sub_##short_type(                      \
+	    atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) {  \
+		return __atomic_fetch_sub(                                     \
+		    &a->repr, val, atomic_enum_to_builtin(mo));                \
+	}                                                                      \
+                                                                               \
+	ATOMIC_INLINE type atomic_fetch_and_##short_type(                      \
+	    atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) {  \
+		return __atomic_fetch_and(                                     \
+		    &a->repr, val, atomic_enum_to_builtin(mo));                \
+	}                                                                      \
+                                                                               \
+	ATOMIC_INLINE type atomic_fetch_or_##short_type(                       \
+	    atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) {  \
+		return __atomic_fetch_or(                                      \
+		    &a->repr, val, atomic_enum_to_builtin(mo));                \
+	}                                                                      \
+                                                                               \
+	ATOMIC_INLINE type atomic_fetch_xor_##short_type(                      \
+	    atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) {  \
+		return __atomic_fetch_xor(                                     \
+		    &a->repr, val, atomic_enum_to_builtin(mo));                \
+	}
 
-#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type,			\
-    /* unused */ lg_size)						\
-JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size)	\
-									\
-ATOMIC_INLINE type							\
-atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val,	\
-    atomic_memory_order_t mo) {						\
-	return __atomic_fetch_add(&a->repr, val,			\
-	    atomic_enum_to_builtin(mo));				\
-}									\
-									\
-ATOMIC_INLINE type							\
-atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val,	\
-    atomic_memory_order_t mo) {						\
-	return __atomic_fetch_sub(&a->repr, val,			\
-	    atomic_enum_to_builtin(mo));				\
-}									\
-									\
-ATOMIC_INLINE type							\
-atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val,	\
-    atomic_memory_order_t mo) {						\
-	return __atomic_fetch_and(&a->repr, val,			\
-	    atomic_enum_to_builtin(mo));				\
-}									\
-									\
-ATOMIC_INLINE type							\
-atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val,	\
-    atomic_memory_order_t mo) {						\
-	return __atomic_fetch_or(&a->repr, val,				\
-	    atomic_enum_to_builtin(mo));				\
-}									\
-									\
-ATOMIC_INLINE type							\
-atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val,	\
-    atomic_memory_order_t mo) {						\
-	return __atomic_fetch_xor(&a->repr, val,			\
-	    atomic_enum_to_builtin(mo));				\
-}
+#undef ATOMIC_INLINE
 
 #endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H */
diff --git a/include/jemalloc/internal/atomic_gcc_sync.h b/include/jemalloc/internal/atomic_gcc_sync.h
index e02b7cbe..9e2ff9c8 100644
--- a/include/jemalloc/internal/atomic_gcc_sync.h
+++ b/include/jemalloc/internal/atomic_gcc_sync.h
@@ -1,7 +1,12 @@
 #ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
 #define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
 
-#define ATOMIC_INIT(...) {__VA_ARGS__}
+#include "jemalloc/internal/jemalloc_preamble.h"
+
+#define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE
+
+#define ATOMIC_INIT(...)                                                       \
+	{ __VA_ARGS__ }
 
 typedef enum {
 	atomic_memory_order_relaxed,
@@ -25,13 +30,13 @@ atomic_fence(atomic_memory_order_t mo) {
 		return;
 	}
 	asm volatile("" ::: "memory");
-#  if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__)
 	/* This is implicit on x86. */
-#  elif defined(__ppc64__)
+#elif defined(__ppc64__)
 	asm volatile("lwsync");
-#  elif defined(__ppc__)
+#elif defined(__ppc__)
 	asm volatile("sync");
-#  elif defined(__sparc__) && defined(__arch64__)
+#elif defined(__sparc__) && defined(__arch64__)
 	if (mo == atomic_memory_order_acquire) {
 		asm volatile("membar #LoadLoad | #LoadStore");
 	} else if (mo == atomic_memory_order_release) {
@@ -39,9 +44,9 @@ atomic_fence(atomic_memory_order_t mo) {
 	} else {
 		asm volatile("membar #LoadLoad | #LoadStore | #StoreStore");
 	}
-#  else
+#else
 	__sync_synchronize();
-#  endif
+#endif
 	asm volatile("" ::: "memory");
 }
 
@@ -64,25 +69,25 @@ atomic_fence(atomic_memory_order_t mo) {
 
 ATOMIC_INLINE void
 atomic_pre_sc_load_fence() {
-#  if defined(__i386__) || defined(__x86_64__) ||			\
-    (defined(__sparc__) && defined(__arch64__))
+#if defined(__i386__) || defined(__x86_64__)                                   \
+    || (defined(__sparc__) && defined(__arch64__))
 	atomic_fence(atomic_memory_order_relaxed);
-#  else
+#else
 	atomic_fence(atomic_memory_order_seq_cst);
-#  endif
+#endif
 }
 
 ATOMIC_INLINE void
 atomic_post_sc_store_fence() {
-#  if defined(__i386__) || defined(__x86_64__) ||			\
-    (defined(__sparc__) && defined(__arch64__))
+#if defined(__i386__) || defined(__x86_64__)                                   \
+    || (defined(__sparc__) && defined(__arch64__))
 	atomic_fence(atomic_memory_order_seq_cst);
-#  else
+#else
 	atomic_fence(atomic_memory_order_relaxed);
-#  endif
-
+#endif
 }
 
+/* clang-format off */
 #define JEMALLOC_GENERATE_ATOMICS(type, short_type,			\
     /* unused */ lg_size)						\
 typedef struct {							\
@@ -157,39 +162,36 @@ atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a,	\
 		return false;						\
 	}								\
 }
+/* clang-format on */
 
-#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type,			\
-    /* unused */ lg_size)						\
-JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size)	\
-									\
-ATOMIC_INLINE type							\
-atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val,	\
-    atomic_memory_order_t mo) {						\
-	return __sync_fetch_and_add(&a->repr, val);			\
-}									\
-									\
-ATOMIC_INLINE type							\
-atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val,	\
-    atomic_memory_order_t mo) {						\
-	return __sync_fetch_and_sub(&a->repr, val);			\
-}									\
-									\
-ATOMIC_INLINE type							\
-atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val,	\
-    atomic_memory_order_t mo) {						\
-	return __sync_fetch_and_and(&a->repr, val);			\
-}									\
-									\
-ATOMIC_INLINE type							\
-atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val,	\
-    atomic_memory_order_t mo) {						\
-	return __sync_fetch_and_or(&a->repr, val);			\
-}									\
-									\
-ATOMIC_INLINE type							\
-atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val,	\
-    atomic_memory_order_t mo) {						\
-	return __sync_fetch_and_xor(&a->repr, val);			\
-}
+#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, /* unused */ lg_size)  \
+	JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size)      \
+                                                                               \
+	ATOMIC_INLINE type atomic_fetch_add_##short_type(                      \
+	    atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) {  \
+		return __sync_fetch_and_add(&a->repr, val);                    \
+	}                                                                      \
+                                                                               \
+	ATOMIC_INLINE type atomic_fetch_sub_##short_type(                      \
+	    atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) {  \
+		return __sync_fetch_and_sub(&a->repr, val);                    \
+	}                                                                      \
+                                                                               \
+	ATOMIC_INLINE type atomic_fetch_and_##short_type(                      \
+	    atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) {  \
+		return __sync_fetch_and_and(&a->repr, val);                    \
+	}                                                                      \
+                                                                               \
+	ATOMIC_INLINE type atomic_fetch_or_##short_type(                       \
+	    atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) {  \
+		return __sync_fetch_and_or(&a->repr, val);                     \
+	}                                                                      \
+                                                                               \
+	ATOMIC_INLINE type atomic_fetch_xor_##short_type(                      \
+	    atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) {  \
+		return __sync_fetch_and_xor(&a->repr, val);                    \
+	}
+
+#undef ATOMIC_INLINE
 
 #endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */
diff --git a/include/jemalloc/internal/atomic_msvc.h b/include/jemalloc/internal/atomic_msvc.h
index 67057ce5..7accca63 100644
--- a/include/jemalloc/internal/atomic_msvc.h
+++ b/include/jemalloc/internal/atomic_msvc.h
@@ -1,7 +1,12 @@
 #ifndef JEMALLOC_INTERNAL_ATOMIC_MSVC_H
 #define JEMALLOC_INTERNAL_ATOMIC_MSVC_H
 
-#define ATOMIC_INIT(...) {__VA_ARGS__}
+#include "jemalloc/internal/jemalloc_preamble.h"
+
+#define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE
+
+#define ATOMIC_INIT(...)                                                       \
+	{ __VA_ARGS__ }
 
 typedef enum {
 	atomic_memory_order_relaxed,
@@ -11,109 +16,106 @@ typedef enum {
 	atomic_memory_order_seq_cst
 } atomic_memory_order_t;
 
-typedef char atomic_repr_0_t;
-typedef short atomic_repr_1_t;
-typedef long atomic_repr_2_t;
+typedef char    atomic_repr_0_t;
+typedef short   atomic_repr_1_t;
+typedef long    atomic_repr_2_t;
 typedef __int64 atomic_repr_3_t;
 
 ATOMIC_INLINE void
 atomic_fence(atomic_memory_order_t mo) {
 	_ReadWriteBarrier();
-#  if defined(_M_ARM) || defined(_M_ARM64)
+#if defined(_M_ARM) || defined(_M_ARM64)
 	/* ARM needs a barrier for everything but relaxed. */
 	if (mo != atomic_memory_order_relaxed) {
 		MemoryBarrier();
 	}
-#  elif defined(_M_IX86) || defined (_M_X64)
+#elif defined(_M_IX86) || defined(_M_X64)
 	/* x86 needs a barrier only for seq_cst. */
 	if (mo == atomic_memory_order_seq_cst) {
 		MemoryBarrier();
 	}
-#  else
-#  error "Don't know how to create atomics for this platform for MSVC."
-#  endif
+#else
+#	error "Don't know how to create atomics for this platform for MSVC."
+#endif
 	_ReadWriteBarrier();
 }
 
-#define ATOMIC_INTERLOCKED_REPR(lg_size) atomic_repr_ ## lg_size ## _t
+#define ATOMIC_INTERLOCKED_REPR(lg_size) atomic_repr_##lg_size##_t
 
 #define ATOMIC_CONCAT(a, b) ATOMIC_RAW_CONCAT(a, b)
-#define ATOMIC_RAW_CONCAT(a, b) a ## b
+#define ATOMIC_RAW_CONCAT(a, b) a##b
 
-#define ATOMIC_INTERLOCKED_NAME(base_name, lg_size) ATOMIC_CONCAT(	\
-    base_name, ATOMIC_INTERLOCKED_SUFFIX(lg_size))
+#define ATOMIC_INTERLOCKED_NAME(base_name, lg_size)                            \
+	ATOMIC_CONCAT(base_name, ATOMIC_INTERLOCKED_SUFFIX(lg_size))
 
-#define ATOMIC_INTERLOCKED_SUFFIX(lg_size)				\
-    ATOMIC_CONCAT(ATOMIC_INTERLOCKED_SUFFIX_, lg_size)
+#define ATOMIC_INTERLOCKED_SUFFIX(lg_size)                                     \
+	ATOMIC_CONCAT(ATOMIC_INTERLOCKED_SUFFIX_, lg_size)
 
 #define ATOMIC_INTERLOCKED_SUFFIX_0 8
 #define ATOMIC_INTERLOCKED_SUFFIX_1 16
 #define ATOMIC_INTERLOCKED_SUFFIX_2
 #define ATOMIC_INTERLOCKED_SUFFIX_3 64
 
-#define JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size)		\
-typedef struct {							\
-	ATOMIC_INTERLOCKED_REPR(lg_size) repr;				\
-} atomic_##short_type##_t;						\
-									\
-ATOMIC_INLINE type							\
-atomic_load_##short_type(const atomic_##short_type##_t *a,		\
-    atomic_memory_order_t mo) {						\
-	ATOMIC_INTERLOCKED_REPR(lg_size) ret = a->repr;			\
-	if (mo != atomic_memory_order_relaxed) {			\
-		atomic_fence(atomic_memory_order_acquire);		\
-	}								\
-	return (type) ret;						\
-}									\
-									\
-ATOMIC_INLINE void							\
-atomic_store_##short_type(atomic_##short_type##_t *a,			\
-    type val, atomic_memory_order_t mo) {				\
-	if (mo != atomic_memory_order_relaxed) {			\
-		atomic_fence(atomic_memory_order_release);		\
-	}								\
-	a->repr = (ATOMIC_INTERLOCKED_REPR(lg_size)) val;		\
-	if (mo == atomic_memory_order_seq_cst) {			\
-		atomic_fence(atomic_memory_order_seq_cst);		\
-	}								\
-}									\
-									\
-ATOMIC_INLINE type							\
-atomic_exchange_##short_type(atomic_##short_type##_t *a, type val,	\
-    atomic_memory_order_t mo) {						\
-	return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedExchange,	\
-	    lg_size)(&a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val);	\
-}									\
-									\
-ATOMIC_INLINE bool							\
-atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a,	\
-    type *expected, type desired, atomic_memory_order_t success_mo,	\
-    atomic_memory_order_t failure_mo) {					\
-	ATOMIC_INTERLOCKED_REPR(lg_size) e =				\
-	    (ATOMIC_INTERLOCKED_REPR(lg_size))*expected;		\
-	ATOMIC_INTERLOCKED_REPR(lg_size) d =				\
-	    (ATOMIC_INTERLOCKED_REPR(lg_size))desired;			\
-	ATOMIC_INTERLOCKED_REPR(lg_size) old =				\
-	    ATOMIC_INTERLOCKED_NAME(_InterlockedCompareExchange, 	\
-		lg_size)(&a->repr, d, e);				\
-	if (old == e) {							\
-		return true;						\
-	} else {							\
-		*expected = (type)old;					\
-		return false;						\
-	}								\
-}									\
-									\
-ATOMIC_INLINE bool							\
-atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a,	\
-    type *expected, type desired, atomic_memory_order_t success_mo,	\
-    atomic_memory_order_t failure_mo) {					\
-	/* We implement the weak version with strong semantics. */	\
-	return atomic_compare_exchange_weak_##short_type(a, expected,	\
-	    desired, success_mo, failure_mo);				\
-}
-
+#define JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size)                   \
+	typedef struct {                                                       \
+		ATOMIC_INTERLOCKED_REPR(lg_size) repr;                         \
+	} atomic_##short_type##_t;                                             \
+                                                                               \
+	ATOMIC_INLINE type atomic_load_##short_type(                           \
+	    const atomic_##short_type##_t *a, atomic_memory_order_t mo) {      \
+		ATOMIC_INTERLOCKED_REPR(lg_size) ret = a->repr;                \
+		if (mo != atomic_memory_order_relaxed) {                       \
+			atomic_fence(atomic_memory_order_acquire);             \
+		}                                                              \
+		return (type)ret;                                              \
+	}                                                                      \
+                                                                               \
+	ATOMIC_INLINE void atomic_store_##short_type(                          \
+	    atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) {  \
+		if (mo != atomic_memory_order_relaxed) {                       \
+			atomic_fence(atomic_memory_order_release);             \
+		}                                                              \
+		a->repr = (ATOMIC_INTERLOCKED_REPR(lg_size))val;               \
+		if (mo == atomic_memory_order_seq_cst) {                       \
+			atomic_fence(atomic_memory_order_seq_cst);             \
+		}                                                              \
+	}                                                                      \
+                                                                               \
+	ATOMIC_INLINE type atomic_exchange_##short_type(                       \
+	    atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) {  \
+		return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedExchange,     \
+		    lg_size)(&a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \
+	}                                                                      \
+                                                                               \
+	ATOMIC_INLINE bool atomic_compare_exchange_weak_##short_type(          \
+	    atomic_##short_type##_t *a, type *expected, type desired,          \
+	    atomic_memory_order_t success_mo,                                  \
+	    atomic_memory_order_t failure_mo) {                                \
+		ATOMIC_INTERLOCKED_REPR(lg_size)                               \
+		e = (ATOMIC_INTERLOCKED_REPR(lg_size)) * expected;             \
+		ATOMIC_INTERLOCKED_REPR(lg_size)                               \
+		d = (ATOMIC_INTERLOCKED_REPR(lg_size))desired;                 \
+		ATOMIC_INTERLOCKED_REPR(lg_size)                               \
+		old = ATOMIC_INTERLOCKED_NAME(                                 \
+		    _InterlockedCompareExchange, lg_size)(&a->repr, d, e);     \
+		if (old == e) {                                                \
+			return true;                                           \
+		} else {                                                       \
+			*expected = (type)old;                                 \
+			return false;                                          \
+		}                                                              \
+	}                                                                      \
+                                                                               \
+	ATOMIC_INLINE bool atomic_compare_exchange_strong_##short_type(        \
+	    atomic_##short_type##_t *a, type *expected, type desired,          \
+	    atomic_memory_order_t success_mo,                                  \
+	    atomic_memory_order_t failure_mo) {                                \
+		/* We implement the weak version with strong semantics. */     \
+		return atomic_compare_exchange_weak_##short_type(              \
+		    a, expected, desired, success_mo, failure_mo);             \
+	}
 
+/* clang-format off */
 #define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size)	\
 JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size)			\
 									\
@@ -154,5 +156,8 @@ atomic_fetch_xor_##short_type(atomic_##short_type##_t *a,		\
 	return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedXor, lg_size)(	\
 	    &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val);		\
 }
+/* clang-format on */
+
+#undef ATOMIC_INLINE
 
 #endif /* JEMALLOC_INTERNAL_ATOMIC_MSVC_H */
diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h
index 6ae3c8d8..efc0aaa4 100644
--- a/include/jemalloc/internal/background_thread_externs.h
+++ b/include/jemalloc/internal/background_thread_externs.h
@@ -1,26 +1,31 @@
 #ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H
 #define JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H
 
-extern bool opt_background_thread;
-extern size_t opt_max_background_threads;
-extern malloc_mutex_t background_thread_lock;
-extern atomic_b_t background_thread_enabled_state;
-extern size_t n_background_threads;
-extern size_t max_background_threads;
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/background_thread_structs.h"
+#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/mutex.h"
+
+extern bool                      opt_background_thread;
+extern size_t                    opt_max_background_threads;
+extern malloc_mutex_t            background_thread_lock;
+extern atomic_b_t                background_thread_enabled_state;
+extern size_t                    n_background_threads;
+extern size_t                    max_background_threads;
 extern background_thread_info_t *background_thread_info;
 
 bool background_thread_create(tsd_t *tsd, unsigned arena_ind);
 bool background_threads_enable(tsd_t *tsd);
 bool background_threads_disable(tsd_t *tsd);
-bool background_thread_is_started(background_thread_info_t* info);
-void background_thread_wakeup_early(background_thread_info_t *info,
-    nstime_t *remaining_sleep);
+bool background_thread_is_started(background_thread_info_t *info);
+void background_thread_wakeup_early(
+    background_thread_info_t *info, nstime_t *remaining_sleep);
 void background_thread_prefork0(tsdn_t *tsdn);
 void background_thread_prefork1(tsdn_t *tsdn);
 void background_thread_postfork_parent(tsdn_t *tsdn);
 void background_thread_postfork_child(tsdn_t *tsdn);
-bool background_thread_stats_read(tsdn_t *tsdn,
-    background_thread_stats_t *stats);
+bool background_thread_stats_read(
+    tsdn_t *tsdn, background_thread_stats_t *stats);
 void background_thread_ctl_init(tsdn_t *tsdn);
 
 #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
diff --git a/include/jemalloc/internal/background_thread_inlines.h b/include/jemalloc/internal/background_thread_inlines.h
index 92c5febe..e822a3f7 100644
--- a/include/jemalloc/internal/background_thread_inlines.h
+++ b/include/jemalloc/internal/background_thread_inlines.h
@@ -1,15 +1,25 @@
 #ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H
 #define JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/arena_inlines_a.h"
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/background_thread_externs.h"
+
 JEMALLOC_ALWAYS_INLINE bool
 background_thread_enabled(void) {
 	return atomic_load_b(&background_thread_enabled_state, ATOMIC_RELAXED);
 }
 
+JEMALLOC_ALWAYS_INLINE void
+background_thread_enabled_set_impl(bool state) {
+	atomic_store_b(&background_thread_enabled_state, state, ATOMIC_RELAXED);
+}
+
 JEMALLOC_ALWAYS_INLINE void
 background_thread_enabled_set(tsdn_t *tsdn, bool state) {
 	malloc_mutex_assert_owner(tsdn, &background_thread_lock);
-	atomic_store_b(&background_thread_enabled_state, state, ATOMIC_RELAXED);
+	background_thread_enabled_set_impl(state);
 }
 
 JEMALLOC_ALWAYS_INLINE background_thread_info_t *
@@ -26,14 +36,14 @@ background_thread_info_get(size_t ind) {
 JEMALLOC_ALWAYS_INLINE uint64_t
 background_thread_wakeup_time_get(background_thread_info_t *info) {
 	uint64_t next_wakeup = nstime_ns(&info->next_wakeup);
-	assert(atomic_load_b(&info->indefinite_sleep, ATOMIC_ACQUIRE) ==
-	    (next_wakeup == BACKGROUND_THREAD_INDEFINITE_SLEEP));
+	assert(atomic_load_b(&info->indefinite_sleep, ATOMIC_ACQUIRE)
+	    == (next_wakeup == BACKGROUND_THREAD_INDEFINITE_SLEEP));
 	return next_wakeup;
 }
 
 JEMALLOC_ALWAYS_INLINE void
-background_thread_wakeup_time_set(tsdn_t *tsdn, background_thread_info_t *info,
-    uint64_t wakeup_time) {
+background_thread_wakeup_time_set(
+    tsdn_t *tsdn, background_thread_info_t *info, uint64_t wakeup_time) {
 	malloc_mutex_assert_owner(tsdn, &info->mtx);
 	atomic_store_b(&info->indefinite_sleep,
 	    wakeup_time == BACKGROUND_THREAD_INDEFINITE_SLEEP, ATOMIC_RELEASE);
diff --git a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread_structs.h
index 83a91984..d56673da 100644
--- a/include/jemalloc/internal/background_thread_structs.h
+++ b/include/jemalloc/internal/background_thread_structs.h
@@ -1,10 +1,13 @@
 #ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H
 #define JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/mutex.h"
+
 /* This file really combines "structs" and "types", but only transitionally. */
 
 #if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK)
-#  define JEMALLOC_PTHREAD_CREATE_WRAPPER
+#	define JEMALLOC_PTHREAD_CREATE_WRAPPER
 #endif
 
 #define BACKGROUND_THREAD_INDEFINITE_SLEEP UINT64_MAX
@@ -32,33 +35,33 @@ typedef enum {
 struct background_thread_info_s {
 #ifdef JEMALLOC_BACKGROUND_THREAD
 	/* Background thread is pthread specific. */
-	pthread_t		thread;
-	pthread_cond_t		cond;
+	pthread_t      thread;
+	pthread_cond_t cond;
 #endif
-	malloc_mutex_t		mtx;
-	background_thread_state_t	state;
+	malloc_mutex_t            mtx;
+	background_thread_state_t state;
 	/* When true, it means no wakeup scheduled. */
-	atomic_b_t		indefinite_sleep;
+	atomic_b_t indefinite_sleep;
 	/* Next scheduled wakeup time (absolute time in ns). */
-	nstime_t		next_wakeup;
+	nstime_t next_wakeup;
 	/*
 	 *  Since the last background thread run, newly added number of pages
 	 *  that need to be purged by the next wakeup.  This is adjusted on
 	 *  epoch advance, and is used to determine whether we should signal the
 	 *  background thread to wake up earlier.
 	 */
-	size_t			npages_to_purge_new;
+	size_t npages_to_purge_new;
 	/* Stats: total number of runs since started. */
-	uint64_t		tot_n_runs;
+	uint64_t tot_n_runs;
 	/* Stats: total sleep time since started. */
-	nstime_t		tot_sleep_time;
+	nstime_t tot_sleep_time;
 };
 typedef struct background_thread_info_s background_thread_info_t;
 
 struct background_thread_stats_s {
-	size_t num_threads;
-	uint64_t num_runs;
-	nstime_t run_interval;
+	size_t            num_threads;
+	uint64_t          num_runs;
+	nstime_t          run_interval;
 	mutex_prof_data_t max_counter_per_bg_thd;
 };
 typedef struct background_thread_stats_s background_thread_stats_t;
diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h
index 9b2c9fb1..f71a874c 100644
--- a/include/jemalloc/internal/base.h
+++ b/include/jemalloc/internal/base.h
@@ -1,12 +1,19 @@
 #ifndef JEMALLOC_INTERNAL_BASE_H
 #define JEMALLOC_INTERNAL_BASE_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/edata.h"
 #include "jemalloc/internal/ehooks.h"
 #include "jemalloc/internal/mutex.h"
 
+/*
+ * Alignment when THP is not enabled.  Set to constant 2M in case the HUGEPAGE
+ * value is unexpected high (which would cause VM over-reservation).
+ */
+#define BASE_BLOCK_MIN_ALIGN ((size_t)2 << 20)
+
 enum metadata_thp_mode_e {
-	metadata_thp_disabled   = 0,
+	metadata_thp_disabled = 0,
 	/*
 	 * Lazily enable hugepage for metadata. To avoid high RSS caused by THP
 	 * + low usage arena (i.e. THP becomes a significant percentage), the
@@ -15,16 +22,15 @@ enum metadata_thp_mode_e {
 	 * arena), "auto" behaves the same as "always", i.e. madvise hugepage
 	 * right away.
 	 */
-	metadata_thp_auto       = 1,
-	metadata_thp_always     = 2,
+	metadata_thp_auto = 1,
+	metadata_thp_always = 2,
 	metadata_thp_mode_limit = 3
 };
 typedef enum metadata_thp_mode_e metadata_thp_mode_t;
 
 #define METADATA_THP_DEFAULT metadata_thp_disabled
 extern metadata_thp_mode_t opt_metadata_thp;
-extern const char *metadata_thp_mode_names[];
-
+extern const char *const   metadata_thp_mode_names[];
 
 /* Embedded at the beginning of every block of base-managed virtual memory. */
 typedef struct base_block_s base_block_t;
@@ -72,8 +78,13 @@ struct base_s {
 	/* Heap of extents that track unused trailing space within blocks. */
 	edata_heap_t avail[SC_NSIZES];
 
+	/* Contains reusable base edata (used by tcache_stacks currently). */
+	edata_avail_t edata_avail;
+
 	/* Stats, only maintained if config_stats. */
 	size_t allocated;
+	size_t edata_allocated;
+	size_t rtree_allocated;
 	size_t resident;
 	size_t mapped;
 	/* Number of THP regions touched. */
@@ -91,20 +102,24 @@ metadata_thp_enabled(void) {
 }
 
 base_t *b0get(void);
-base_t *base_new(tsdn_t *tsdn, unsigned ind,
-    const extent_hooks_t *extent_hooks, bool metadata_use_hooks);
-void base_delete(tsdn_t *tsdn, base_t *base);
-ehooks_t *base_ehooks_get(base_t *base);
-ehooks_t *base_ehooks_get_for_metadata(base_t *base);
-extent_hooks_t *base_extent_hooks_set(base_t *base,
-    extent_hooks_t *extent_hooks);
-void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment);
+base_t *base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks,
+    bool metadata_use_hooks);
+void    base_delete(tsdn_t *tsdn, base_t *base);
+ehooks_t       *base_ehooks_get(base_t *base);
+ehooks_t       *base_ehooks_get_for_metadata(base_t *base);
+extent_hooks_t *base_extent_hooks_set(
+    base_t *base, extent_hooks_t *extent_hooks);
+void    *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment);
 edata_t *base_alloc_edata(tsdn_t *tsdn, base_t *base);
-void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated,
-    size_t *resident, size_t *mapped, size_t *n_thp);
-void base_prefork(tsdn_t *tsdn, base_t *base);
-void base_postfork_parent(tsdn_t *tsdn, base_t *base);
-void base_postfork_child(tsdn_t *tsdn, base_t *base);
-bool base_boot(tsdn_t *tsdn);
+void    *base_alloc_rtree(tsdn_t *tsdn, base_t *base, size_t size);
+void    *b0_alloc_tcache_stack(tsdn_t *tsdn, size_t size);
+void     b0_dalloc_tcache_stack(tsdn_t *tsdn, void *tcache_stack);
+void     base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated,
+        size_t *edata_allocated, size_t *rtree_allocated, size_t *resident,
+        size_t *mapped, size_t *n_thp);
+void     base_prefork(tsdn_t *tsdn, base_t *base);
+void     base_postfork_parent(tsdn_t *tsdn, base_t *base);
+void     base_postfork_child(tsdn_t *tsdn, base_t *base);
+bool     base_boot(tsdn_t *tsdn);
 
 #endif /* JEMALLOC_INTERNAL_BASE_H */
diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h
index 63f97395..51d4c89e 100644
--- a/include/jemalloc/internal/bin.h
+++ b/include/jemalloc/internal/bin.h
@@ -1,6 +1,8 @@
 #ifndef JEMALLOC_INTERNAL_BIN_H
 #define JEMALLOC_INTERNAL_BIN_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/bin_info.h"
 #include "jemalloc/internal/bin_stats.h"
 #include "jemalloc/internal/bin_types.h"
 #include "jemalloc/internal/edata.h"
@@ -14,13 +16,13 @@
 typedef struct bin_s bin_t;
 struct bin_s {
 	/* All operations on bin_t fields require lock ownership. */
-	malloc_mutex_t		lock;
+	malloc_mutex_t lock;
 
 	/*
 	 * Bin statistics.  These get touched every time the lock is acquired,
 	 * so put them close by in the hopes of getting some cache locality.
 	 */
-	bin_stats_t	stats;
+	bin_stats_t stats;
 
 	/*
 	 * Current slab being used to service allocations of this bin's size
@@ -28,17 +30,17 @@ struct bin_s {
 	 * slabcur is reassigned, the previous slab must be deallocated or
 	 * inserted into slabs_{nonfull,full}.
 	 */
-	edata_t			*slabcur;
+	edata_t *slabcur;
 
 	/*
 	 * Heap of non-full slabs.  This heap is used to assure that new
 	 * allocations come from the non-full slab that is oldest/lowest in
 	 * memory.
 	 */
-	edata_heap_t		slabs_nonfull;
+	edata_heap_t slabs_nonfull;
 
 	/* List used to track full slabs. */
-	edata_list_active_t	slabs_full;
+	edata_list_active_t slabs_full;
 };
 
 /* A set of sharded bins of the same size class. */
@@ -48,7 +50,7 @@ struct bins_s {
 	bin_t *bin_shards;
 };
 
-void bin_shard_sizes_boot(unsigned bin_shards[SC_NBINS]);
+void bin_shard_sizes_boot(unsigned bin_shard_sizes[SC_NBINS]);
 bool bin_update_shard_size(unsigned bin_shards[SC_NBINS], size_t start_size,
     size_t end_size, size_t nshards);
 
@@ -60,6 +62,43 @@ void bin_prefork(tsdn_t *tsdn, bin_t *bin);
 void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin);
 void bin_postfork_child(tsdn_t *tsdn, bin_t *bin);
 
+/* Slab region allocation. */
+void *bin_slab_reg_alloc(edata_t *slab, const bin_info_t *bin_info);
+void  bin_slab_reg_alloc_batch(
+     edata_t *slab, const bin_info_t *bin_info, unsigned cnt, void **ptrs);
+
+/* Slab list management. */
+void     bin_slabs_nonfull_insert(bin_t *bin, edata_t *slab);
+void     bin_slabs_nonfull_remove(bin_t *bin, edata_t *slab);
+edata_t *bin_slabs_nonfull_tryget(bin_t *bin);
+void     bin_slabs_full_insert(bool is_auto, bin_t *bin, edata_t *slab);
+void     bin_slabs_full_remove(bool is_auto, bin_t *bin, edata_t *slab);
+
+/* Slab association / demotion. */
+void bin_dissociate_slab(bool is_auto, edata_t *slab, bin_t *bin);
+void bin_lower_slab(tsdn_t *tsdn, bool is_auto, edata_t *slab, bin_t *bin);
+
+/* Deallocation helpers (called under bin lock). */
+void bin_dalloc_slab_prepare(tsdn_t *tsdn, edata_t *slab, bin_t *bin);
+void bin_dalloc_locked_handle_newly_empty(
+    tsdn_t *tsdn, bool is_auto, edata_t *slab, bin_t *bin);
+void bin_dalloc_locked_handle_newly_nonempty(
+    tsdn_t *tsdn, bool is_auto, edata_t *slab, bin_t *bin);
+
+/* Slabcur refill and allocation. */
+void  bin_refill_slabcur_with_fresh_slab(tsdn_t *tsdn, bin_t *bin,
+    szind_t binind, edata_t *fresh_slab);
+void *bin_malloc_with_fresh_slab(tsdn_t *tsdn, bin_t *bin,
+    szind_t binind, edata_t *fresh_slab);
+bool  bin_refill_slabcur_no_fresh_slab(tsdn_t *tsdn, bool is_auto,
+    bin_t *bin);
+void *bin_malloc_no_fresh_slab(tsdn_t *tsdn, bool is_auto, bin_t *bin,
+    szind_t binind);
+
+/* Bin selection. */
+bin_t *bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind,
+    unsigned *binshard_p);
+
 /* Stats. */
 static inline void
 bin_stats_merge(tsdn_t *tsdn, bin_stats_data_t *dst_bin_stats, bin_t *bin) {
diff --git a/include/jemalloc/internal/bin_info.h b/include/jemalloc/internal/bin_info.h
index 7fe65c86..8c563dee 100644
--- a/include/jemalloc/internal/bin_info.h
+++ b/include/jemalloc/internal/bin_info.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_BIN_INFO_H
 #define JEMALLOC_INTERNAL_BIN_INFO_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/bitmap.h"
 
 /*
@@ -25,22 +26,22 @@
 typedef struct bin_info_s bin_info_t;
 struct bin_info_s {
 	/* Size of regions in a slab for this bin's size class. */
-	size_t			reg_size;
+	size_t reg_size;
 
 	/* Total size of a slab for this bin's size class. */
-	size_t			slab_size;
+	size_t slab_size;
 
 	/* Total number of regions in a slab for this bin's size class. */
-	uint32_t		nregs;
+	uint32_t nregs;
 
 	/* Number of sharded bins in each arena for this size class. */
-	uint32_t		n_shards;
+	uint32_t n_shards;
 
 	/*
 	 * Metadata used to manipulate bitmaps for slabs associated with this
 	 * bin.
 	 */
-	bitmap_info_t		bitmap_info;
+	bitmap_info_t bitmap_info;
 };
 
 extern bin_info_t bin_infos[SC_NBINS];
diff --git a/include/jemalloc/internal/bin_inlines.h b/include/jemalloc/internal/bin_inlines.h
new file mode 100644
index 00000000..f4291169
--- /dev/null
+++ b/include/jemalloc/internal/bin_inlines.h
@@ -0,0 +1,112 @@
+#ifndef JEMALLOC_INTERNAL_BIN_INLINES_H
+#define JEMALLOC_INTERNAL_BIN_INLINES_H
+
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/bin.h"
+#include "jemalloc/internal/bin_info.h"
+#include "jemalloc/internal/bitmap.h"
+#include "jemalloc/internal/div.h"
+#include "jemalloc/internal/edata.h"
+#include "jemalloc/internal/sc.h"
+
+/*
+ * The dalloc bin info contains just the information that the common paths need
+ * during tcache flushes.  By force-inlining these paths, and using local copies
+ * of data (so that the compiler knows it's constant), we avoid a whole bunch of
+ * redundant loads and stores by leaving this information in registers.
+ */
+typedef struct bin_dalloc_locked_info_s bin_dalloc_locked_info_t;
+struct bin_dalloc_locked_info_s {
+	div_info_t div_info;
+	uint32_t   nregs;
+	uint64_t   ndalloc;
+};
+
+/* Find the region index of a pointer within a slab. */
+JEMALLOC_ALWAYS_INLINE size_t
+bin_slab_regind_impl(
+    div_info_t *div_info, szind_t binind, edata_t *slab, const void *ptr) {
+	size_t diff, regind;
+
+	/* Freeing a pointer outside the slab can cause assertion failure. */
+	assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab));
+	assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab));
+	/* Freeing an interior pointer can cause assertion failure. */
+	assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab))
+	        % (uintptr_t)bin_infos[binind].reg_size
+	    == 0);
+
+	diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab));
+
+	/* Avoid doing division with a variable divisor. */
+	regind = div_compute(div_info, diff);
+	assert(regind < bin_infos[binind].nregs);
+	return regind;
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+bin_slab_regind(bin_dalloc_locked_info_t *info, szind_t binind,
+    edata_t *slab, const void *ptr) {
+	size_t regind = bin_slab_regind_impl(
+	    &info->div_info, binind, slab, ptr);
+	return regind;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+bin_dalloc_locked_begin(
+    bin_dalloc_locked_info_t *info, szind_t binind) {
+	info->div_info = arena_binind_div_info[binind];
+	info->nregs = bin_infos[binind].nregs;
+	info->ndalloc = 0;
+}
+
+/*
+ * Does the deallocation work associated with freeing a single pointer (a
+ * "step") in between a bin_dalloc_locked begin and end call.
+ *
+ * Returns true if arena_slab_dalloc must be called on slab.  Doesn't do
+ * stats updates, which happen during finish (this lets running counts get left
+ * in a register).
+ */
+JEMALLOC_ALWAYS_INLINE bool
+bin_dalloc_locked_step(tsdn_t *tsdn, bool is_auto, bin_t *bin,
+    bin_dalloc_locked_info_t *info, szind_t binind, edata_t *slab,
+    void *ptr) {
+	const bin_info_t *bin_info = &bin_infos[binind];
+	size_t            regind = bin_slab_regind(info, binind, slab, ptr);
+	slab_data_t      *slab_data = edata_slab_data_get(slab);
+
+	assert(edata_nfree_get(slab) < bin_info->nregs);
+	/* Freeing an unallocated pointer can cause assertion failure. */
+	assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind));
+
+	bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind);
+	edata_nfree_inc(slab);
+
+	if (config_stats) {
+		info->ndalloc++;
+	}
+
+	unsigned nfree = edata_nfree_get(slab);
+	if (nfree == bin_info->nregs) {
+		bin_dalloc_locked_handle_newly_empty(
+		    tsdn, is_auto, slab, bin);
+		return true;
+	} else if (nfree == 1 && slab != bin->slabcur) {
+		bin_dalloc_locked_handle_newly_nonempty(
+		    tsdn, is_auto, slab, bin);
+	}
+	return false;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+bin_dalloc_locked_finish(tsdn_t *tsdn, bin_t *bin,
+    bin_dalloc_locked_info_t *info) {
+	if (config_stats) {
+		bin->stats.ndalloc += info->ndalloc;
+		assert(bin->stats.curregs >= (size_t)info->ndalloc);
+		bin->stats.curregs -= (size_t)info->ndalloc;
+	}
+}
+
+#endif /* JEMALLOC_INTERNAL_BIN_INLINES_H */
diff --git a/include/jemalloc/internal/bin_stats.h b/include/jemalloc/internal/bin_stats.h
index 0b99297c..9900e0d1 100644
--- a/include/jemalloc/internal/bin_stats.h
+++ b/include/jemalloc/internal/bin_stats.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_BIN_STATS_H
 #define JEMALLOC_INTERNAL_BIN_STATS_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/mutex_prof.h"
 
 typedef struct bin_stats_s bin_stats_t;
@@ -11,47 +12,47 @@ struct bin_stats_s {
 	 * many times, resulting many increments to nrequests, but only one
 	 * each to nmalloc and ndalloc.
 	 */
-	uint64_t	nmalloc;
-	uint64_t	ndalloc;
+	uint64_t nmalloc;
+	uint64_t ndalloc;
 
 	/*
 	 * Number of allocation requests that correspond to the size of this
 	 * bin.  This includes requests served by tcache, though tcache only
 	 * periodically merges into this counter.
 	 */
-	uint64_t	nrequests;
+	uint64_t nrequests;
 
 	/*
 	 * Current number of regions of this size class, including regions
 	 * currently cached by tcache.
 	 */
-	size_t		curregs;
+	size_t curregs;
 
 	/* Number of tcache fills from this bin. */
-	uint64_t	nfills;
+	uint64_t nfills;
 
 	/* Number of tcache flushes to this bin. */
-	uint64_t	nflushes;
+	uint64_t nflushes;
 
 	/* Total number of slabs created for this bin's size class. */
-	uint64_t	nslabs;
+	uint64_t nslabs;
 
 	/*
 	 * Total number of slabs reused by extracting them from the slabs heap
 	 * for this bin's size class.
 	 */
-	uint64_t	reslabs;
+	uint64_t reslabs;
 
 	/* Current number of slabs in this bin. */
-	size_t		curslabs;
+	size_t curslabs;
 
 	/* Current size of nonfull slabs heap in this bin. */
-	size_t		nonfull_slabs;
+	size_t nonfull_slabs;
 };
 
 typedef struct bin_stats_data_s bin_stats_data_t;
 struct bin_stats_data_s {
-	bin_stats_t stats_data;
+	bin_stats_t       stats_data;
 	mutex_prof_data_t mutex_data;
 };
 #endif /* JEMALLOC_INTERNAL_BIN_STATS_H */
diff --git a/include/jemalloc/internal/bin_types.h b/include/jemalloc/internal/bin_types.h
index 945e8326..b6bad37e 100644
--- a/include/jemalloc/internal/bin_types.h
+++ b/include/jemalloc/internal/bin_types.h
@@ -1,13 +1,17 @@
 #ifndef JEMALLOC_INTERNAL_BIN_TYPES_H
 #define JEMALLOC_INTERNAL_BIN_TYPES_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/sc.h"
 
 #define BIN_SHARDS_MAX (1 << EDATA_BITS_BINSHARD_WIDTH)
 #define N_BIN_SHARDS_DEFAULT 1
 
 /* Used in TSD static initializer only. Real init in arena_bind(). */
-#define TSD_BINSHARDS_ZERO_INITIALIZER {{UINT8_MAX}}
+#define TSD_BINSHARDS_ZERO_INITIALIZER                                         \
+	{                                                                      \
+		{ UINT8_MAX }                                                  \
+	}
 
 typedef struct tsd_binshards_s tsd_binshards_t;
 struct tsd_binshards_s {
diff --git a/include/jemalloc/internal/bit_util.h b/include/jemalloc/internal/bit_util.h
index bac59140..88c7942e 100644
--- a/include/jemalloc/internal/bit_util.h
+++ b/include/jemalloc/internal/bit_util.h
@@ -1,12 +1,13 @@
 #ifndef JEMALLOC_INTERNAL_BIT_UTIL_H
 #define JEMALLOC_INTERNAL_BIT_UTIL_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/assert.h"
 
 /* Sanity check. */
-#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \
+#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL)      \
     || !defined(JEMALLOC_INTERNAL_FFS)
-#  error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure
+#	error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure
 #endif
 
 /*
@@ -34,6 +35,7 @@ ffs_u(unsigned x) {
 	return JEMALLOC_INTERNAL_FFS(x) - 1;
 }
 
+/* clang-format off */
 #define DO_FLS_SLOW(x, suffix) do {					\
 	util_assume(x != 0);						\
 	x |= (x >> 1);							\
@@ -57,6 +59,7 @@ ffs_u(unsigned x) {
 	}								\
 	return ffs_##suffix(x) - 1;					\
 } while(0)
+/* clang-format on */
 
 static inline unsigned
 fls_llu_slow(unsigned long long x) {
@@ -107,16 +110,19 @@ fls_u(unsigned x) {
 }
 #elif defined(_MSC_VER)
 
-#if LG_SIZEOF_PTR == 3
-#define DO_BSR64(bit, x) _BitScanReverse64(&bit, x)
-#else
+#	if LG_SIZEOF_PTR == 3
+#		define DO_BSR64(bit, x) _BitScanReverse64(&bit, x)
+#	else
 /*
  * This never actually runs; we're just dodging a compiler error for the
  * never-taken branch where sizeof(void *) == 8.
  */
-#define DO_BSR64(bit, x) bit = 0; unreachable()
-#endif
+#		define DO_BSR64(bit, x)                                       \
+			bit = 0;                                               \
+			unreachable()
+#	endif
 
+/* clang-format off */
 #define DO_FLS(x) do {							\
 	if (x == 0) {							\
 		return 8 * sizeof(x);					\
@@ -143,6 +149,7 @@ fls_u(unsigned x) {
 	}								\
 	unreachable();							\
 } while (0)
+/* clang-format on */
 
 static inline unsigned
 fls_llu(unsigned long long x) {
@@ -159,8 +166,8 @@ fls_u(unsigned x) {
 	DO_FLS(x);
 }
 
-#undef DO_FLS
-#undef DO_BSR64
+#	undef DO_FLS
+#	undef DO_BSR64
 #else
 
 static inline unsigned
@@ -180,9 +187,10 @@ fls_u(unsigned x) {
 #endif
 
 #if LG_SIZEOF_LONG_LONG > 3
-#  error "Haven't implemented popcount for 16-byte ints."
+#	error "Haven't implemented popcount for 16-byte ints."
 #endif
 
+/* clang-format off */
 #define DO_POPCOUNT(x, type) do {					\
 	/*								\
 	 * Algorithm from an old AMD optimization reference manual.	\
@@ -226,6 +234,7 @@ fls_u(unsigned x) {
 	x >>= ((sizeof(x) - 1) * 8);					\
 	return (unsigned)x;						\
 } while(0)
+/* clang-format on */
 
 static inline unsigned
 popcount_u_slow(unsigned bitmap) {
@@ -277,7 +286,7 @@ popcount_llu(unsigned long long bitmap) {
  */
 
 static inline size_t
-cfs_lu(unsigned long* bitmap) {
+cfs_lu(unsigned long *bitmap) {
 	util_assume(*bitmap != 0);
 	size_t bit = ffs_lu(*bitmap);
 	*bitmap ^= ZU(1) << bit;
@@ -293,7 +302,7 @@ ffs_zu(size_t x) {
 #elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG
 	return ffs_llu(x);
 #else
-#error No implementation for size_t ffs()
+#	error No implementation for size_t ffs()
 #endif
 }
 
@@ -306,11 +315,10 @@ fls_zu(size_t x) {
 #elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG
 	return fls_llu(x);
 #else
-#error No implementation for size_t fls()
+#	error No implementation for size_t fls()
 #endif
 }
 
-
 static inline unsigned
 ffs_u64(uint64_t x) {
 #if LG_SIZEOF_LONG == 3
@@ -318,7 +326,7 @@ ffs_u64(uint64_t x) {
 #elif LG_SIZEOF_LONG_LONG == 3
 	return ffs_llu(x);
 #else
-#error No implementation for 64-bit ffs()
+#	error No implementation for 64-bit ffs()
 #endif
 }
 
@@ -329,7 +337,7 @@ fls_u64(uint64_t x) {
 #elif LG_SIZEOF_LONG_LONG == 3
 	return fls_llu(x);
 #else
-#error No implementation for 64-bit fls()
+#	error No implementation for 64-bit fls()
 #endif
 }
 
@@ -338,9 +346,8 @@ ffs_u32(uint32_t x) {
 #if LG_SIZEOF_INT == 2
 	return ffs_u(x);
 #else
-#error No implementation for 32-bit ffs()
+#	error No implementation for 32-bit ffs()
 #endif
-	return ffs_u(x);
 }
 
 static inline unsigned
@@ -348,9 +355,8 @@ fls_u32(uint32_t x) {
 #if LG_SIZEOF_INT == 2
 	return fls_u(x);
 #else
-#error No implementation for 32-bit fls()
+#	error No implementation for 32-bit fls()
 #endif
-	return fls_u(x);
 }
 
 static inline uint64_t
@@ -370,7 +376,7 @@ pow2_ceil_u64(uint64_t x) {
 static inline uint32_t
 pow2_ceil_u32(uint32_t x) {
 	if (unlikely(x <= 1)) {
-	    return x;
+		return x;
 	}
 	size_t msb_on_index = fls_u32(x - 1);
 	/* As above. */
@@ -408,13 +414,16 @@ lg_ceil(size_t x) {
 #define LG_FLOOR_2(x) (x < (1ULL << 1) ? LG_FLOOR_1(x) : 1 + LG_FLOOR_1(x >> 1))
 #define LG_FLOOR_4(x) (x < (1ULL << 2) ? LG_FLOOR_2(x) : 2 + LG_FLOOR_2(x >> 2))
 #define LG_FLOOR_8(x) (x < (1ULL << 4) ? LG_FLOOR_4(x) : 4 + LG_FLOOR_4(x >> 4))
-#define LG_FLOOR_16(x) (x < (1ULL << 8) ? LG_FLOOR_8(x) : 8 + LG_FLOOR_8(x >> 8))
-#define LG_FLOOR_32(x) (x < (1ULL << 16) ? LG_FLOOR_16(x) : 16 + LG_FLOOR_16(x >> 16))
-#define LG_FLOOR_64(x) (x < (1ULL << 32) ? LG_FLOOR_32(x) : 32 + LG_FLOOR_32(x >> 32))
+#define LG_FLOOR_16(x)                                                         \
+	(x < (1ULL << 8) ? LG_FLOOR_8(x) : 8 + LG_FLOOR_8(x >> 8))
+#define LG_FLOOR_32(x)                                                         \
+	(x < (1ULL << 16) ? LG_FLOOR_16(x) : 16 + LG_FLOOR_16(x >> 16))
+#define LG_FLOOR_64(x)                                                         \
+	(x < (1ULL << 32) ? LG_FLOOR_32(x) : 32 + LG_FLOOR_32(x >> 32))
 #if LG_SIZEOF_PTR == 2
-#  define LG_FLOOR(x) LG_FLOOR_32((x))
+#	define LG_FLOOR(x) LG_FLOOR_32((x))
 #else
-#  define LG_FLOOR(x) LG_FLOOR_64((x))
+#	define LG_FLOOR(x) LG_FLOOR_64((x))
 #endif
 
 #define LG_CEIL(x) (LG_FLOOR(x) + (((x) & ((x) - 1)) == 0 ? 0 : 1))
diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h
index dc19454d..e0f596fb 100644
--- a/include/jemalloc/internal/bitmap.h
+++ b/include/jemalloc/internal/bitmap.h
@@ -1,26 +1,27 @@
 #ifndef JEMALLOC_INTERNAL_BITMAP_H
 #define JEMALLOC_INTERNAL_BITMAP_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/bit_util.h"
 #include "jemalloc/internal/sc.h"
 
 typedef unsigned long bitmap_t;
-#define LG_SIZEOF_BITMAP	LG_SIZEOF_LONG
+#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG
 
 /* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */
 #if SC_LG_SLAB_MAXREGS > LG_CEIL(SC_NSIZES)
 /* Maximum bitmap bit count is determined by maximum regions per slab. */
-#  define LG_BITMAP_MAXBITS	SC_LG_SLAB_MAXREGS
+#	define LG_BITMAP_MAXBITS SC_LG_SLAB_MAXREGS
 #else
 /* Maximum bitmap bit count is determined by number of extent size classes. */
-#  define LG_BITMAP_MAXBITS	LG_CEIL(SC_NSIZES)
+#	define LG_BITMAP_MAXBITS LG_CEIL(SC_NSIZES)
 #endif
-#define BITMAP_MAXBITS		(ZU(1) << LG_BITMAP_MAXBITS)
+#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS)
 
 /* Number of bits per group. */
-#define LG_BITMAP_GROUP_NBITS		(LG_SIZEOF_BITMAP + 3)
-#define BITMAP_GROUP_NBITS		(1U << LG_BITMAP_GROUP_NBITS)
-#define BITMAP_GROUP_NBITS_MASK		(BITMAP_GROUP_NBITS-1)
+#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3)
+#define BITMAP_GROUP_NBITS (1U << LG_BITMAP_GROUP_NBITS)
+#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS - 1)
 
 /*
  * Do some analysis on how big the bitmap is before we use a tree.  For a brute
@@ -28,67 +29,64 @@ typedef unsigned long bitmap_t;
  * use a tree instead.
  */
 #if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3
-#  define BITMAP_USE_TREE
+#	define BITMAP_USE_TREE
 #endif
 
 /* Number of groups required to store a given number of bits. */
-#define BITMAP_BITS2GROUPS(nbits)					\
-    (((nbits) + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS)
+#define BITMAP_BITS2GROUPS(nbits)                                              \
+	(((nbits) + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS)
 
 /*
  * Number of groups required at a particular level for a given number of bits.
  */
-#define BITMAP_GROUPS_L0(nbits)						\
-    BITMAP_BITS2GROUPS(nbits)
-#define BITMAP_GROUPS_L1(nbits)						\
-    BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(nbits))
-#define BITMAP_GROUPS_L2(nbits)						\
-    BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits))))
-#define BITMAP_GROUPS_L3(nbits)						\
-    BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(		\
-	BITMAP_BITS2GROUPS((nbits)))))
-#define BITMAP_GROUPS_L4(nbits)						\
-    BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(		\
-	BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits))))))
+#define BITMAP_GROUPS_L0(nbits) BITMAP_BITS2GROUPS(nbits)
+#define BITMAP_GROUPS_L1(nbits) BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(nbits))
+#define BITMAP_GROUPS_L2(nbits)                                                \
+	BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits))))
+#define BITMAP_GROUPS_L3(nbits)                                                \
+	BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(                                 \
+	    BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))))
+#define BITMAP_GROUPS_L4(nbits)                                                \
+	BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(              \
+	    BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits))))))
 
 /*
  * Assuming the number of levels, number of groups required for a given number
  * of bits.
  */
-#define BITMAP_GROUPS_1_LEVEL(nbits)					\
-    BITMAP_GROUPS_L0(nbits)
-#define BITMAP_GROUPS_2_LEVEL(nbits)					\
-    (BITMAP_GROUPS_1_LEVEL(nbits) + BITMAP_GROUPS_L1(nbits))
-#define BITMAP_GROUPS_3_LEVEL(nbits)					\
-    (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits))
-#define BITMAP_GROUPS_4_LEVEL(nbits)					\
-    (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits))
-#define BITMAP_GROUPS_5_LEVEL(nbits)					\
-    (BITMAP_GROUPS_4_LEVEL(nbits) + BITMAP_GROUPS_L4(nbits))
+#define BITMAP_GROUPS_1_LEVEL(nbits) BITMAP_GROUPS_L0(nbits)
+#define BITMAP_GROUPS_2_LEVEL(nbits)                                           \
+	(BITMAP_GROUPS_1_LEVEL(nbits) + BITMAP_GROUPS_L1(nbits))
+#define BITMAP_GROUPS_3_LEVEL(nbits)                                           \
+	(BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits))
+#define BITMAP_GROUPS_4_LEVEL(nbits)                                           \
+	(BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits))
+#define BITMAP_GROUPS_5_LEVEL(nbits)                                           \
+	(BITMAP_GROUPS_4_LEVEL(nbits) + BITMAP_GROUPS_L4(nbits))
 
 /*
  * Maximum number of groups required to support LG_BITMAP_MAXBITS.
  */
 #ifdef BITMAP_USE_TREE
 
-#if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS
-#  define BITMAP_GROUPS(nbits)	BITMAP_GROUPS_1_LEVEL(nbits)
-#  define BITMAP_GROUPS_MAX	BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS)
-#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2
-#  define BITMAP_GROUPS(nbits)	BITMAP_GROUPS_2_LEVEL(nbits)
-#  define BITMAP_GROUPS_MAX	BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS)
-#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3
-#  define BITMAP_GROUPS(nbits)	BITMAP_GROUPS_3_LEVEL(nbits)
-#  define BITMAP_GROUPS_MAX	BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS)
-#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4
-#  define BITMAP_GROUPS(nbits)	BITMAP_GROUPS_4_LEVEL(nbits)
-#  define BITMAP_GROUPS_MAX	BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS)
-#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 5
-#  define BITMAP_GROUPS(nbits)	BITMAP_GROUPS_5_LEVEL(nbits)
-#  define BITMAP_GROUPS_MAX	BITMAP_GROUPS_5_LEVEL(BITMAP_MAXBITS)
-#else
-#  error "Unsupported bitmap size"
-#endif
+#	if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS
+#		define BITMAP_GROUPS(nbits) BITMAP_GROUPS_1_LEVEL(nbits)
+#		define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS)
+#	elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2
+#		define BITMAP_GROUPS(nbits) BITMAP_GROUPS_2_LEVEL(nbits)
+#		define BITMAP_GROUPS_MAX BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS)
+#	elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3
+#		define BITMAP_GROUPS(nbits) BITMAP_GROUPS_3_LEVEL(nbits)
+#		define BITMAP_GROUPS_MAX BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS)
+#	elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4
+#		define BITMAP_GROUPS(nbits) BITMAP_GROUPS_4_LEVEL(nbits)
+#		define BITMAP_GROUPS_MAX BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS)
+#	elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 5
+#		define BITMAP_GROUPS(nbits) BITMAP_GROUPS_5_LEVEL(nbits)
+#		define BITMAP_GROUPS_MAX BITMAP_GROUPS_5_LEVEL(BITMAP_MAXBITS)
+#	else
+#		error "Unsupported bitmap size"
+#	endif
 
 /*
  * Maximum number of levels possible.  This could be statically computed based
@@ -104,42 +102,53 @@ typedef unsigned long bitmap_t;
  * unused trailing entries in bitmap_info_t structures; the bitmaps themselves
  * are not impacted.
  */
-#define BITMAP_MAX_LEVELS	5
+#	define BITMAP_MAX_LEVELS 5
 
-#define BITMAP_INFO_INITIALIZER(nbits) {				\
-	/* nbits. */							\
-	nbits,								\
-	/* nlevels. */							\
-	(BITMAP_GROUPS_L0(nbits) > BITMAP_GROUPS_L1(nbits)) +		\
-	    (BITMAP_GROUPS_L1(nbits) > BITMAP_GROUPS_L2(nbits)) +	\
-	    (BITMAP_GROUPS_L2(nbits) > BITMAP_GROUPS_L3(nbits)) +	\
-	    (BITMAP_GROUPS_L3(nbits) > BITMAP_GROUPS_L4(nbits)) + 1,	\
-	/* levels. */							\
-	{								\
-		{0},							\
-		{BITMAP_GROUPS_L0(nbits)},				\
-		{BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)},	\
-		{BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) +	\
-		    BITMAP_GROUPS_L0(nbits)},				\
-		{BITMAP_GROUPS_L3(nbits) + BITMAP_GROUPS_L2(nbits) +	\
-		    BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)},	\
-		{BITMAP_GROUPS_L4(nbits) + BITMAP_GROUPS_L3(nbits) +	\
-		     BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits)	\
-		     + BITMAP_GROUPS_L0(nbits)}				\
-	}								\
-}
+#	define BITMAP_INFO_INITIALIZER(nbits)                                 \
+		{                                                              \
+			/* nbits. */                                           \
+			nbits, /* nlevels. */                                  \
+			    (BITMAP_GROUPS_L0(nbits)                           \
+			        > BITMAP_GROUPS_L1(nbits))                     \
+			    + (BITMAP_GROUPS_L1(nbits)                         \
+			        > BITMAP_GROUPS_L2(nbits))                     \
+			    + (BITMAP_GROUPS_L2(nbits)                         \
+			        > BITMAP_GROUPS_L3(nbits))                     \
+			    + (BITMAP_GROUPS_L3(nbits)                         \
+			        > BITMAP_GROUPS_L4(nbits))                     \
+			    + 1, /* levels. */                                 \
+			{                                                      \
+				{0}, {BITMAP_GROUPS_L0(nbits)},                \
+				    {BITMAP_GROUPS_L1(nbits)                   \
+				        + BITMAP_GROUPS_L0(nbits)},            \
+				    {BITMAP_GROUPS_L2(nbits)                   \
+				        + BITMAP_GROUPS_L1(nbits)              \
+				        + BITMAP_GROUPS_L0(nbits)},            \
+				    {BITMAP_GROUPS_L3(nbits)                   \
+				        + BITMAP_GROUPS_L2(nbits)              \
+				        + BITMAP_GROUPS_L1(nbits)              \
+				        + BITMAP_GROUPS_L0(nbits)},            \
+				{                                              \
+					BITMAP_GROUPS_L4(nbits)                \
+					    + BITMAP_GROUPS_L3(nbits)          \
+					    + BITMAP_GROUPS_L2(nbits)          \
+					    + BITMAP_GROUPS_L1(nbits)          \
+					    + BITMAP_GROUPS_L0(nbits)          \
+				}                                              \
+			}                                                      \
+		}
 
 #else /* BITMAP_USE_TREE */
 
-#define BITMAP_GROUPS(nbits)	BITMAP_BITS2GROUPS(nbits)
-#define BITMAP_GROUPS_MAX	BITMAP_BITS2GROUPS(BITMAP_MAXBITS)
+#	define BITMAP_GROUPS(nbits) BITMAP_BITS2GROUPS(nbits)
+#	define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS)
 
-#define BITMAP_INFO_INITIALIZER(nbits) {				\
-	/* nbits. */							\
-	nbits,								\
-	/* ngroups. */							\
-	BITMAP_BITS2GROUPS(nbits)					\
-}
+#	define BITMAP_INFO_INITIALIZER(nbits)                                 \
+		{                                                              \
+			/* nbits. */                                           \
+			nbits, /* ngroups. */                                  \
+			    BITMAP_BITS2GROUPS(nbits)                          \
+		}
 
 #endif /* BITMAP_USE_TREE */
 
@@ -160,21 +169,21 @@ typedef struct bitmap_info_s {
 	 * Only the first (nlevels+1) elements are used, and levels are ordered
 	 * bottom to top (e.g. the bottom level is stored in levels[0]).
 	 */
-	bitmap_level_t levels[BITMAP_MAX_LEVELS+1];
-#else /* BITMAP_USE_TREE */
+	bitmap_level_t levels[BITMAP_MAX_LEVELS + 1];
+#else  /* BITMAP_USE_TREE */
 	/* Number of groups necessary for nbits. */
 	size_t ngroups;
 #endif /* BITMAP_USE_TREE */
 } bitmap_info_t;
 
-void bitmap_info_init(bitmap_info_t *binfo, size_t nbits);
-void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill);
+void   bitmap_info_init(bitmap_info_t *binfo, size_t nbits);
+void   bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill);
 size_t bitmap_size(const bitmap_info_t *binfo);
 
 static inline bool
 bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) {
 #ifdef BITMAP_USE_TREE
-	size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1;
+	size_t   rgoff = binfo->levels[binfo->nlevels].group_offset - 1;
 	bitmap_t rg = bitmap[rgoff];
 	/* The bitmap is full iff the root group is 0. */
 	return (rg == 0);
@@ -192,7 +201,7 @@ bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) {
 
 static inline bool
 bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) {
-	size_t goff;
+	size_t   goff;
 	bitmap_t g;
 
 	assert(bit < binfo->nbits);
@@ -203,9 +212,9 @@ bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) {
 
 static inline void
 bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) {
-	size_t goff;
+	size_t    goff;
 	bitmap_t *gp;
-	bitmap_t g;
+	bitmap_t  g;
 
 	assert(bit < binfo->nbits);
 	assert(!bitmap_get(bitmap, binfo, bit));
@@ -244,12 +253,13 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) {
 #ifdef BITMAP_USE_TREE
 	size_t bit = 0;
 	for (unsigned level = binfo->nlevels; level--;) {
-		size_t lg_bits_per_group = (LG_BITMAP_GROUP_NBITS * (level +
-		    1));
-		bitmap_t group = bitmap[binfo->levels[level].group_offset + (bit
-		    >> lg_bits_per_group)];
-		unsigned group_nmask = (unsigned)(((min_bit > bit) ? (min_bit -
-		    bit) : 0) >> (lg_bits_per_group - LG_BITMAP_GROUP_NBITS));
+		size_t   lg_bits_per_group = (LG_BITMAP_GROUP_NBITS
+                    * (level + 1));
+		bitmap_t group = bitmap[binfo->levels[level].group_offset
+		    + (bit >> lg_bits_per_group)];
+		unsigned group_nmask =
+		    (unsigned)(((min_bit > bit) ? (min_bit - bit) : 0)
+		        >> (lg_bits_per_group - LG_BITMAP_GROUP_NBITS));
 		assert(group_nmask <= BITMAP_GROUP_NBITS);
 		bitmap_t group_mask = ~((1LU << group_nmask) - 1);
 		bitmap_t group_masked = group & group_mask;
@@ -272,25 +282,28 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) {
 			}
 			return bitmap_ffu(bitmap, binfo, sib_base);
 		}
-		bit += ((size_t)ffs_lu(group_masked)) <<
-		    (lg_bits_per_group - LG_BITMAP_GROUP_NBITS);
+		bit += ((size_t)ffs_lu(group_masked))
+		    << (lg_bits_per_group - LG_BITMAP_GROUP_NBITS);
 	}
 	assert(bit >= min_bit);
 	assert(bit < binfo->nbits);
 	return bit;
 #else
-	size_t i = min_bit >> LG_BITMAP_GROUP_NBITS;
-	bitmap_t g = bitmap[i] & ~((1LU << (min_bit & BITMAP_GROUP_NBITS_MASK))
-	    - 1);
+	size_t   i = min_bit >> LG_BITMAP_GROUP_NBITS;
+	bitmap_t g = bitmap[i]
+	    & ~((1LU << (min_bit & BITMAP_GROUP_NBITS_MASK)) - 1);
 	size_t bit;
-	do {
+	while (1) {
 		if (g != 0) {
 			bit = ffs_lu(g);
 			return (i << LG_BITMAP_GROUP_NBITS) + bit;
 		}
 		i++;
+		if (i >= binfo->ngroups) {
+			break;
+		}
 		g = bitmap[i];
-	} while (i < binfo->ngroups);
+	}
 	return binfo->nbits;
 #endif
 }
@@ -298,7 +311,7 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) {
 /* sfu: set first unset. */
 static inline size_t
 bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) {
-	size_t bit;
+	size_t   bit;
 	bitmap_t g;
 	unsigned i;
 
@@ -328,9 +341,9 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) {
 
 static inline void
 bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) {
-	size_t goff;
-	bitmap_t *gp;
-	bitmap_t g;
+	size_t      goff;
+	bitmap_t   *gp;
+	bitmap_t    g;
 	UNUSED bool propagate;
 
 	assert(bit < binfo->nbits);
diff --git a/include/jemalloc/internal/buf_writer.h b/include/jemalloc/internal/buf_writer.h
index 37aa6de5..5ee9af4e 100644
--- a/include/jemalloc/internal/buf_writer.h
+++ b/include/jemalloc/internal/buf_writer.h
@@ -1,6 +1,10 @@
 #ifndef JEMALLOC_INTERNAL_BUF_WRITER_H
 #define JEMALLOC_INTERNAL_BUF_WRITER_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_types.h"
+#include "jemalloc/internal/tsd_types.h"
+
 /*
  * Note: when using the buffered writer, cbopaque is passed to write_cb only
  * when the buffer is flushed.  It would make a difference if cbopaque points
@@ -12,21 +16,21 @@
 
 typedef struct {
 	write_cb_t *write_cb;
-	void *cbopaque;
-	char *buf;
-	size_t buf_size;
-	size_t buf_end;
-	bool internal_buf;
+	void       *cbopaque;
+	char       *buf;
+	size_t      buf_size;
+	size_t      buf_end;
+	bool        internal_buf;
 } buf_writer_t;
 
-bool buf_writer_init(tsdn_t *tsdn, buf_writer_t *buf_writer,
-    write_cb_t *write_cb, void *cbopaque, char *buf, size_t buf_len);
-void buf_writer_flush(buf_writer_t *buf_writer);
+bool       buf_writer_init(tsdn_t *tsdn, buf_writer_t *buf_writer,
+          write_cb_t *write_cb, void *cbopaque, char *buf, size_t buf_len);
+void       buf_writer_flush(buf_writer_t *buf_writer);
 write_cb_t buf_writer_cb;
-void buf_writer_terminate(tsdn_t *tsdn, buf_writer_t *buf_writer);
+void       buf_writer_terminate(tsdn_t *tsdn, buf_writer_t *buf_writer);
 
-typedef ssize_t (read_cb_t)(void *read_cbopaque, void *buf, size_t limit);
-void buf_writer_pipe(buf_writer_t *buf_writer, read_cb_t *read_cb,
-    void *read_cbopaque);
+typedef ssize_t(read_cb_t)(void *read_cbopaque, void *buf, size_t limit);
+void buf_writer_pipe(
+    buf_writer_t *buf_writer, read_cb_t *read_cb, void *read_cbopaque);
 
 #endif /* JEMALLOC_INTERNAL_BUF_WRITER_H */
diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h
index caf5be33..bea3a2fc 100644
--- a/include/jemalloc/internal/cache_bin.h
+++ b/include/jemalloc/internal/cache_bin.h
@@ -1,7 +1,10 @@
 #ifndef JEMALLOC_INTERNAL_CACHE_BIN_H
 #define JEMALLOC_INTERNAL_CACHE_BIN_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_externs.h"
 #include "jemalloc/internal/ql.h"
+#include "jemalloc/internal/safety_check.h"
 #include "jemalloc/internal/sz.h"
 
 /*
@@ -20,16 +23,20 @@
  */
 typedef uint16_t cache_bin_sz_t;
 
+#define JUNK_ADDR ((uintptr_t)0x7a7a7a7a7a7a7a7aULL)
 /*
  * Leave a noticeable mark pattern on the cache bin stack boundaries, in case a
  * bug starts leaking those.  Make it look like the junk pattern but be distinct
  * from it.
  */
-static const uintptr_t cache_bin_preceding_junk =
-    (uintptr_t)0x7a7a7a7a7a7a7a7aULL;
-/* Note: a7 vs. 7a above -- this tells you which pointer leaked. */
-static const uintptr_t cache_bin_trailing_junk =
-    (uintptr_t)0xa7a7a7a7a7a7a7a7ULL;
+static const uintptr_t cache_bin_preceding_junk = JUNK_ADDR;
+/* Note: JUNK_ADDR vs. JUNK_ADDR + 1 -- this tells you which pointer leaked. */
+static const uintptr_t cache_bin_trailing_junk = JUNK_ADDR + 1;
+/*
+ * A pointer used to initialize a fake stack_head for disabled small bins
+ * so that the enabled/disabled assessment does not rely on ncached_max.
+ */
+extern const uintptr_t disabled_bin;
 
 /*
  * That implies the following value, for the maximum number of items in any
@@ -38,8 +45,8 @@ static const uintptr_t cache_bin_trailing_junk =
  *   1 << (sizeof(cache_bin_sz_t) * 8)
  * bytes spread across pointer sized objects to get the maximum.
  */
-#define CACHE_BIN_NCACHED_MAX (((size_t)1 << sizeof(cache_bin_sz_t) * 8) \
-    / sizeof(void *) - 1)
+#define CACHE_BIN_NCACHED_MAX                                                  \
+	(((size_t)1 << sizeof(cache_bin_sz_t) * 8) / sizeof(void *) - 1)
 
 /*
  * This lives inside the cache_bin (for locality reasons), and is initialized
@@ -101,7 +108,7 @@ struct cache_bin_s {
 	 * Since the stack grows down, this is a higher address than
 	 * low_bits_full.
 	 */
-	uint16_t low_bits_low_water;
+	cache_bin_sz_t low_bits_low_water;
 
 	/*
 	 * The low bits of the value that stack_head will take on when the array
@@ -112,7 +119,7 @@ struct cache_bin_s {
 	 * Recall that since the stack grows down, this is the lowest available
 	 * address in the array for caching.  Only adjusted when stashing items.
 	 */
-	uint16_t low_bits_full;
+	cache_bin_sz_t low_bits_full;
 
 	/*
 	 * The low bits of the value that stack_head will take on when the array
@@ -121,7 +128,10 @@ struct cache_bin_s {
 	 * The stack grows down -- this is one past the highest address in the
 	 * array.  Immutable after initialization.
 	 */
-	uint16_t low_bits_empty;
+	cache_bin_sz_t low_bits_empty;
+
+	/* The maximum number of cached items in the bin. */
+	cache_bin_info_t bin_info;
 };
 
 /*
@@ -142,8 +152,8 @@ struct cache_bin_array_descriptor_s {
 };
 
 static inline void
-cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor,
-    cache_bin_t *bins) {
+cache_bin_array_descriptor_init(
+    cache_bin_array_descriptor_t *descriptor, cache_bin_t *bins) {
 	ql_elm_new(descriptor, link);
 	descriptor->bins = bins;
 }
@@ -168,10 +178,41 @@ cache_bin_nonfast_aligned(const void *ptr) {
 	return ((uintptr_t)ptr & san_cache_bin_nonfast_mask) == 0;
 }
 
+static inline const void *
+cache_bin_disabled_bin_stack(void) {
+	return &disabled_bin;
+}
+
+/*
+ * If a cache bin was zero initialized (either because it lives in static or
+ * thread-local storage, or was memset to 0), this function indicates whether or
+ * not cache_bin_init was called on it.
+ */
+static inline bool
+cache_bin_still_zero_initialized(cache_bin_t *bin) {
+	return bin->stack_head == NULL;
+}
+
+static inline bool
+cache_bin_disabled(cache_bin_t *bin) {
+	bool disabled = (bin->stack_head == cache_bin_disabled_bin_stack());
+	if (disabled) {
+		assert((uintptr_t)(*bin->stack_head) == JUNK_ADDR);
+	}
+	return disabled;
+}
+
+/* Gets ncached_max without asserting that the bin is enabled. */
+static inline cache_bin_sz_t
+cache_bin_ncached_max_get_unsafe(cache_bin_t *bin) {
+	return bin->bin_info.ncached_max;
+}
+
 /* Returns ncached_max: Upper limit on ncached. */
 static inline cache_bin_sz_t
-cache_bin_info_ncached_max(cache_bin_info_t *info) {
-	return info->ncached_max;
+cache_bin_ncached_max_get(cache_bin_t *bin) {
+	assert(!cache_bin_disabled(bin));
+	return cache_bin_ncached_max_get_unsafe(bin);
 }
 
 /*
@@ -181,7 +222,8 @@ cache_bin_info_ncached_max(cache_bin_info_t *info) {
  * with later.
  */
 static inline void
-cache_bin_assert_earlier(cache_bin_t *bin, uint16_t earlier, uint16_t later) {
+cache_bin_assert_earlier(
+    cache_bin_t *bin, cache_bin_sz_t earlier, cache_bin_sz_t later) {
 	if (earlier > later) {
 		assert(bin->low_bits_full > bin->low_bits_empty);
 	}
@@ -193,28 +235,19 @@ cache_bin_assert_earlier(cache_bin_t *bin, uint16_t earlier, uint16_t later) {
  * Does difference calculations that handle wraparound correctly.  Earlier must
  * be associated with the position earlier in memory.
  */
-static inline uint16_t
-cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later, bool racy) {
-	/*
-	 * When it's racy, bin->low_bits_full can be modified concurrently. It
-	 * can cross the uint16_t max value and become less than
-	 * bin->low_bits_empty at the time of the check.
-	 */
-	if (!racy) {
-		cache_bin_assert_earlier(bin, earlier, later);
-	}
+static inline cache_bin_sz_t
+cache_bin_diff(cache_bin_t *bin, cache_bin_sz_t earlier, cache_bin_sz_t later) {
+	cache_bin_assert_earlier(bin, earlier, later);
 	return later - earlier;
 }
 
 /*
  * Number of items currently cached in the bin, without checking ncached_max.
- * We require specifying whether or not the request is racy or not (i.e. whether
- * or not concurrent modifications are possible).
  */
 static inline cache_bin_sz_t
-cache_bin_ncached_get_internal(cache_bin_t *bin, bool racy) {
+cache_bin_ncached_get_internal(cache_bin_t *bin) {
 	cache_bin_sz_t diff = cache_bin_diff(bin,
-	    (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty, racy);
+	    (cache_bin_sz_t)(uintptr_t)bin->stack_head, bin->low_bits_empty);
 	cache_bin_sz_t n = diff / sizeof(void *);
 	/*
 	 * We have undefined behavior here; if this function is called from the
@@ -225,7 +258,7 @@ cache_bin_ncached_get_internal(cache_bin_t *bin, bool racy) {
 	 * fast paths.  This should still be "safe" in the sense of generating
 	 * the correct assembly for the foreseeable future, though.
 	 */
-	assert(n == 0 || *(bin->stack_head) != NULL || racy);
+	assert(n == 0 || *(bin->stack_head) != NULL);
 	return n;
 }
 
@@ -235,10 +268,9 @@ cache_bin_ncached_get_internal(cache_bin_t *bin, bool racy) {
  * possible.
  */
 static inline cache_bin_sz_t
-cache_bin_ncached_get_local(cache_bin_t *bin, cache_bin_info_t *info) {
-	cache_bin_sz_t n = cache_bin_ncached_get_internal(bin,
-	    /* racy */ false);
-	assert(n <= cache_bin_info_ncached_max(info));
+cache_bin_ncached_get_local(cache_bin_t *bin) {
+	cache_bin_sz_t n = cache_bin_ncached_get_internal(bin);
+	assert(n <= cache_bin_ncached_max_get(bin));
 	return n;
 }
 
@@ -253,10 +285,9 @@ cache_bin_ncached_get_local(cache_bin_t *bin, cache_bin_info_t *info) {
 static inline void **
 cache_bin_empty_position_get(cache_bin_t *bin) {
 	cache_bin_sz_t diff = cache_bin_diff(bin,
-	    (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty,
-	    /* racy */ false);
-	uintptr_t empty_bits = (uintptr_t)bin->stack_head + diff;
-	void **ret = (void **)empty_bits;
+	    (cache_bin_sz_t)(uintptr_t)bin->stack_head, bin->low_bits_empty);
+	byte_t        *empty_bits = (byte_t *)bin->stack_head + diff;
+	void         **ret = (void **)empty_bits;
 
 	assert(ret >= bin->stack_head);
 
@@ -273,10 +304,10 @@ cache_bin_empty_position_get(cache_bin_t *bin) {
  * multithreaded environment. Currently concurrent access happens only during
  * arena statistics collection.
  */
-static inline uint16_t
-cache_bin_low_bits_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) {
-	return (uint16_t)bin->low_bits_empty -
-	    info->ncached_max * sizeof(void *);
+static inline cache_bin_sz_t
+cache_bin_low_bits_low_bound_get(cache_bin_t *bin) {
+	return (cache_bin_sz_t)bin->low_bits_empty
+	    - cache_bin_ncached_max_get(bin) * sizeof(void *);
 }
 
 /*
@@ -285,9 +316,9 @@ cache_bin_low_bits_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) {
  * A pointer to the position with the lowest address of the backing array.
  */
 static inline void **
-cache_bin_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) {
-	cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info);
-	void **ret = cache_bin_empty_position_get(bin) - ncached_max;
+cache_bin_low_bound_get(cache_bin_t *bin) {
+	cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(bin);
+	void         **ret = cache_bin_empty_position_get(bin) - ncached_max;
 	assert(ret <= bin->stack_head);
 
 	return ret;
@@ -298,8 +329,8 @@ cache_bin_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) {
  * batch fill a nonempty cache bin.
  */
 static inline void
-cache_bin_assert_empty(cache_bin_t *bin, cache_bin_info_t *info) {
-	assert(cache_bin_ncached_get_local(bin, info) == 0);
+cache_bin_assert_empty(cache_bin_t *bin) {
+	assert(cache_bin_ncached_get_local(bin) == 0);
 	assert(cache_bin_empty_position_get(bin) == bin->stack_head);
 }
 
@@ -310,18 +341,19 @@ cache_bin_assert_empty(cache_bin_t *bin, cache_bin_info_t *info) {
  */
 static inline cache_bin_sz_t
 cache_bin_low_water_get_internal(cache_bin_t *bin) {
-	return cache_bin_diff(bin, bin->low_bits_low_water,
-	    bin->low_bits_empty, /* racy */ false) / sizeof(void *);
+	return cache_bin_diff(bin, bin->low_bits_low_water, bin->low_bits_empty)
+	    / sizeof(void *);
 }
 
 /* Returns the numeric value of low water in [0, ncached]. */
 static inline cache_bin_sz_t
-cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) {
+cache_bin_low_water_get(cache_bin_t *bin) {
 	cache_bin_sz_t low_water = cache_bin_low_water_get_internal(bin);
-	assert(low_water <= cache_bin_info_ncached_max(info));
-	assert(low_water <= cache_bin_ncached_get_local(bin, info));
+	assert(low_water <= cache_bin_ncached_max_get(bin));
+	assert(low_water <= cache_bin_ncached_get_local(bin));
 
-	cache_bin_assert_earlier(bin, (uint16_t)(uintptr_t)bin->stack_head,
+	cache_bin_assert_earlier(bin,
+	    (cache_bin_sz_t)(uintptr_t)bin->stack_head,
 	    bin->low_bits_low_water);
 
 	return low_water;
@@ -333,12 +365,14 @@ cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) {
  */
 static inline void
 cache_bin_low_water_set(cache_bin_t *bin) {
-	bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head;
+	assert(!cache_bin_disabled(bin));
+	bin->low_bits_low_water = (cache_bin_sz_t)(uintptr_t)bin->stack_head;
 }
 
 static inline void
 cache_bin_low_water_adjust(cache_bin_t *bin) {
-	if (cache_bin_ncached_get_internal(bin, /* racy */ false)
+	assert(!cache_bin_disabled(bin));
+	if (cache_bin_ncached_get_internal(bin)
 	    < cache_bin_low_water_get_internal(bin)) {
 		cache_bin_low_water_set(bin);
 	}
@@ -358,9 +392,9 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) {
 	 * This may read from the empty position; however the loaded value won't
 	 * be used.  It's safe because the stack has one more slot reserved.
 	 */
-	void *ret = *bin->stack_head;
-	uint16_t low_bits = (uint16_t)(uintptr_t)bin->stack_head;
-	void **new_head = bin->stack_head + 1;
+	void          *ret = *bin->stack_head;
+	cache_bin_sz_t low_bits = (cache_bin_sz_t)(uintptr_t)bin->stack_head;
+	void         **new_head = bin->stack_head + 1;
 
 	/*
 	 * Note that the low water mark is at most empty; if we pass this check,
@@ -382,7 +416,7 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) {
 	 */
 	if (likely(low_bits != bin->low_bits_empty)) {
 		bin->stack_head = new_head;
-		bin->low_bits_low_water = (uint16_t)(uintptr_t)new_head;
+		bin->low_bits_low_water = (cache_bin_sz_t)(uintptr_t)new_head;
 		*success = true;
 		return ret;
 	}
@@ -410,8 +444,7 @@ cache_bin_alloc(cache_bin_t *bin, bool *success) {
 
 JEMALLOC_ALWAYS_INLINE cache_bin_sz_t
 cache_bin_alloc_batch(cache_bin_t *bin, size_t num, void **out) {
-	cache_bin_sz_t n = cache_bin_ncached_get_internal(bin,
-	    /* racy */ false);
+	cache_bin_sz_t n = cache_bin_ncached_get_internal(bin);
 	if (n > num) {
 		n = (cache_bin_sz_t)num;
 	}
@@ -424,7 +457,37 @@ cache_bin_alloc_batch(cache_bin_t *bin, size_t num, void **out) {
 
 JEMALLOC_ALWAYS_INLINE bool
 cache_bin_full(cache_bin_t *bin) {
-	return ((uint16_t)(uintptr_t)bin->stack_head == bin->low_bits_full);
+	return (
+	    (cache_bin_sz_t)(uintptr_t)bin->stack_head == bin->low_bits_full);
+}
+
+/*
+ * Scans the allocated area of the cache_bin for the given pointer up to limit.
+ * Fires safety_check_fail if the ptr is found and returns true.
+ */
+JEMALLOC_ALWAYS_INLINE bool
+cache_bin_dalloc_safety_checks(cache_bin_t *bin, void *ptr) {
+	if (!config_debug || opt_debug_double_free_max_scan == 0) {
+		return false;
+	}
+
+	cache_bin_sz_t ncached = cache_bin_ncached_get_internal(bin);
+	unsigned       max_scan = opt_debug_double_free_max_scan < ncached
+	          ? opt_debug_double_free_max_scan
+	          : ncached;
+
+	void **cur = bin->stack_head;
+	void **limit = cur + max_scan;
+	for (; cur < limit; cur++) {
+		if (*cur == ptr) {
+			safety_check_fail(
+			    "Invalid deallocation detected: double free of "
+			    "pointer %p\n",
+			    ptr);
+			return true;
+		}
+	}
+	return false;
 }
 
 /*
@@ -436,10 +499,14 @@ cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) {
 		return false;
 	}
 
+	if (unlikely(cache_bin_dalloc_safety_checks(bin, ptr))) {
+		return true;
+	}
+
 	bin->stack_head--;
 	*bin->stack_head = ptr;
 	cache_bin_assert_earlier(bin, bin->low_bits_full,
-	    (uint16_t)(uintptr_t)bin->stack_head);
+	    (cache_bin_sz_t)(uintptr_t)bin->stack_head);
 
 	return true;
 }
@@ -452,11 +519,12 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) {
 	}
 
 	/* Stash at the full position, in the [full, head) range. */
-	uint16_t low_bits_head = (uint16_t)(uintptr_t)bin->stack_head;
+	cache_bin_sz_t low_bits_head = (cache_bin_sz_t)(uintptr_t)
+	                                   bin->stack_head;
 	/* Wraparound handled as well. */
-	uint16_t diff = cache_bin_diff(bin, bin->low_bits_full, low_bits_head,
-	    /* racy */ false);
-	*(void **)((uintptr_t)bin->stack_head - diff) = ptr;
+	cache_bin_sz_t diff = cache_bin_diff(
+	    bin, bin->low_bits_full, low_bits_head);
+	*(void **)((byte_t *)bin->stack_head - diff) = ptr;
 
 	assert(!cache_bin_full(bin));
 	bin->low_bits_full += sizeof(void *);
@@ -465,67 +533,101 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) {
 	return true;
 }
 
-/*
- * Get the number of stashed pointers.
- *
- * When called from a thread not owning the TLS (i.e. racy = true), it's
- * important to keep in mind that 'bin->stack_head' and 'bin->low_bits_full' can
- * be modified concurrently and almost none assertions about their values can be
- * made.
- */
+/* Get the number of stashed pointers. */
 JEMALLOC_ALWAYS_INLINE cache_bin_sz_t
-cache_bin_nstashed_get_internal(cache_bin_t *bin, cache_bin_info_t *info,
-    bool racy) {
-	cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info);
-	uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin,
-	    info);
+cache_bin_nstashed_get_internal(cache_bin_t *bin) {
+	cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(bin);
+	cache_bin_sz_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(
+	    bin);
 
-	cache_bin_sz_t n = cache_bin_diff(bin, low_bits_low_bound,
-	    bin->low_bits_full, racy) / sizeof(void *);
+	cache_bin_sz_t n = cache_bin_diff(
+	                       bin, low_bits_low_bound, bin->low_bits_full)
+	    / sizeof(void *);
 	assert(n <= ncached_max);
-
-	if (!racy) {
+	if (config_debug && n != 0) {
 		/* Below are for assertions only. */
-		void **low_bound = cache_bin_low_bound_get(bin, info);
+		void **low_bound = cache_bin_low_bound_get(bin);
 
-		assert((uint16_t)(uintptr_t)low_bound == low_bits_low_bound);
+		assert(
+		    (cache_bin_sz_t)(uintptr_t)low_bound == low_bits_low_bound);
 		void *stashed = *(low_bound + n - 1);
-		bool aligned = cache_bin_nonfast_aligned(stashed);
+		bool  aligned = cache_bin_nonfast_aligned(stashed);
 #ifdef JEMALLOC_JET
 		/* Allow arbitrary pointers to be stashed in tests. */
 		aligned = true;
 #endif
-		assert(n == 0 || (stashed != NULL && aligned));
+		assert(stashed != NULL && aligned);
 	}
 
 	return n;
 }
 
 JEMALLOC_ALWAYS_INLINE cache_bin_sz_t
-cache_bin_nstashed_get_local(cache_bin_t *bin, cache_bin_info_t *info) {
-	cache_bin_sz_t n = cache_bin_nstashed_get_internal(bin, info,
-	    /* racy */ false);
-	assert(n <= cache_bin_info_ncached_max(info));
+cache_bin_nstashed_get_local(cache_bin_t *bin) {
+	cache_bin_sz_t n = cache_bin_nstashed_get_internal(bin);
+	assert(n <= cache_bin_ncached_max_get(bin));
 	return n;
 }
 
 /*
  * Obtain a racy view of the number of items currently in the cache bin, in the
  * presence of possible concurrent modifications.
+ *
+ * Note that this is the only racy function in this header.  Any other functions
+ * are assumed to be non-racy.  The "racy" term here means accessed from another
+ * thread (that is not the owner of the specific cache bin).  This only happens
+ * when gathering stats (read-only).  The only change because of the racy
+ * condition is that assertions based on mutable fields are omitted.
+ *
+ * It's important to keep in mind that 'bin->stack_head' and
+ * 'bin->low_bits_full' can be modified concurrently and almost no assertions
+ * about their values can be made.
+ *
+ * This function should not call other utility functions because the racy
+ * condition may cause unexpected / undefined behaviors in unverified utility
+ * functions.  Currently, this function calls two utility functions
+ * cache_bin_ncached_max_get and cache_bin_low_bits_low_bound_get because
+ * they help access values that will not be concurrently modified.
  */
 static inline void
-cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_info_t *info,
-    cache_bin_sz_t *ncached, cache_bin_sz_t *nstashed) {
-	cache_bin_sz_t n = cache_bin_ncached_get_internal(bin, /* racy */ true);
-	assert(n <= cache_bin_info_ncached_max(info));
+cache_bin_nitems_get_remote(
+    cache_bin_t *bin, cache_bin_sz_t *ncached, cache_bin_sz_t *nstashed) {
+	/* Racy version of cache_bin_ncached_get_internal. */
+	cache_bin_sz_t diff = bin->low_bits_empty
+	    - (cache_bin_sz_t)(uintptr_t)bin->stack_head;
+	cache_bin_sz_t n = diff / sizeof(void *);
 	*ncached = n;
 
-	n = cache_bin_nstashed_get_internal(bin, info, /* racy */ true);
-	assert(n <= cache_bin_info_ncached_max(info));
+	/* Racy version of cache_bin_nstashed_get_internal. */
+	cache_bin_sz_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(
+	    bin);
+	n = (bin->low_bits_full - low_bits_low_bound) / sizeof(void *);
 	*nstashed = n;
-	/* Note that cannot assert ncached + nstashed <= ncached_max (racy). */
+	/*
+	 * Note that cannot assert anything regarding ncached_max because
+	 * it can be configured on the fly and is thus racy.
+	 */
 }
 
+/*
+ * For small bins, used to calculate how many items to fill at a time.
+ * The final nfill is calculated by (ncached_max >> (base - offset)).
+ */
+typedef struct cache_bin_fill_ctl_s cache_bin_fill_ctl_t;
+struct cache_bin_fill_ctl_s {
+	uint8_t base;
+	uint8_t offset;
+};
+
+/*
+ * Limit how many items can be flushed in a batch (Which is the upper bound
+ * for the nflush parameter in tcache_bin_flush_impl()).
+ * This is to avoid stack overflow when we do batch edata look up, which
+ * reserves a nflush * sizeof(emap_batch_lookup_result_t) stack variable.
+ */
+#define CACHE_BIN_NFLUSH_BATCH_MAX                                             \
+	((VARIABLE_ARRAY_SIZE_MAX >> LG_SIZEOF_PTR) - 1)
+
 /*
  * Filling and flushing are done in batch, on arrays of void *s.  For filling,
  * the arrays go forward, and can be accessed with ordinary array arithmetic.
@@ -546,7 +648,7 @@ cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_info_t *info,
 typedef struct cache_bin_ptr_array_s cache_bin_ptr_array_t;
 struct cache_bin_ptr_array_s {
 	cache_bin_sz_t n;
-	void **ptr;
+	void         **ptr;
 };
 
 /*
@@ -558,18 +660,18 @@ struct cache_bin_ptr_array_s {
  * representations is easy (since they'll require an alloca in the calling
  * frame).
  */
-#define CACHE_BIN_PTR_ARRAY_DECLARE(name, nval)				\
-    cache_bin_ptr_array_t name;						\
-    name.n = (nval)
+#define CACHE_BIN_PTR_ARRAY_DECLARE(name, nval)                                \
+	cache_bin_ptr_array_t name;                                            \
+	name.n = (nval)
 
 /*
  * Start a fill.  The bin must be empty, and This must be followed by a
  * finish_fill call before doing any alloc/dalloc operations on the bin.
  */
 static inline void
-cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_info_t *info,
-    cache_bin_ptr_array_t *arr, cache_bin_sz_t nfill) {
-	cache_bin_assert_empty(bin, info);
+cache_bin_init_ptr_array_for_fill(
+    cache_bin_t *bin, cache_bin_ptr_array_t *arr, cache_bin_sz_t nfill) {
+	cache_bin_assert_empty(bin);
 	arr->ptr = cache_bin_empty_position_get(bin) - nfill;
 }
 
@@ -579,15 +681,19 @@ cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_info_t *info,
  * case of OOM.
  */
 static inline void
-cache_bin_finish_fill(cache_bin_t *bin, cache_bin_info_t *info,
-    cache_bin_ptr_array_t *arr, cache_bin_sz_t nfilled) {
-	cache_bin_assert_empty(bin, info);
+cache_bin_finish_fill(
+    cache_bin_t *bin, cache_bin_ptr_array_t *arr, cache_bin_sz_t nfilled) {
+	cache_bin_assert_empty(bin);
 	void **empty_position = cache_bin_empty_position_get(bin);
 	if (nfilled < arr->n) {
 		memmove(empty_position - nfilled, empty_position - arr->n,
 		    nfilled * sizeof(void *));
 	}
 	bin->stack_head = empty_position - nfilled;
+	/* Reset the bin stats as it's merged during fill. */
+	if (config_stats) {
+		bin->tstats.nrequests = 0;
+	}
 }
 
 /*
@@ -595,55 +701,61 @@ cache_bin_finish_fill(cache_bin_t *bin, cache_bin_info_t *info,
  * everything we give them.
  */
 static inline void
-cache_bin_init_ptr_array_for_flush(cache_bin_t *bin, cache_bin_info_t *info,
-    cache_bin_ptr_array_t *arr, cache_bin_sz_t nflush) {
+cache_bin_init_ptr_array_for_flush(
+    cache_bin_t *bin, cache_bin_ptr_array_t *arr, cache_bin_sz_t nflush) {
 	arr->ptr = cache_bin_empty_position_get(bin) - nflush;
-	assert(cache_bin_ncached_get_local(bin, info) == 0
-	    || *arr->ptr != NULL);
+	assert(cache_bin_ncached_get_local(bin) == 0 || *arr->ptr != NULL);
 }
 
 static inline void
-cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info,
-    cache_bin_ptr_array_t *arr, cache_bin_sz_t nflushed) {
-	unsigned rem = cache_bin_ncached_get_local(bin, info) - nflushed;
-	memmove(bin->stack_head + nflushed, bin->stack_head,
-	    rem * sizeof(void *));
-	bin->stack_head = bin->stack_head + nflushed;
+cache_bin_finish_flush(
+    cache_bin_t *bin, cache_bin_ptr_array_t *arr, cache_bin_sz_t nflushed) {
+	unsigned rem = cache_bin_ncached_get_local(bin) - nflushed;
+	memmove(
+	    bin->stack_head + nflushed, bin->stack_head, rem * sizeof(void *));
+	bin->stack_head += nflushed;
 	cache_bin_low_water_adjust(bin);
+	/* Reset the bin stats as it's merged during flush. */
+	if (config_stats) {
+		bin->tstats.nrequests = 0;
+	}
 }
 
 static inline void
 cache_bin_init_ptr_array_for_stashed(cache_bin_t *bin, szind_t binind,
-    cache_bin_info_t *info, cache_bin_ptr_array_t *arr,
-    cache_bin_sz_t nstashed) {
+    cache_bin_ptr_array_t *arr, cache_bin_sz_t nstashed) {
 	assert(nstashed > 0);
-	assert(cache_bin_nstashed_get_local(bin, info) == nstashed);
+	assert(cache_bin_nstashed_get_local(bin) == nstashed);
 
-	void **low_bound = cache_bin_low_bound_get(bin, info);
+	void **low_bound = cache_bin_low_bound_get(bin);
 	arr->ptr = low_bound;
 	assert(*arr->ptr != NULL);
 }
 
 static inline void
-cache_bin_finish_flush_stashed(cache_bin_t *bin, cache_bin_info_t *info) {
-	void **low_bound = cache_bin_low_bound_get(bin, info);
+cache_bin_finish_flush_stashed(cache_bin_t *bin) {
+	void **low_bound = cache_bin_low_bound_get(bin);
 
 	/* Reset the bin local full position. */
 	bin->low_bits_full = (uint16_t)(uintptr_t)low_bound;
-	assert(cache_bin_nstashed_get_local(bin, info) == 0);
+	assert(cache_bin_nstashed_get_local(bin) == 0);
+	/* Reset the bin stats as it's merged during flush. */
+	if (config_stats) {
+		bin->tstats.nrequests = 0;
+	}
 }
 
 /*
  * Initialize a cache_bin_info to represent up to the given number of items in
  * the cache_bins it is associated with.
  */
-void cache_bin_info_init(cache_bin_info_t *bin_info,
-    cache_bin_sz_t ncached_max);
+void cache_bin_info_init(
+    cache_bin_info_t *bin_info, cache_bin_sz_t ncached_max);
 /*
  * Given an array of initialized cache_bin_info_ts, determine how big an
  * allocation is required to initialize a full set of cache_bin_ts.
  */
-void cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos,
+void cache_bin_info_compute_alloc(const cache_bin_info_t *infos, szind_t ninfos,
     size_t *size, size_t *alignment);
 
 /*
@@ -653,18 +765,13 @@ void cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos,
  * cache_bin_postincrement.  *alloc_cur will then point immediately past the end
  * of the allocation.
  */
-void cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos,
+void cache_bin_preincrement(const cache_bin_info_t *infos, szind_t ninfos,
     void *alloc, size_t *cur_offset);
-void cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos,
-    void *alloc, size_t *cur_offset);
-void cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc,
+void cache_bin_postincrement(void *alloc, size_t *cur_offset);
+void cache_bin_init(cache_bin_t *bin, const cache_bin_info_t *info, void *alloc,
     size_t *cur_offset);
+void cache_bin_init_disabled(cache_bin_t *bin, cache_bin_sz_t ncached_max);
 
-/*
- * If a cache bin was zero initialized (either because it lives in static or
- * thread-local storage, or was memset to 0), this function indicates whether or
- * not cache_bin_init was called on it.
- */
-bool cache_bin_still_zero_initialized(cache_bin_t *bin);
+bool cache_bin_stack_use_thp(void);
 
 #endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */
diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h
index 7b3850bc..01b27e8f 100644
--- a/include/jemalloc/internal/ckh.h
+++ b/include/jemalloc/internal/ckh.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_CKH_H
 #define JEMALLOC_INTERNAL_CKH_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/tsd.h"
 
 /* Cuckoo hashing implementation.  Skip to the end for the interface. */
@@ -21,8 +22,8 @@
 #define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1)
 
 /* Typedefs to allow easy function pointer passing. */
-typedef void ckh_hash_t (const void *, size_t[2]);
-typedef bool ckh_keycomp_t (const void *, const void *);
+typedef void ckh_hash_t(const void *, size_t[2]);
+typedef bool ckh_keycomp_t(const void *, const void *);
 
 /* Hash table cell. */
 typedef struct {
@@ -55,7 +56,7 @@ typedef struct {
 	unsigned lg_curbuckets;
 
 	/* Hash and comparison functions. */
-	ckh_hash_t *hash;
+	ckh_hash_t    *hash;
 	ckh_keycomp_t *keycomp;
 
 	/* Hash table with 2^lg_curbuckets buckets. */
@@ -88,8 +89,8 @@ bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data);
  * the key and value, and doesn't do any lifetime management.
  */
 bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data);
-bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key,
-    void **data);
+bool ckh_remove(
+    tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, void **data);
 bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data);
 
 /* Some useful hash and comparison functions for strings and pointers. */
diff --git a/include/jemalloc/internal/conf.h b/include/jemalloc/internal/conf.h
new file mode 100644
index 00000000..26983ee9
--- /dev/null
+++ b/include/jemalloc/internal/conf.h
@@ -0,0 +1,23 @@
+#ifndef JEMALLOC_INTERNAL_CONF_H
+#define JEMALLOC_INTERNAL_CONF_H
+
+#include "jemalloc/internal/sc.h"
+
+void malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
+    char readlink_buf[PATH_MAX + 1]);
+void malloc_abort_invalid_conf(void);
+
+#ifdef JEMALLOC_JET
+extern bool had_conf_error;
+
+bool conf_next(char const **opts_p, char const **k_p, size_t *klen_p,
+    char const **v_p, size_t *vlen_p);
+void conf_error(
+    const char *msg, const char *k, size_t klen, const char *v, size_t vlen);
+bool conf_handle_bool(const char *v, size_t vlen, bool *result);
+bool conf_handle_signed(const char *v, size_t vlen, intmax_t min, intmax_t max,
+    bool check_min, bool check_max, bool clip, intmax_t *result);
+bool conf_handle_char_p(const char *v, size_t vlen, char *dest, size_t dest_sz);
+#endif
+
+#endif /* JEMALLOC_INTERNAL_CONF_H */
diff --git a/include/jemalloc/internal/counter.h b/include/jemalloc/internal/counter.h
index 79abf064..0f38d40c 100644
--- a/include/jemalloc/internal/counter.h
+++ b/include/jemalloc/internal/counter.h
@@ -1,12 +1,14 @@
 #ifndef JEMALLOC_INTERNAL_COUNTER_H
 #define JEMALLOC_INTERNAL_COUNTER_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/lockedint.h"
 #include "jemalloc/internal/mutex.h"
 
 typedef struct counter_accum_s {
 	LOCKEDINT_MTX_DECLARE(mtx)
 	locked_u64_t accumbytes;
-	uint64_t interval;
+	uint64_t     interval;
 } counter_accum_t;
 
 JEMALLOC_ALWAYS_INLINE bool
diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h
index 63d27f8a..82035fe3 100644
--- a/include/jemalloc/internal/ctl.h
+++ b/include/jemalloc/internal/ctl.h
@@ -1,6 +1,10 @@
 #ifndef JEMALLOC_INTERNAL_CTL_H
 #define JEMALLOC_INTERNAL_CTL_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/arena_stats.h"
+#include "jemalloc/internal/background_thread_structs.h"
+#include "jemalloc/internal/bin_stats.h"
 #include "jemalloc/internal/jemalloc_internal_types.h"
 #include "jemalloc/internal/malloc_io.h"
 #include "jemalloc/internal/mutex_prof.h"
@@ -9,50 +13,52 @@
 #include "jemalloc/internal/stats.h"
 
 /* Maximum ctl tree depth. */
-#define CTL_MAX_DEPTH	7
+#define CTL_MAX_DEPTH 7
+#define CTL_MULTI_SETTING_MAX_LEN 1000
 
 typedef struct ctl_node_s {
 	bool named;
 } ctl_node_t;
 
 typedef struct ctl_named_node_s {
-	ctl_node_t node;
+	ctl_node_t  node;
 	const char *name;
 	/* If (nchildren == 0), this is a terminal node. */
-	size_t nchildren;
+	size_t            nchildren;
 	const ctl_node_t *children;
-	int (*ctl)(tsd_t *, const size_t *, size_t, void *, size_t *, void *,
-	    size_t);
+	int (*ctl)(
+	    tsd_t *, const size_t *, size_t, void *, size_t *, void *, size_t);
 } ctl_named_node_t;
 
 typedef struct ctl_indexed_node_s {
 	struct ctl_node_s node;
-	const ctl_named_node_t *(*index)(tsdn_t *, const size_t *, size_t,
-	    size_t);
+	const ctl_named_node_t *(*index)(
+	    tsdn_t *, const size_t *, size_t, size_t);
 } ctl_indexed_node_t;
 
 typedef struct ctl_arena_stats_s {
 	arena_stats_t astats;
 
 	/* Aggregate stats for small size classes, based on bin stats. */
-	size_t allocated_small;
+	size_t   allocated_small;
 	uint64_t nmalloc_small;
 	uint64_t ndalloc_small;
 	uint64_t nrequests_small;
 	uint64_t nfills_small;
 	uint64_t nflushes_small;
 
-	bin_stats_data_t bstats[SC_NBINS];
+	bin_stats_data_t    bstats[SC_NBINS];
 	arena_stats_large_t lstats[SC_NSIZES - SC_NBINS];
-	pac_estats_t estats[SC_NPSIZES];
-	hpa_shard_stats_t hpastats;
-	sec_stats_t secstats;
+	pac_estats_t        estats[SC_NPSIZES];
+	hpa_shard_stats_t   hpastats;
 } ctl_arena_stats_t;
 
 typedef struct ctl_stats_s {
 	size_t allocated;
 	size_t active;
 	size_t metadata;
+	size_t metadata_edata;
+	size_t metadata_rtree;
 	size_t metadata_thp;
 	size_t resident;
 	size_t mapped;
@@ -65,17 +71,17 @@ typedef struct ctl_stats_s {
 typedef struct ctl_arena_s ctl_arena_t;
 struct ctl_arena_s {
 	unsigned arena_ind;
-	bool initialized;
+	bool     initialized;
 	ql_elm(ctl_arena_t) destroyed_link;
 
 	/* Basic stats, supported even if !config_stats. */
-	unsigned nthreads;
+	unsigned    nthreads;
 	const char *dss;
-	ssize_t dirty_decay_ms;
-	ssize_t muzzy_decay_ms;
-	size_t pactive;
-	size_t pdirty;
-	size_t pmuzzy;
+	ssize_t     dirty_decay_ms;
+	ssize_t     muzzy_decay_ms;
+	size_t      pactive;
+	size_t      pdirty;
+	size_t      pmuzzy;
 
 	/* NULL if !config_stats. */
 	ctl_arena_stats_t *astats;
@@ -100,60 +106,67 @@ int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp,
 int ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, size_t *miblenp);
 int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen);
-int ctl_mibnametomib(tsd_t *tsd, size_t *mib, size_t miblen, const char *name,
-    size_t *miblenp);
-int ctl_bymibname(tsd_t *tsd, size_t *mib, size_t miblen, const char *name,
-    size_t *miblenp, void *oldp, size_t *oldlenp, void *newp, size_t newlen);
+int ctl_mibnametomib(
+    tsd_t *tsd, size_t *mib, size_t miblen, const char *name, size_t *miblenp);
+int  ctl_bymibname(tsd_t *tsd, size_t *mib, size_t miblen, const char *name,
+     size_t *miblenp, void *oldp, size_t *oldlenp, void *newp, size_t newlen);
 bool ctl_boot(void);
 void ctl_prefork(tsdn_t *tsdn);
 void ctl_postfork_parent(tsdn_t *tsdn);
 void ctl_postfork_child(tsdn_t *tsdn);
 void ctl_mtx_assert_held(tsdn_t *tsdn);
 
-#define xmallctl(name, oldp, oldlenp, newp, newlen) do {		\
-	if (je_mallctl(name, oldp, oldlenp, newp, newlen)		\
-	    != 0) {							\
-		malloc_printf(						\
-		    "<jemalloc>: Failure in xmallctl(\"%s\", ...)\n",	\
-		    name);						\
-		abort();						\
-	}								\
-} while (0)
+#define xmallctl(name, oldp, oldlenp, newp, newlen)                            \
+	do {                                                                   \
+		if (je_mallctl(name, oldp, oldlenp, newp, newlen) != 0) {      \
+			malloc_printf(                                         \
+			    "<jemalloc>: Failure in xmallctl(\"%s\", ...)\n",  \
+			    name);                                             \
+			abort();                                               \
+		}                                                              \
+	} while (0)
 
-#define xmallctlnametomib(name, mibp, miblenp) do {			\
-	if (je_mallctlnametomib(name, mibp, miblenp) != 0) {		\
-		malloc_printf("<jemalloc>: Failure in "			\
-		    "xmallctlnametomib(\"%s\", ...)\n", name);		\
-		abort();						\
-	}								\
-} while (0)
+#define xmallctlnametomib(name, mibp, miblenp)                                 \
+	do {                                                                   \
+		if (je_mallctlnametomib(name, mibp, miblenp) != 0) {           \
+			malloc_printf(                                         \
+			    "<jemalloc>: Failure in "                          \
+			    "xmallctlnametomib(\"%s\", ...)\n",                \
+			    name);                                             \
+			abort();                                               \
+		}                                                              \
+	} while (0)
 
-#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do {	\
-	if (je_mallctlbymib(mib, miblen, oldp, oldlenp, newp,		\
-	    newlen) != 0) {						\
-		malloc_write(						\
-		    "<jemalloc>: Failure in xmallctlbymib()\n");	\
-		abort();						\
-	}								\
-} while (0)
+#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen)                \
+	do {                                                                   \
+		if (je_mallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen)  \
+		    != 0) {                                                    \
+			malloc_write(                                          \
+			    "<jemalloc>: Failure in xmallctlbymib()\n");       \
+			abort();                                               \
+		}                                                              \
+	} while (0)
 
-#define xmallctlmibnametomib(mib, miblen, name, miblenp) do {		\
-	if (ctl_mibnametomib(tsd_fetch(), mib, miblen, name, miblenp)	\
-	    != 0) {							\
-		malloc_write(						\
-		    "<jemalloc>: Failure in ctl_mibnametomib()\n");	\
-		abort();						\
-	}								\
-} while (0)
+#define xmallctlmibnametomib(mib, miblen, name, miblenp)                       \
+	do {                                                                   \
+		if (ctl_mibnametomib(tsd_fetch(), mib, miblen, name, miblenp)  \
+		    != 0) {                                                    \
+			malloc_write(                                          \
+			    "<jemalloc>: Failure in ctl_mibnametomib()\n");    \
+			abort();                                               \
+		}                                                              \
+	} while (0)
 
-#define xmallctlbymibname(mib, miblen, name, miblenp, oldp, oldlenp,	\
-    newp, newlen) do {							\
-	if (ctl_bymibname(tsd_fetch(), mib, miblen, name, miblenp,	\
-	    oldp, oldlenp, newp, newlen) != 0) {			\
-		malloc_write(						\
-		    "<jemalloc>: Failure in ctl_bymibname()\n");	\
-		abort();						\
-	}								\
-} while (0)
+#define xmallctlbymibname(                                                     \
+    mib, miblen, name, miblenp, oldp, oldlenp, newp, newlen)                   \
+	do {                                                                   \
+		if (ctl_bymibname(tsd_fetch(), mib, miblen, name, miblenp,     \
+		        oldp, oldlenp, newp, newlen)                           \
+		    != 0) {                                                    \
+			malloc_write(                                          \
+			    "<jemalloc>: Failure in ctl_bymibname()\n");       \
+			abort();                                               \
+		}                                                              \
+	} while (0)
 
 #endif /* JEMALLOC_INTERNAL_CTL_H */
diff --git a/include/jemalloc/internal/decay.h b/include/jemalloc/internal/decay.h
index cf6a9d22..e8773655 100644
--- a/include/jemalloc/internal/decay.h
+++ b/include/jemalloc/internal/decay.h
@@ -1,9 +1,11 @@
 #ifndef JEMALLOC_INTERNAL_DECAY_H
 #define JEMALLOC_INTERNAL_DECAY_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/smoothstep.h"
 
-#define DECAY_UNBOUNDED_TIME_TO_PURGE ((uint64_t)-1)
+#define DECAY_UNBOUNDED_TIME_TO_PURGE ((uint64_t) - 1)
 
 /*
  * The decay_t computes the number of pages we should purge at any given time.
@@ -166,12 +168,12 @@ void decay_reinit(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms);
 /*
  * Compute how many of 'npages_new' pages we would need to purge in 'time'.
  */
-uint64_t decay_npages_purge_in(decay_t *decay, nstime_t *time,
-    size_t npages_new);
+uint64_t decay_npages_purge_in(
+    decay_t *decay, nstime_t *time, size_t npages_new);
 
 /* Returns true if the epoch advanced and there are pages to purge. */
-bool decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time,
-    size_t current_npages);
+bool decay_maybe_advance_epoch(
+    decay_t *decay, nstime_t *new_time, size_t current_npages);
 
 /*
  * Calculates wait time until a number of pages in the interval
@@ -180,7 +182,7 @@ bool decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time,
  * Returns number of nanoseconds or DECAY_UNBOUNDED_TIME_TO_PURGE in case of
  * indefinite wait.
  */
-uint64_t decay_ns_until_purge(decay_t *decay, size_t npages_current,
-    uint64_t npages_threshold);
+uint64_t decay_ns_until_purge(
+    decay_t *decay, size_t npages_current, uint64_t npages_threshold);
 
 #endif /* JEMALLOC_INTERNAL_DECAY_H */
diff --git a/include/jemalloc/internal/div.h b/include/jemalloc/internal/div.h
index aebae939..56d5f463 100644
--- a/include/jemalloc/internal/div.h
+++ b/include/jemalloc/internal/div.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_DIV_H
 #define JEMALLOC_INTERNAL_DIV_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/assert.h"
 
 /*
diff --git a/include/jemalloc/internal/ecache.h b/include/jemalloc/internal/ecache.h
index 71cae3e3..605733b5 100644
--- a/include/jemalloc/internal/ecache.h
+++ b/include/jemalloc/internal/ecache.h
@@ -1,15 +1,16 @@
 #ifndef JEMALLOC_INTERNAL_ECACHE_H
 #define JEMALLOC_INTERNAL_ECACHE_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/eset.h"
-#include "jemalloc/internal/san.h"
 #include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/san.h"
 
 typedef struct ecache_s ecache_t;
 struct ecache_s {
 	malloc_mutex_t mtx;
-	eset_t eset;
-	eset_t guarded_eset;
+	eset_t         eset;
+	eset_t         guarded_eset;
 	/* All stored extents must be in the same state. */
 	extent_state_t state;
 	/* The index of the ehooks the ecache is associated with. */
@@ -23,22 +24,22 @@ struct ecache_s {
 
 static inline size_t
 ecache_npages_get(ecache_t *ecache) {
-	return eset_npages_get(&ecache->eset) +
-	    eset_npages_get(&ecache->guarded_eset);
+	return eset_npages_get(&ecache->eset)
+	    + eset_npages_get(&ecache->guarded_eset);
 }
 
 /* Get the number of extents in the given page size index. */
 static inline size_t
 ecache_nextents_get(ecache_t *ecache, pszind_t ind) {
-	return eset_nextents_get(&ecache->eset, ind) +
-	    eset_nextents_get(&ecache->guarded_eset, ind);
+	return eset_nextents_get(&ecache->eset, ind)
+	    + eset_nextents_get(&ecache->guarded_eset, ind);
 }
 
 /* Get the sum total bytes of the extents in the given page size index. */
 static inline size_t
 ecache_nbytes_get(ecache_t *ecache, pszind_t ind) {
-	return eset_nbytes_get(&ecache->eset, ind) +
-	    eset_nbytes_get(&ecache->guarded_eset, ind);
+	return eset_nbytes_get(&ecache->eset, ind)
+	    + eset_nbytes_get(&ecache->guarded_eset, ind);
 }
 
 static inline unsigned
diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h
index af039ea7..06b6c545 100644
--- a/include/jemalloc/internal/edata.h
+++ b/include/jemalloc/internal/edata.h
@@ -1,12 +1,14 @@
 #ifndef JEMALLOC_INTERNAL_EDATA_H
 #define JEMALLOC_INTERNAL_EDATA_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/atomic.h"
 #include "jemalloc/internal/bin_info.h"
 #include "jemalloc/internal/bit_util.h"
 #include "jemalloc/internal/hpdata.h"
 #include "jemalloc/internal/nstime.h"
 #include "jemalloc/internal/ph.h"
+#include "jemalloc/internal/prof_types.h"
 #include "jemalloc/internal/ql.h"
 #include "jemalloc/internal/sc.h"
 #include "jemalloc/internal/slab_data.h"
@@ -19,10 +21,18 @@
  */
 #define EDATA_ALIGNMENT 128
 
+/*
+ * Defines how many nodes visited when enumerating the heap to search for
+ * qualified extents.  More nodes visited may result in better choices at
+ * the cost of longer search time.  This size should not exceed 2^16 - 1
+ * because we use uint16_t for accessing the queue needed for enumeration.
+ */
+#define ESET_ENUMERATE_MAX_NUM 32
+
 enum extent_state_e {
-	extent_state_active   = 0,
-	extent_state_dirty    = 1,
-	extent_state_muzzy    = 2,
+	extent_state_active = 0,
+	extent_state_dirty = 1,
+	extent_state_muzzy = 2,
 	extent_state_retained = 3,
 	extent_state_transition = 4, /* States below are intermediate. */
 	extent_state_merging = 5,
@@ -32,7 +42,7 @@ typedef enum extent_state_e extent_state_t;
 
 enum extent_head_state_e {
 	EXTENT_NOT_HEAD,
-	EXTENT_IS_HEAD   /* See comments in ehooks_default_merge_impl(). */
+	EXTENT_IS_HEAD /* See comments in ehooks_default_merge_impl(). */
 };
 typedef enum extent_head_state_e extent_head_state_t;
 
@@ -40,25 +50,22 @@ typedef enum extent_head_state_e extent_head_state_t;
  * Which implementation of the page allocator interface, (PAI, defined in
  * pai.h) owns the given extent?
  */
-enum extent_pai_e {
-	EXTENT_PAI_PAC = 0,
-	EXTENT_PAI_HPA = 1
-};
+enum extent_pai_e { EXTENT_PAI_PAC = 0, EXTENT_PAI_HPA = 1 };
 typedef enum extent_pai_e extent_pai_t;
 
 struct e_prof_info_s {
 	/* Time when this was allocated. */
-	nstime_t	e_prof_alloc_time;
+	nstime_t e_prof_alloc_time;
 	/* Allocation request size. */
-	size_t		e_prof_alloc_size;
+	size_t e_prof_alloc_size;
 	/* Points to a prof_tctx_t. */
-	atomic_p_t	e_prof_tctx;
+	atomic_p_t e_prof_tctx;
 	/*
 	 * Points to a prof_recent_t for the allocation; NULL
 	 * means the recent allocation record no longer exists.
 	 * Protected by prof_recent_alloc_mtx.
 	 */
-	atomic_p_t	e_prof_recent_alloc;
+	atomic_p_t e_prof_recent_alloc;
 };
 typedef struct e_prof_info_s e_prof_info_t;
 
@@ -75,20 +82,20 @@ typedef struct e_prof_info_s e_prof_info_t;
  */
 typedef struct edata_map_info_s edata_map_info_t;
 struct edata_map_info_s {
-	bool slab;
+	bool    slab;
 	szind_t szind;
 };
 
 typedef struct edata_cmp_summary_s edata_cmp_summary_t;
 struct edata_cmp_summary_s {
-	uint64_t sn;
+	uint64_t  sn;
 	uintptr_t addr;
 };
 
 /* Extent (span of pages).  Use accessor functions for e_* fields. */
 typedef struct edata_s edata_t;
-ph_structs(edata_avail, edata_t);
-ph_structs(edata_heap, edata_t);
+ph_structs(edata_avail, edata_t, ESET_ENUMERATE_MAX_NUM);
+ph_structs(edata_heap, edata_t, ESET_ENUMERATE_MAX_NUM);
 struct edata_s {
 	/*
 	 * Bitfield containing several fields:
@@ -139,55 +146,72 @@ struct edata_s {
 	 *
 	 * bin_shard: the shard of the bin from which this extent came.
 	 */
-	uint64_t		e_bits;
-#define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) ((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1)) << (CURRENT_FIELD_SHIFT))
+	uint64_t e_bits;
+#define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT)                         \
+	((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1))                   \
+	    << (CURRENT_FIELD_SHIFT))
 
-#define EDATA_BITS_ARENA_WIDTH  MALLOCX_ARENA_BITS
-#define EDATA_BITS_ARENA_SHIFT  0
-#define EDATA_BITS_ARENA_MASK  MASK(EDATA_BITS_ARENA_WIDTH, EDATA_BITS_ARENA_SHIFT)
+#define EDATA_BITS_ARENA_WIDTH MALLOCX_ARENA_BITS
+#define EDATA_BITS_ARENA_SHIFT 0
+#define EDATA_BITS_ARENA_MASK                                                  \
+	MASK(EDATA_BITS_ARENA_WIDTH, EDATA_BITS_ARENA_SHIFT)
 
-#define EDATA_BITS_SLAB_WIDTH  1
-#define EDATA_BITS_SLAB_SHIFT  (EDATA_BITS_ARENA_WIDTH + EDATA_BITS_ARENA_SHIFT)
-#define EDATA_BITS_SLAB_MASK  MASK(EDATA_BITS_SLAB_WIDTH, EDATA_BITS_SLAB_SHIFT)
+#define EDATA_BITS_SLAB_WIDTH 1
+#define EDATA_BITS_SLAB_SHIFT (EDATA_BITS_ARENA_WIDTH + EDATA_BITS_ARENA_SHIFT)
+#define EDATA_BITS_SLAB_MASK MASK(EDATA_BITS_SLAB_WIDTH, EDATA_BITS_SLAB_SHIFT)
 
-#define EDATA_BITS_COMMITTED_WIDTH  1
-#define EDATA_BITS_COMMITTED_SHIFT  (EDATA_BITS_SLAB_WIDTH + EDATA_BITS_SLAB_SHIFT)
-#define EDATA_BITS_COMMITTED_MASK  MASK(EDATA_BITS_COMMITTED_WIDTH, EDATA_BITS_COMMITTED_SHIFT)
+#define EDATA_BITS_COMMITTED_WIDTH 1
+#define EDATA_BITS_COMMITTED_SHIFT                                             \
+	(EDATA_BITS_SLAB_WIDTH + EDATA_BITS_SLAB_SHIFT)
+#define EDATA_BITS_COMMITTED_MASK                                              \
+	MASK(EDATA_BITS_COMMITTED_WIDTH, EDATA_BITS_COMMITTED_SHIFT)
 
-#define EDATA_BITS_PAI_WIDTH  1
-#define EDATA_BITS_PAI_SHIFT  (EDATA_BITS_COMMITTED_WIDTH + EDATA_BITS_COMMITTED_SHIFT)
-#define EDATA_BITS_PAI_MASK  MASK(EDATA_BITS_PAI_WIDTH, EDATA_BITS_PAI_SHIFT)
+#define EDATA_BITS_PAI_WIDTH 1
+#define EDATA_BITS_PAI_SHIFT                                                   \
+	(EDATA_BITS_COMMITTED_WIDTH + EDATA_BITS_COMMITTED_SHIFT)
+#define EDATA_BITS_PAI_MASK MASK(EDATA_BITS_PAI_WIDTH, EDATA_BITS_PAI_SHIFT)
 
-#define EDATA_BITS_ZEROED_WIDTH  1
-#define EDATA_BITS_ZEROED_SHIFT  (EDATA_BITS_PAI_WIDTH + EDATA_BITS_PAI_SHIFT)
-#define EDATA_BITS_ZEROED_MASK  MASK(EDATA_BITS_ZEROED_WIDTH, EDATA_BITS_ZEROED_SHIFT)
+#define EDATA_BITS_ZEROED_WIDTH 1
+#define EDATA_BITS_ZEROED_SHIFT (EDATA_BITS_PAI_WIDTH + EDATA_BITS_PAI_SHIFT)
+#define EDATA_BITS_ZEROED_MASK                                                 \
+	MASK(EDATA_BITS_ZEROED_WIDTH, EDATA_BITS_ZEROED_SHIFT)
 
-#define EDATA_BITS_GUARDED_WIDTH  1
-#define EDATA_BITS_GUARDED_SHIFT  (EDATA_BITS_ZEROED_WIDTH + EDATA_BITS_ZEROED_SHIFT)
-#define EDATA_BITS_GUARDED_MASK  MASK(EDATA_BITS_GUARDED_WIDTH, EDATA_BITS_GUARDED_SHIFT)
+#define EDATA_BITS_GUARDED_WIDTH 1
+#define EDATA_BITS_GUARDED_SHIFT                                               \
+	(EDATA_BITS_ZEROED_WIDTH + EDATA_BITS_ZEROED_SHIFT)
+#define EDATA_BITS_GUARDED_MASK                                                \
+	MASK(EDATA_BITS_GUARDED_WIDTH, EDATA_BITS_GUARDED_SHIFT)
 
-#define EDATA_BITS_STATE_WIDTH  3
-#define EDATA_BITS_STATE_SHIFT  (EDATA_BITS_GUARDED_WIDTH + EDATA_BITS_GUARDED_SHIFT)
-#define EDATA_BITS_STATE_MASK  MASK(EDATA_BITS_STATE_WIDTH, EDATA_BITS_STATE_SHIFT)
+#define EDATA_BITS_STATE_WIDTH 3
+#define EDATA_BITS_STATE_SHIFT                                                 \
+	(EDATA_BITS_GUARDED_WIDTH + EDATA_BITS_GUARDED_SHIFT)
+#define EDATA_BITS_STATE_MASK                                                  \
+	MASK(EDATA_BITS_STATE_WIDTH, EDATA_BITS_STATE_SHIFT)
 
-#define EDATA_BITS_SZIND_WIDTH  LG_CEIL(SC_NSIZES)
-#define EDATA_BITS_SZIND_SHIFT  (EDATA_BITS_STATE_WIDTH + EDATA_BITS_STATE_SHIFT)
-#define EDATA_BITS_SZIND_MASK  MASK(EDATA_BITS_SZIND_WIDTH, EDATA_BITS_SZIND_SHIFT)
+#define EDATA_BITS_SZIND_WIDTH LG_CEIL(SC_NSIZES)
+#define EDATA_BITS_SZIND_SHIFT (EDATA_BITS_STATE_WIDTH + EDATA_BITS_STATE_SHIFT)
+#define EDATA_BITS_SZIND_MASK                                                  \
+	MASK(EDATA_BITS_SZIND_WIDTH, EDATA_BITS_SZIND_SHIFT)
 
-#define EDATA_BITS_NFREE_WIDTH  (SC_LG_SLAB_MAXREGS + 1)
-#define EDATA_BITS_NFREE_SHIFT  (EDATA_BITS_SZIND_WIDTH + EDATA_BITS_SZIND_SHIFT)
-#define EDATA_BITS_NFREE_MASK  MASK(EDATA_BITS_NFREE_WIDTH, EDATA_BITS_NFREE_SHIFT)
+#define EDATA_BITS_NFREE_WIDTH (SC_LG_SLAB_MAXREGS + 1)
+#define EDATA_BITS_NFREE_SHIFT (EDATA_BITS_SZIND_WIDTH + EDATA_BITS_SZIND_SHIFT)
+#define EDATA_BITS_NFREE_MASK                                                  \
+	MASK(EDATA_BITS_NFREE_WIDTH, EDATA_BITS_NFREE_SHIFT)
 
-#define EDATA_BITS_BINSHARD_WIDTH  6
-#define EDATA_BITS_BINSHARD_SHIFT  (EDATA_BITS_NFREE_WIDTH + EDATA_BITS_NFREE_SHIFT)
-#define EDATA_BITS_BINSHARD_MASK  MASK(EDATA_BITS_BINSHARD_WIDTH, EDATA_BITS_BINSHARD_SHIFT)
+#define EDATA_BITS_BINSHARD_WIDTH 6
+#define EDATA_BITS_BINSHARD_SHIFT                                              \
+	(EDATA_BITS_NFREE_WIDTH + EDATA_BITS_NFREE_SHIFT)
+#define EDATA_BITS_BINSHARD_MASK                                               \
+	MASK(EDATA_BITS_BINSHARD_WIDTH, EDATA_BITS_BINSHARD_SHIFT)
 
 #define EDATA_BITS_IS_HEAD_WIDTH 1
-#define EDATA_BITS_IS_HEAD_SHIFT  (EDATA_BITS_BINSHARD_WIDTH + EDATA_BITS_BINSHARD_SHIFT)
-#define EDATA_BITS_IS_HEAD_MASK  MASK(EDATA_BITS_IS_HEAD_WIDTH, EDATA_BITS_IS_HEAD_SHIFT)
+#define EDATA_BITS_IS_HEAD_SHIFT                                               \
+	(EDATA_BITS_BINSHARD_WIDTH + EDATA_BITS_BINSHARD_SHIFT)
+#define EDATA_BITS_IS_HEAD_MASK                                                \
+	MASK(EDATA_BITS_IS_HEAD_WIDTH, EDATA_BITS_IS_HEAD_SHIFT)
 
 	/* Pointer to the extent that this structure is responsible for. */
-	void			*e_addr;
+	void *e_addr;
 
 	union {
 		/*
@@ -197,16 +221,16 @@ struct edata_s {
 		 *
 		 * ssssssss [...] ssssssss ssssnnnn nnnnnnnn
 		 */
-		size_t			e_size_esn;
-	#define EDATA_SIZE_MASK	((size_t)~(PAGE-1))
-	#define EDATA_ESN_MASK		((size_t)PAGE-1)
+		size_t e_size_esn;
+#define EDATA_SIZE_MASK ((size_t) ~(PAGE - 1))
+#define EDATA_ESN_MASK ((size_t)PAGE - 1)
 		/* Base extent size, which may not be a multiple of PAGE. */
-		size_t			e_bsize;
+		size_t e_bsize;
 	};
 
 	/*
 	 * If this edata is a user allocation from an HPA, it comes out of some
-	 * pageslab (we don't yet support huegpage allocations that don't fit
+	 * pageslab (we don't yet support hugepage allocations that don't fit
 	 * into pageslabs).  This tracks it.
 	 */
 	hpdata_t *e_ps;
@@ -222,7 +246,7 @@ struct edata_s {
 		 * List linkage used when the edata_t is active; either in
 		 * arena's large allocations or bin_t's slabs_full.
 		 */
-		ql_elm(edata_t)	ql_link_active;
+		ql_elm(edata_t) ql_link_active;
 		/*
 		 * Pairing heap linkage.  Used whenever the extent is inactive
 		 * (in the page allocators), or when it is active and in
@@ -230,7 +254,7 @@ struct edata_s {
 		 * extent and sitting in an edata_cache.
 		 */
 		union {
-			edata_heap_link_t heap_link;
+			edata_heap_link_t  heap_link;
 			edata_avail_link_t avail_link;
 		};
 	};
@@ -243,10 +267,10 @@ struct edata_s {
 		 */
 		ql_elm(edata_t) ql_link_inactive;
 		/* Small region slab metadata. */
-		slab_data_t	e_slab_data;
+		slab_data_t e_slab_data;
 
 		/* Profiling data, used for large objects. */
-		e_prof_info_t	e_prof_info;
+		e_prof_info_t e_prof_info;
 	};
 };
 
@@ -255,8 +279,8 @@ TYPED_LIST(edata_list_inactive, edata_t, ql_link_inactive)
 
 static inline unsigned
 edata_arena_ind_get(const edata_t *edata) {
-	unsigned arena_ind = (unsigned)((edata->e_bits &
-	    EDATA_BITS_ARENA_MASK) >> EDATA_BITS_ARENA_SHIFT);
+	unsigned arena_ind = (unsigned)((edata->e_bits & EDATA_BITS_ARENA_MASK)
+	    >> EDATA_BITS_ARENA_SHIFT);
 	assert(arena_ind < MALLOCX_ARENA_LIMIT);
 
 	return arena_ind;
@@ -264,8 +288,8 @@ edata_arena_ind_get(const edata_t *edata) {
 
 static inline szind_t
 edata_szind_get_maybe_invalid(const edata_t *edata) {
-	szind_t szind = (szind_t)((edata->e_bits & EDATA_BITS_SZIND_MASK) >>
-	    EDATA_BITS_SZIND_SHIFT);
+	szind_t szind = (szind_t)((edata->e_bits & EDATA_BITS_SZIND_MASK)
+	    >> EDATA_BITS_SZIND_SHIFT);
 	assert(szind <= SC_NSIZES);
 	return szind;
 }
@@ -279,13 +303,61 @@ edata_szind_get(const edata_t *edata) {
 
 static inline size_t
 edata_usize_get(const edata_t *edata) {
-	return sz_index2size(edata_szind_get(edata));
+	assert(edata != NULL);
+	/*
+	 * When sz_large_size_classes_disabled() is true, two cases:
+	 * 1. if usize_from_ind is not smaller than SC_LARGE_MINCLASS,
+	 * usize_from_size is accurate;
+	 * 2. otherwise, usize_from_ind is accurate.
+	 *
+	 * When sz_large_size_classes_disabled() is not true, the two should be the
+	 * same when usize_from_ind is not smaller than SC_LARGE_MINCLASS.
+	 *
+	 * Note sampled small allocs will be promoted.  Their extent size is
+	 * recorded in edata_size_get(edata), while their szind reflects the
+	 * true usize.  Thus, usize retrieved here is still accurate for
+	 * sampled small allocs.
+	 */
+	szind_t szind = edata_szind_get(edata);
+#ifdef JEMALLOC_JET
+	/*
+	 * Double free is invalid and results in undefined behavior.  However,
+	 * for double free tests to end gracefully, return an invalid usize
+	 * when szind shows the edata is not active, i.e., szind == SC_NSIZES.
+	 */
+	if (unlikely(szind == SC_NSIZES)) {
+		return SC_LARGE_MAXCLASS + 1;
+	}
+#endif
+
+	if (!sz_large_size_classes_disabled() || szind < SC_NBINS) {
+		size_t usize_from_ind = sz_index2size(szind);
+		if (!sz_large_size_classes_disabled()
+		    && usize_from_ind >= SC_LARGE_MINCLASS) {
+			size_t size = (edata->e_size_esn & EDATA_SIZE_MASK);
+			assert(size > sz_large_pad);
+			size_t usize_from_size = size - sz_large_pad;
+			assert(usize_from_ind == usize_from_size);
+		}
+		return usize_from_ind;
+	}
+
+	size_t size = (edata->e_size_esn & EDATA_SIZE_MASK);
+	assert(size > sz_large_pad);
+	size_t usize_from_size = size - sz_large_pad;
+	/*
+	 * no matter large size classes disabled or not, usize retrieved from
+	 * size is not accurate when smaller than SC_LARGE_MINCLASS.
+	 */
+	assert(usize_from_size >= SC_LARGE_MINCLASS);
+	return usize_from_size;
 }
 
 static inline unsigned
 edata_binshard_get(const edata_t *edata) {
-	unsigned binshard = (unsigned)((edata->e_bits &
-	    EDATA_BITS_BINSHARD_MASK) >> EDATA_BITS_BINSHARD_SHIFT);
+	unsigned binshard = (unsigned)((edata->e_bits
+	                                   & EDATA_BITS_BINSHARD_MASK)
+	    >> EDATA_BITS_BINSHARD_SHIFT);
 	assert(binshard < bin_infos[edata_szind_get(edata)].n_shards);
 	return binshard;
 }
@@ -297,58 +369,58 @@ edata_sn_get(const edata_t *edata) {
 
 static inline extent_state_t
 edata_state_get(const edata_t *edata) {
-	return (extent_state_t)((edata->e_bits & EDATA_BITS_STATE_MASK) >>
-	    EDATA_BITS_STATE_SHIFT);
+	return (extent_state_t)((edata->e_bits & EDATA_BITS_STATE_MASK)
+	    >> EDATA_BITS_STATE_SHIFT);
 }
 
 static inline bool
 edata_guarded_get(const edata_t *edata) {
-	return (bool)((edata->e_bits & EDATA_BITS_GUARDED_MASK) >>
-	    EDATA_BITS_GUARDED_SHIFT);
+	return (bool)((edata->e_bits & EDATA_BITS_GUARDED_MASK)
+	    >> EDATA_BITS_GUARDED_SHIFT);
 }
 
 static inline bool
 edata_zeroed_get(const edata_t *edata) {
-	return (bool)((edata->e_bits & EDATA_BITS_ZEROED_MASK) >>
-	    EDATA_BITS_ZEROED_SHIFT);
+	return (bool)((edata->e_bits & EDATA_BITS_ZEROED_MASK)
+	    >> EDATA_BITS_ZEROED_SHIFT);
 }
 
 static inline bool
 edata_committed_get(const edata_t *edata) {
-	return (bool)((edata->e_bits & EDATA_BITS_COMMITTED_MASK) >>
-	    EDATA_BITS_COMMITTED_SHIFT);
+	return (bool)((edata->e_bits & EDATA_BITS_COMMITTED_MASK)
+	    >> EDATA_BITS_COMMITTED_SHIFT);
 }
 
 static inline extent_pai_t
 edata_pai_get(const edata_t *edata) {
-	return (extent_pai_t)((edata->e_bits & EDATA_BITS_PAI_MASK) >>
-	    EDATA_BITS_PAI_SHIFT);
+	return (extent_pai_t)((edata->e_bits & EDATA_BITS_PAI_MASK)
+	    >> EDATA_BITS_PAI_SHIFT);
 }
 
 static inline bool
 edata_slab_get(const edata_t *edata) {
-	return (bool)((edata->e_bits & EDATA_BITS_SLAB_MASK) >>
-	    EDATA_BITS_SLAB_SHIFT);
+	return (bool)((edata->e_bits & EDATA_BITS_SLAB_MASK)
+	    >> EDATA_BITS_SLAB_SHIFT);
 }
 
 static inline unsigned
 edata_nfree_get(const edata_t *edata) {
 	assert(edata_slab_get(edata));
-	return (unsigned)((edata->e_bits & EDATA_BITS_NFREE_MASK) >>
-	    EDATA_BITS_NFREE_SHIFT);
+	return (unsigned)((edata->e_bits & EDATA_BITS_NFREE_MASK)
+	    >> EDATA_BITS_NFREE_SHIFT);
 }
 
 static inline void *
 edata_base_get(const edata_t *edata) {
-	assert(edata->e_addr == PAGE_ADDR2BASE(edata->e_addr) ||
-	    !edata_slab_get(edata));
+	assert(edata->e_addr == PAGE_ADDR2BASE(edata->e_addr)
+	    || !edata_slab_get(edata));
 	return PAGE_ADDR2BASE(edata->e_addr);
 }
 
 static inline void *
 edata_addr_get(const edata_t *edata) {
-	assert(edata->e_addr == PAGE_ADDR2BASE(edata->e_addr) ||
-	    !edata_slab_get(edata));
+	assert(edata->e_addr == PAGE_ADDR2BASE(edata->e_addr)
+	    || !edata_slab_get(edata));
 	return edata->e_addr;
 }
 
@@ -375,19 +447,19 @@ edata_ps_get(const edata_t *edata) {
 
 static inline void *
 edata_before_get(const edata_t *edata) {
-	return (void *)((uintptr_t)edata_base_get(edata) - PAGE);
+	return (void *)((byte_t *)edata_base_get(edata) - PAGE);
 }
 
 static inline void *
 edata_last_get(const edata_t *edata) {
-	return (void *)((uintptr_t)edata_base_get(edata) +
-	    edata_size_get(edata) - PAGE);
+	return (void *)((byte_t *)edata_base_get(edata) + edata_size_get(edata)
+	    - PAGE);
 }
 
 static inline void *
 edata_past_get(const edata_t *edata) {
-	return (void *)((uintptr_t)edata_base_get(edata) +
-	    edata_size_get(edata));
+	return (
+	    void *)((byte_t *)edata_base_get(edata) + edata_size_get(edata));
 }
 
 static inline slab_data_t *
@@ -404,8 +476,8 @@ edata_slab_data_get_const(const edata_t *edata) {
 
 static inline prof_tctx_t *
 edata_prof_tctx_get(const edata_t *edata) {
-	return (prof_tctx_t *)atomic_load_p(&edata->e_prof_info.e_prof_tctx,
-	    ATOMIC_ACQUIRE);
+	return (prof_tctx_t *)atomic_load_p(
+	    &edata->e_prof_info.e_prof_tctx, ATOMIC_ACQUIRE);
 }
 
 static inline const nstime_t *
@@ -426,16 +498,16 @@ edata_prof_recent_alloc_get_dont_call_directly(const edata_t *edata) {
 
 static inline void
 edata_arena_ind_set(edata_t *edata, unsigned arena_ind) {
-	edata->e_bits = (edata->e_bits & ~EDATA_BITS_ARENA_MASK) |
-	    ((uint64_t)arena_ind << EDATA_BITS_ARENA_SHIFT);
+	edata->e_bits = (edata->e_bits & ~EDATA_BITS_ARENA_MASK)
+	    | ((uint64_t)arena_ind << EDATA_BITS_ARENA_SHIFT);
 }
 
 static inline void
 edata_binshard_set(edata_t *edata, unsigned binshard) {
 	/* The assertion assumes szind is set already. */
 	assert(binshard < bin_infos[edata_szind_get(edata)].n_shards);
-	edata->e_bits = (edata->e_bits & ~EDATA_BITS_BINSHARD_MASK) |
-	    ((uint64_t)binshard << EDATA_BITS_BINSHARD_SHIFT);
+	edata->e_bits = (edata->e_bits & ~EDATA_BITS_BINSHARD_MASK)
+	    | ((uint64_t)binshard << EDATA_BITS_BINSHARD_SHIFT);
 }
 
 static inline void
@@ -451,8 +523,8 @@ edata_size_set(edata_t *edata, size_t size) {
 
 static inline void
 edata_esn_set(edata_t *edata, size_t esn) {
-	edata->e_size_esn = (edata->e_size_esn & ~EDATA_ESN_MASK) | (esn &
-	    EDATA_ESN_MASK);
+	edata->e_size_esn = (edata->e_size_esn & ~EDATA_ESN_MASK)
+	    | (esn & EDATA_ESN_MASK);
 }
 
 static inline void
@@ -469,25 +541,26 @@ edata_ps_set(edata_t *edata, hpdata_t *ps) {
 static inline void
 edata_szind_set(edata_t *edata, szind_t szind) {
 	assert(szind <= SC_NSIZES); /* SC_NSIZES means "invalid". */
-	edata->e_bits = (edata->e_bits & ~EDATA_BITS_SZIND_MASK) |
-	    ((uint64_t)szind << EDATA_BITS_SZIND_SHIFT);
+	edata->e_bits = (edata->e_bits & ~EDATA_BITS_SZIND_MASK)
+	    | ((uint64_t)szind << EDATA_BITS_SZIND_SHIFT);
 }
 
 static inline void
 edata_nfree_set(edata_t *edata, unsigned nfree) {
 	assert(edata_slab_get(edata));
-	edata->e_bits = (edata->e_bits & ~EDATA_BITS_NFREE_MASK) |
-	    ((uint64_t)nfree << EDATA_BITS_NFREE_SHIFT);
+	edata->e_bits = (edata->e_bits & ~EDATA_BITS_NFREE_MASK)
+	    | ((uint64_t)nfree << EDATA_BITS_NFREE_SHIFT);
 }
 
 static inline void
 edata_nfree_binshard_set(edata_t *edata, unsigned nfree, unsigned binshard) {
 	/* The assertion assumes szind is set already. */
 	assert(binshard < bin_infos[edata_szind_get(edata)].n_shards);
-	edata->e_bits = (edata->e_bits &
-	    (~EDATA_BITS_NFREE_MASK & ~EDATA_BITS_BINSHARD_MASK)) |
-	    ((uint64_t)binshard << EDATA_BITS_BINSHARD_SHIFT) |
-	    ((uint64_t)nfree << EDATA_BITS_NFREE_SHIFT);
+	edata->e_bits = (edata->e_bits
+	                    & (~EDATA_BITS_NFREE_MASK
+	                        & ~EDATA_BITS_BINSHARD_MASK))
+	    | ((uint64_t)binshard << EDATA_BITS_BINSHARD_SHIFT)
+	    | ((uint64_t)nfree << EDATA_BITS_NFREE_SHIFT);
 }
 
 static inline void
@@ -515,38 +588,38 @@ edata_sn_set(edata_t *edata, uint64_t sn) {
 
 static inline void
 edata_state_set(edata_t *edata, extent_state_t state) {
-	edata->e_bits = (edata->e_bits & ~EDATA_BITS_STATE_MASK) |
-	    ((uint64_t)state << EDATA_BITS_STATE_SHIFT);
+	edata->e_bits = (edata->e_bits & ~EDATA_BITS_STATE_MASK)
+	    | ((uint64_t)state << EDATA_BITS_STATE_SHIFT);
 }
 
 static inline void
 edata_guarded_set(edata_t *edata, bool guarded) {
-	edata->e_bits = (edata->e_bits & ~EDATA_BITS_GUARDED_MASK) |
-	    ((uint64_t)guarded << EDATA_BITS_GUARDED_SHIFT);
+	edata->e_bits = (edata->e_bits & ~EDATA_BITS_GUARDED_MASK)
+	    | ((uint64_t)guarded << EDATA_BITS_GUARDED_SHIFT);
 }
 
 static inline void
 edata_zeroed_set(edata_t *edata, bool zeroed) {
-	edata->e_bits = (edata->e_bits & ~EDATA_BITS_ZEROED_MASK) |
-	    ((uint64_t)zeroed << EDATA_BITS_ZEROED_SHIFT);
+	edata->e_bits = (edata->e_bits & ~EDATA_BITS_ZEROED_MASK)
+	    | ((uint64_t)zeroed << EDATA_BITS_ZEROED_SHIFT);
 }
 
 static inline void
 edata_committed_set(edata_t *edata, bool committed) {
-	edata->e_bits = (edata->e_bits & ~EDATA_BITS_COMMITTED_MASK) |
-	    ((uint64_t)committed << EDATA_BITS_COMMITTED_SHIFT);
+	edata->e_bits = (edata->e_bits & ~EDATA_BITS_COMMITTED_MASK)
+	    | ((uint64_t)committed << EDATA_BITS_COMMITTED_SHIFT);
 }
 
 static inline void
 edata_pai_set(edata_t *edata, extent_pai_t pai) {
-	edata->e_bits = (edata->e_bits & ~EDATA_BITS_PAI_MASK) |
-	    ((uint64_t)pai << EDATA_BITS_PAI_SHIFT);
+	edata->e_bits = (edata->e_bits & ~EDATA_BITS_PAI_MASK)
+	    | ((uint64_t)pai << EDATA_BITS_PAI_SHIFT);
 }
 
 static inline void
 edata_slab_set(edata_t *edata, bool slab) {
-	edata->e_bits = (edata->e_bits & ~EDATA_BITS_SLAB_MASK) |
-	    ((uint64_t)slab << EDATA_BITS_SLAB_SHIFT);
+	edata->e_bits = (edata->e_bits & ~EDATA_BITS_SLAB_MASK)
+	    | ((uint64_t)slab << EDATA_BITS_SLAB_SHIFT);
 }
 
 static inline void
@@ -565,22 +638,22 @@ edata_prof_alloc_size_set(edata_t *edata, size_t size) {
 }
 
 static inline void
-edata_prof_recent_alloc_set_dont_call_directly(edata_t *edata,
-    prof_recent_t *recent_alloc) {
+edata_prof_recent_alloc_set_dont_call_directly(
+    edata_t *edata, prof_recent_t *recent_alloc) {
 	atomic_store_p(&edata->e_prof_info.e_prof_recent_alloc, recent_alloc,
 	    ATOMIC_RELAXED);
 }
 
 static inline bool
 edata_is_head_get(edata_t *edata) {
-	return (bool)((edata->e_bits & EDATA_BITS_IS_HEAD_MASK) >>
-	    EDATA_BITS_IS_HEAD_SHIFT);
+	return (bool)((edata->e_bits & EDATA_BITS_IS_HEAD_MASK)
+	    >> EDATA_BITS_IS_HEAD_SHIFT);
 }
 
 static inline void
 edata_is_head_set(edata_t *edata, bool is_head) {
-	edata->e_bits = (edata->e_bits & ~EDATA_BITS_IS_HEAD_MASK) |
-	    ((uint64_t)is_head << EDATA_BITS_IS_HEAD_SHIFT);
+	edata->e_bits = (edata->e_bits & ~EDATA_BITS_IS_HEAD_MASK)
+	    | ((uint64_t)is_head << EDATA_BITS_IS_HEAD_SHIFT);
 }
 
 static inline bool
@@ -619,7 +692,8 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size,
 }
 
 static inline void
-edata_binit(edata_t *edata, void *addr, size_t bsize, uint64_t sn) {
+edata_binit(
+    edata_t *edata, void *addr, size_t bsize, uint64_t sn, bool reused) {
 	edata_arena_ind_set(edata, (1U << MALLOCX_ARENA_BITS) - 1);
 	edata_addr_set(edata, addr);
 	edata_bsize_set(edata, bsize);
@@ -627,7 +701,8 @@ edata_binit(edata_t *edata, void *addr, size_t bsize, uint64_t sn) {
 	edata_szind_set(edata, SC_NSIZES);
 	edata_sn_set(edata, sn);
 	edata_state_set(edata, extent_state_active);
-	edata_guarded_set(edata, false);
+	/* See comments in base_edata_is_reused. */
+	edata_guarded_set(edata, reused);
 	edata_zeroed_set(edata, true);
 	edata_committed_set(edata, true);
 	/*
@@ -656,20 +731,47 @@ edata_ead_comp(const edata_t *a, const edata_t *b) {
 
 static inline edata_cmp_summary_t
 edata_cmp_summary_get(const edata_t *edata) {
-	return (edata_cmp_summary_t){edata_sn_get(edata),
-		(uintptr_t)edata_addr_get(edata)};
+	edata_cmp_summary_t result;
+	result.sn = edata_sn_get(edata);
+	result.addr = (uintptr_t)edata_addr_get(edata);
+	return result;
+}
+
+#ifdef JEMALLOC_HAVE_INT128
+JEMALLOC_ALWAYS_INLINE unsigned __int128
+edata_cmp_summary_encode(edata_cmp_summary_t src) {
+	return ((unsigned __int128)src.sn << 64) | src.addr;
 }
 
 static inline int
 edata_cmp_summary_comp(edata_cmp_summary_t a, edata_cmp_summary_t b) {
-	int ret;
-	ret = (a.sn > b.sn) - (a.sn < b.sn);
-	if (ret != 0) {
-		return ret;
-	}
-	ret = (a.addr > b.addr) - (a.addr < b.addr);
-	return ret;
+	unsigned __int128 a_encoded = edata_cmp_summary_encode(a);
+	unsigned __int128 b_encoded = edata_cmp_summary_encode(b);
+	if (a_encoded < b_encoded)
+		return -1;
+	if (a_encoded == b_encoded)
+		return 0;
+	return 1;
 }
+#else
+static inline int
+edata_cmp_summary_comp(edata_cmp_summary_t a, edata_cmp_summary_t b) {
+	/*
+	 * Logically, what we're doing here is comparing based on `.sn`, and
+	 * falling back to comparing on `.addr` in the case that `a.sn == b.sn`.
+	 * We accomplish this by multiplying the result of the `.sn` comparison
+	 * by 2, so that so long as it is not 0, it will dominate the `.addr`
+	 * comparison in determining the sign of the returned result value.
+	 * The justification for doing things this way is that this is
+	 * branchless - all of the branches that would be present in a
+	 * straightforward implementation are common cases, and thus the branch
+	 * prediction accuracy is not great. As a result, this implementation
+	 * is measurably faster (by around 30%).
+	 */
+	return (2 * ((a.sn > b.sn) - (a.sn < b.sn)))
+	    + ((a.addr > b.addr) - (a.addr < b.addr));
+}
+#endif
 
 static inline int
 edata_snad_comp(const edata_t *a, const edata_t *b) {
@@ -681,18 +783,13 @@ edata_snad_comp(const edata_t *a, const edata_t *b) {
 
 static inline int
 edata_esnead_comp(const edata_t *a, const edata_t *b) {
-	int ret;
-
-	ret = edata_esn_comp(a, b);
-	if (ret != 0) {
-		return ret;
-	}
-
-	ret = edata_ead_comp(a, b);
-	return ret;
+	/*
+	 * Similar to `edata_cmp_summary_comp`, we've opted for a
+	 * branchless implementation for the sake of performance.
+	 */
+	return (2 * edata_esn_comp(a, b)) + edata_ead_comp(a, b);
 }
 
-ph_proto(, edata_avail, edata_t)
-ph_proto(, edata_heap, edata_t)
+ph_proto(, edata_avail, edata_t) ph_proto(, edata_heap, edata_t)
 
 #endif /* JEMALLOC_INTERNAL_EDATA_H */
diff --git a/include/jemalloc/internal/edata_cache.h b/include/jemalloc/internal/edata_cache.h
index 8b6c0ef7..d92d90cb 100644
--- a/include/jemalloc/internal/edata_cache.h
+++ b/include/jemalloc/internal/edata_cache.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_EDATA_CACHE_H
 #define JEMALLOC_INTERNAL_EDATA_CACHE_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/base.h"
 
 /* For tests only. */
@@ -14,13 +15,13 @@
 
 typedef struct edata_cache_s edata_cache_t;
 struct edata_cache_s {
-	edata_avail_t avail;
-	atomic_zu_t count;
+	edata_avail_t  avail;
+	atomic_zu_t    count;
 	malloc_mutex_t mtx;
-	base_t *base;
+	base_t        *base;
 };
 
-bool edata_cache_init(edata_cache_t *edata_cache, base_t *base);
+bool     edata_cache_init(edata_cache_t *edata_cache, base_t *base);
 edata_t *edata_cache_get(tsdn_t *tsdn, edata_cache_t *edata_cache);
 void edata_cache_put(tsdn_t *tsdn, edata_cache_t *edata_cache, edata_t *edata);
 
@@ -36,14 +37,14 @@ void edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache);
 typedef struct edata_cache_fast_s edata_cache_fast_t;
 struct edata_cache_fast_s {
 	edata_list_inactive_t list;
-	edata_cache_t *fallback;
-	bool disabled;
+	edata_cache_t        *fallback;
+	bool                  disabled;
 };
 
 void edata_cache_fast_init(edata_cache_fast_t *ecs, edata_cache_t *fallback);
 edata_t *edata_cache_fast_get(tsdn_t *tsdn, edata_cache_fast_t *ecs);
-void edata_cache_fast_put(tsdn_t *tsdn, edata_cache_fast_t *ecs,
-    edata_t *edata);
+void     edata_cache_fast_put(
+        tsdn_t *tsdn, edata_cache_fast_t *ecs, edata_t *edata);
 void edata_cache_fast_disable(tsdn_t *tsdn, edata_cache_fast_t *ecs);
 
 #endif /* JEMALLOC_INTERNAL_EDATA_CACHE_H */
diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h
index 8d9513e2..c65e189a 100644
--- a/include/jemalloc/internal/ehooks.h
+++ b/include/jemalloc/internal/ehooks.h
@@ -1,8 +1,11 @@
 #ifndef JEMALLOC_INTERNAL_EHOOKS_H
 #define JEMALLOC_INTERNAL_EHOOKS_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/atomic.h"
 #include "jemalloc/internal/extent_mmap.h"
+#include "jemalloc/internal/tsd.h"
+#include "jemalloc/internal/tsd_types.h"
 
 /*
  * This module is the internal interface to the extent hooks (both
@@ -43,17 +46,17 @@ extern const extent_hooks_t ehooks_default_extent_hooks;
  */
 void *ehooks_default_alloc_impl(tsdn_t *tsdn, void *new_addr, size_t size,
     size_t alignment, bool *zero, bool *commit, unsigned arena_ind);
-bool ehooks_default_dalloc_impl(void *addr, size_t size);
-void ehooks_default_destroy_impl(void *addr, size_t size);
-bool ehooks_default_commit_impl(void *addr, size_t offset, size_t length);
-bool ehooks_default_decommit_impl(void *addr, size_t offset, size_t length);
+bool  ehooks_default_dalloc_impl(void *addr, size_t size);
+void  ehooks_default_destroy_impl(void *addr, size_t size);
+bool  ehooks_default_commit_impl(void *addr, size_t offset, size_t length);
+bool  ehooks_default_decommit_impl(void *addr, size_t offset, size_t length);
 #ifdef PAGES_CAN_PURGE_LAZY
 bool ehooks_default_purge_lazy_impl(void *addr, size_t offset, size_t length);
 #endif
 #ifdef PAGES_CAN_PURGE_FORCED
 bool ehooks_default_purge_forced_impl(void *addr, size_t offset, size_t length);
 #endif
-bool ehooks_default_split_impl();
+bool ehooks_default_split_impl(void);
 /*
  * Merge is the only default extent hook we declare -- see the comment in
  * ehooks_merge.
@@ -113,8 +116,8 @@ ehooks_get_extent_hooks_ptr(ehooks_t *ehooks) {
 
 static inline bool
 ehooks_are_default(ehooks_t *ehooks) {
-	return ehooks_get_extent_hooks_ptr(ehooks) ==
-	    &ehooks_default_extent_hooks;
+	return ehooks_get_extent_hooks_ptr(ehooks)
+	    == &ehooks_default_extent_hooks;
 }
 
 /*
@@ -186,16 +189,15 @@ ehooks_debug_zero_check(void *addr, size_t size) {
 	}
 }
 
-
 static inline void *
 ehooks_alloc(tsdn_t *tsdn, ehooks_t *ehooks, void *new_addr, size_t size,
     size_t alignment, bool *zero, bool *commit) {
-	bool orig_zero = *zero;
-	void *ret;
+	bool            orig_zero = *zero;
+	void           *ret;
 	extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks);
 	if (extent_hooks == &ehooks_default_extent_hooks) {
-		ret = ehooks_default_alloc_impl(tsdn, new_addr, size,
-		    alignment, zero, commit, ehooks_ind_get(ehooks));
+		ret = ehooks_default_alloc_impl(tsdn, new_addr, size, alignment,
+		    zero, commit, ehooks_ind_get(ehooks));
 	} else {
 		ehooks_pre_reentrancy(tsdn);
 		ret = extent_hooks->alloc(extent_hooks, new_addr, size,
@@ -211,8 +213,8 @@ ehooks_alloc(tsdn_t *tsdn, ehooks_t *ehooks, void *new_addr, size_t size,
 }
 
 static inline bool
-ehooks_dalloc(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size,
-    bool committed) {
+ehooks_dalloc(
+    tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, bool committed) {
 	extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks);
 	if (extent_hooks == &ehooks_default_extent_hooks) {
 		return ehooks_default_dalloc_impl(addr, size);
@@ -228,8 +230,8 @@ ehooks_dalloc(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size,
 }
 
 static inline void
-ehooks_destroy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size,
-    bool committed) {
+ehooks_destroy(
+    tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, bool committed) {
 	extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks);
 	if (extent_hooks == &ehooks_default_extent_hooks) {
 		ehooks_default_destroy_impl(addr, size);
@@ -247,15 +249,15 @@ static inline bool
 ehooks_commit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size,
     size_t offset, size_t length) {
 	extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks);
-	bool err;
+	bool            err;
 	if (extent_hooks == &ehooks_default_extent_hooks) {
 		err = ehooks_default_commit_impl(addr, offset, length);
 	} else if (extent_hooks->commit == NULL) {
 		err = true;
 	} else {
 		ehooks_pre_reentrancy(tsdn);
-		err = extent_hooks->commit(extent_hooks, addr, size,
-		    offset, length, ehooks_ind_get(ehooks));
+		err = extent_hooks->commit(extent_hooks, addr, size, offset,
+		    length, ehooks_ind_get(ehooks));
 		ehooks_post_reentrancy(tsdn);
 	}
 	if (!err) {
@@ -381,7 +383,7 @@ ehooks_zero(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size) {
 
 static inline bool
 ehooks_guard(tsdn_t *tsdn, ehooks_t *ehooks, void *guard1, void *guard2) {
-	bool err;
+	bool            err;
 	extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks);
 
 	if (extent_hooks == &ehooks_default_extent_hooks) {
@@ -396,7 +398,7 @@ ehooks_guard(tsdn_t *tsdn, ehooks_t *ehooks, void *guard1, void *guard2) {
 
 static inline bool
 ehooks_unguard(tsdn_t *tsdn, ehooks_t *ehooks, void *guard1, void *guard2) {
-	bool err;
+	bool            err;
 	extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks);
 
 	if (extent_hooks == &ehooks_default_extent_hooks) {
diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h
index 847af327..88692356 100644
--- a/include/jemalloc/internal/emap.h
+++ b/include/jemalloc/internal/emap.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_EMAP_H
 #define JEMALLOC_INTERNAL_EMAP_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/base.h"
 #include "jemalloc/internal/rtree.h"
 
@@ -9,9 +10,9 @@
  *     EMAP_DECLARE_RTREE_CTX;
  * in uses will avoid empty-statement warnings.
  */
-#define EMAP_DECLARE_RTREE_CTX						\
-    rtree_ctx_t rtree_ctx_fallback;					\
-    rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback)
+#define EMAP_DECLARE_RTREE_CTX                                                 \
+	rtree_ctx_t  rtree_ctx_fallback;                                       \
+	rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback)
 
 typedef struct emap_s emap_t;
 struct emap_s {
@@ -19,26 +20,27 @@ struct emap_s {
 };
 
 /* Used to pass rtree lookup context down the path. */
-typedef struct emap_alloc_ctx_t emap_alloc_ctx_t;
-struct emap_alloc_ctx_t {
+typedef struct emap_alloc_ctx_s emap_alloc_ctx_t;
+struct emap_alloc_ctx_s {
+	size_t  usize;
 	szind_t szind;
-	bool slab;
+	bool    slab;
 };
 
 typedef struct emap_full_alloc_ctx_s emap_full_alloc_ctx_t;
 struct emap_full_alloc_ctx_s {
-	szind_t szind;
-	bool slab;
+	szind_t  szind;
+	bool     slab;
 	edata_t *edata;
 };
 
 bool emap_init(emap_t *emap, base_t *base, bool zeroed);
 
-void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind,
-    bool slab);
+void emap_remap(
+    tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, bool slab);
 
-void emap_update_edata_state(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
-    extent_state_t state);
+void emap_update_edata_state(
+    tsdn_t *tsdn, emap_t *emap, edata_t *edata, extent_state_t state);
 
 /*
  * The two acquire functions below allow accessing neighbor edatas, if it's safe
@@ -60,16 +62,16 @@ edata_t *emap_try_acquire_edata_neighbor(tsdn_t *tsdn, emap_t *emap,
     bool forward);
 edata_t *emap_try_acquire_edata_neighbor_expand(tsdn_t *tsdn, emap_t *emap,
     edata_t *edata, extent_pai_t pai, extent_state_t expected_state);
-void emap_release_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
-    extent_state_t new_state);
+void     emap_release_edata(
+        tsdn_t *tsdn, emap_t *emap, edata_t *edata, extent_state_t new_state);
 
 /*
  * Associate the given edata with its beginning and end address, setting the
  * szind and slab info appropriately.
  * Returns true on error (i.e. resource exhaustion).
  */
-bool emap_register_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
-    szind_t szind, bool slab);
+bool emap_register_boundary(
+    tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, bool slab);
 
 /*
  * Does the same thing, but with the interior of the range, for slab
@@ -90,8 +92,8 @@ bool emap_register_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
  * touched, so no allocation is necessary to fill the interior once the boundary
  * has been touched.
  */
-void emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
-    szind_t szind);
+void emap_register_interior(
+    tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind);
 
 void emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata);
 void emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata);
@@ -159,8 +161,8 @@ emap_edata_in_transition(tsdn_t *tsdn, emap_t *emap, edata_t *edata) {
 	emap_assert_mapped(tsdn, emap, edata);
 
 	EMAP_DECLARE_RTREE_CTX;
-	rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, rtree_ctx,
-	    (uintptr_t)edata_base_get(edata));
+	rtree_contents_t contents = rtree_read(
+	    tsdn, &emap->rtree, rtree_ctx, (uintptr_t)edata_base_get(edata));
 
 	return edata_state_in_transition(contents.metadata.state);
 }
@@ -185,16 +187,16 @@ emap_edata_is_acquired(tsdn_t *tsdn, emap_t *emap, edata_t *edata) {
 	 */
 	EMAP_DECLARE_RTREE_CTX;
 	rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &emap->rtree,
-	    rtree_ctx, (uintptr_t)edata_base_get(edata), /* dependent */ true,
+	    rtree_ctx, (uintptr_t)edata_base_get(edata), /* dependent */ false,
 	    /* init_missing */ false);
 	if (elm == NULL) {
 		return true;
 	}
 	rtree_contents_t contents = rtree_leaf_elm_read(tsdn, &emap->rtree, elm,
-	    /* dependent */ true);
-	if (contents.edata == NULL ||
-	    contents.metadata.state == extent_state_active ||
-	    edata_state_in_transition(contents.metadata.state)) {
+	    /* dependent */ false);
+	if (contents.edata == NULL
+	    || contents.metadata.state == extent_state_active
+	    || edata_state_in_transition(contents.metadata.state)) {
 		return true;
 	}
 
@@ -209,8 +211,8 @@ extent_assert_can_coalesce(const edata_t *inner, const edata_t *outer) {
 	assert(edata_state_get(inner) == extent_state_active);
 	assert(edata_state_get(outer) == extent_state_merging);
 	assert(!edata_guarded_get(inner) && !edata_guarded_get(outer));
-	assert(edata_base_get(inner) == edata_past_get(outer) ||
-	    edata_base_get(outer) == edata_past_get(inner));
+	assert(edata_base_get(inner) == edata_past_get(outer)
+	    || edata_base_get(outer) == edata_past_get(inner));
 }
 
 JEMALLOC_ALWAYS_INLINE void
@@ -229,16 +231,46 @@ emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) {
 	return rtree_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr).edata;
 }
 
+JEMALLOC_ALWAYS_INLINE void
+emap_alloc_ctx_init(
+    emap_alloc_ctx_t *alloc_ctx, szind_t szind, bool slab, size_t usize) {
+	alloc_ctx->szind = szind;
+	alloc_ctx->slab = slab;
+	alloc_ctx->usize = usize;
+	assert(
+	    sz_large_size_classes_disabled() || usize == sz_index2size(szind));
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+emap_alloc_ctx_usize_get(emap_alloc_ctx_t *alloc_ctx) {
+	assert(alloc_ctx->szind < SC_NSIZES);
+	if (alloc_ctx->slab) {
+		assert(alloc_ctx->usize == sz_index2size(alloc_ctx->szind));
+		return sz_index2size(alloc_ctx->szind);
+	}
+	assert(sz_large_size_classes_disabled()
+	    || alloc_ctx->usize == sz_index2size(alloc_ctx->szind));
+	assert(alloc_ctx->usize <= SC_LARGE_MAXCLASS);
+	return alloc_ctx->usize;
+}
+
 /* Fills in alloc_ctx with the info in the map. */
 JEMALLOC_ALWAYS_INLINE void
-emap_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr,
-    emap_alloc_ctx_t *alloc_ctx) {
+emap_alloc_ctx_lookup(
+    tsdn_t *tsdn, emap_t *emap, const void *ptr, emap_alloc_ctx_t *alloc_ctx) {
 	EMAP_DECLARE_RTREE_CTX;
 
-	rtree_metadata_t metadata = rtree_metadata_read(tsdn, &emap->rtree,
-	    rtree_ctx, (uintptr_t)ptr);
-	alloc_ctx->szind = metadata.szind;
-	alloc_ctx->slab = metadata.slab;
+	rtree_contents_t contents = rtree_read(
+	    tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr);
+	/*
+	 * If the alloc is invalid, do not calculate usize since edata
+	 * could be corrupted.
+	 */
+	emap_alloc_ctx_init(alloc_ctx, contents.metadata.szind,
+	    contents.metadata.slab,
+	    (contents.metadata.szind == SC_NSIZES || contents.edata == NULL)
+	        ? 0
+	        : edata_usize_get(contents.edata));
 }
 
 /* The pointer must be mapped. */
@@ -247,8 +279,8 @@ emap_full_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr,
     emap_full_alloc_ctx_t *full_alloc_ctx) {
 	EMAP_DECLARE_RTREE_CTX;
 
-	rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, rtree_ctx,
-	    (uintptr_t)ptr);
+	rtree_contents_t contents = rtree_read(
+	    tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr);
 	full_alloc_ctx->edata = contents.edata;
 	full_alloc_ctx->szind = contents.metadata.szind;
 	full_alloc_ctx->slab = contents.metadata.slab;
@@ -265,8 +297,8 @@ emap_full_alloc_ctx_try_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr,
 	EMAP_DECLARE_RTREE_CTX;
 
 	rtree_contents_t contents;
-	bool err = rtree_read_independent(tsdn, &emap->rtree, rtree_ctx,
-	    (uintptr_t)ptr, &contents);
+	bool             err = rtree_read_independent(
+            tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr, &contents);
 	if (err) {
 		return true;
 	}
@@ -281,19 +313,26 @@ emap_full_alloc_ctx_try_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr,
  * fast path, e.g. when the metadata key is not cached.
  */
 JEMALLOC_ALWAYS_INLINE bool
-emap_alloc_ctx_try_lookup_fast(tsd_t *tsd, emap_t *emap, const void *ptr,
-    emap_alloc_ctx_t *alloc_ctx) {
+emap_alloc_ctx_try_lookup_fast(
+    tsd_t *tsd, emap_t *emap, const void *ptr, emap_alloc_ctx_t *alloc_ctx) {
 	/* Use the unsafe getter since this may gets called during exit. */
 	rtree_ctx_t *rtree_ctx = tsd_rtree_ctxp_get_unsafe(tsd);
 
 	rtree_metadata_t metadata;
-	bool err = rtree_metadata_try_read_fast(tsd_tsdn(tsd), &emap->rtree,
-	    rtree_ctx, (uintptr_t)ptr, &metadata);
+	bool             err = rtree_metadata_try_read_fast(
+            tsd_tsdn(tsd), &emap->rtree, rtree_ctx, (uintptr_t)ptr, &metadata);
 	if (err) {
 		return true;
 	}
+	/*
+	 * Small allocs using the fastpath can always use index to get the
+	 * usize.  Therefore, do not set alloc_ctx->usize here.
+	 */
 	alloc_ctx->szind = metadata.szind;
 	alloc_ctx->slab = metadata.slab;
+	if (config_debug) {
+		alloc_ctx->usize = SC_LARGE_MAXCLASS + 1;
+	}
 	return false;
 }
 
@@ -308,11 +347,12 @@ typedef const void *(*emap_ptr_getter)(void *ctx, size_t ind);
  * This allows size-checking assertions, which we can only do while we're in the
  * process of edata lookups.
  */
-typedef void (*emap_metadata_visitor)(void *ctx, emap_full_alloc_ctx_t *alloc_ctx);
+typedef void (*emap_metadata_visitor)(
+    void *ctx, emap_full_alloc_ctx_t *alloc_ctx);
 
 typedef union emap_batch_lookup_result_u emap_batch_lookup_result_t;
 union emap_batch_lookup_result_u {
-	edata_t *edata;
+	edata_t          *edata;
 	rtree_leaf_elm_t *rtree_leaf;
 };
 
@@ -338,8 +378,8 @@ emap_edata_lookup_batch(tsd_t *tsd, emap_t *emap, size_t nptrs,
 
 	for (size_t i = 0; i < nptrs; i++) {
 		rtree_leaf_elm_t *elm = result[i].rtree_leaf;
-		rtree_contents_t contents = rtree_leaf_elm_read(tsd_tsdn(tsd),
-		    &emap->rtree, elm, /* dependent */ true);
+		rtree_contents_t  contents = rtree_leaf_elm_read(
+                    tsd_tsdn(tsd), &emap->rtree, elm, /* dependent */ true);
 		result[i].edata = contents.edata;
 		emap_full_alloc_ctx_t alloc_ctx;
 		/*
diff --git a/include/jemalloc/internal/emitter.h b/include/jemalloc/internal/emitter.h
index 9482f68b..a4073e6a 100644
--- a/include/jemalloc/internal/emitter.h
+++ b/include/jemalloc/internal/emitter.h
@@ -1,6 +1,10 @@
 #ifndef JEMALLOC_INTERNAL_EMITTER_H
 #define JEMALLOC_INTERNAL_EMITTER_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/jemalloc_internal_types.h"
+#include "jemalloc/internal/malloc_io.h"
 #include "jemalloc/internal/ql.h"
 
 typedef enum emitter_output_e emitter_output_t;
@@ -40,18 +44,18 @@ typedef struct emitter_col_s emitter_col_t;
 struct emitter_col_s {
 	/* Filled in by the user. */
 	emitter_justify_t justify;
-	int width;
-	emitter_type_t type;
+	int               width;
+	emitter_type_t    type;
 	union {
-		bool bool_val;
-		int int_val;
-		unsigned unsigned_val;
-		uint32_t uint32_val;
-		uint32_t uint32_t_val;
-		uint64_t uint64_val;
-		uint64_t uint64_t_val;
-		size_t size_val;
-		ssize_t ssize_val;
+		bool        bool_val;
+		int         int_val;
+		unsigned    unsigned_val;
+		uint32_t    uint32_val;
+		uint32_t    uint32_t_val;
+		uint64_t    uint64_val;
+		uint64_t    uint64_t_val;
+		size_t      size_val;
+		ssize_t     ssize_val;
 		const char *str_val;
 	};
 
@@ -69,8 +73,8 @@ struct emitter_s {
 	emitter_output_t output;
 	/* The output information. */
 	write_cb_t *write_cb;
-	void *cbopaque;
-	int nesting_depth;
+	void       *cbopaque;
+	int         nesting_depth;
 	/* True if we've already emitted a value at the given depth. */
 	bool item_at_depth;
 	/* True if we emitted a key and will emit corresponding value next. */
@@ -79,8 +83,8 @@ struct emitter_s {
 
 static inline bool
 emitter_outputs_json(emitter_t *emitter) {
-	return emitter->output == emitter_output_json ||
-	    emitter->output == emitter_output_json_compact;
+	return emitter->output == emitter_output_json
+	    || emitter->output == emitter_output_json_compact;
 }
 
 /* Internal convenience function.  Write to the emitter the given string. */
@@ -94,26 +98,57 @@ emitter_printf(emitter_t *emitter, const char *format, ...) {
 	va_end(ap);
 }
 
-static inline const char * JEMALLOC_FORMAT_ARG(3)
-emitter_gen_fmt(char *out_fmt, size_t out_size, const char *fmt_specifier,
-    emitter_justify_t justify, int width) {
+static inline const char *
+JEMALLOC_FORMAT_ARG(3) emitter_gen_fmt(char *out_fmt, size_t out_size,
+    const char *fmt_specifier, emitter_justify_t justify, int width) {
 	size_t written;
 	fmt_specifier++;
 	if (justify == emitter_justify_none) {
-		written = malloc_snprintf(out_fmt, out_size,
-		    "%%%s", fmt_specifier);
+		written = malloc_snprintf(
+		    out_fmt, out_size, "%%%s", fmt_specifier);
 	} else if (justify == emitter_justify_left) {
-		written = malloc_snprintf(out_fmt, out_size,
-		    "%%-%d%s", width, fmt_specifier);
+		written = malloc_snprintf(
+		    out_fmt, out_size, "%%-%d%s", width, fmt_specifier);
 	} else {
-		written = malloc_snprintf(out_fmt, out_size,
-		    "%%%d%s", width, fmt_specifier);
+		written = malloc_snprintf(
+		    out_fmt, out_size, "%%%d%s", width, fmt_specifier);
 	}
 	/* Only happens in case of bad format string, which *we* choose. */
-	assert(written <  out_size);
+	assert(written < out_size);
 	return out_fmt;
 }
 
+static inline void
+emitter_emit_str(emitter_t *emitter, emitter_justify_t justify, int width,
+    char *fmt, size_t fmt_size, const char *str) {
+#define BUF_SIZE 256
+	char   buf[BUF_SIZE];
+	size_t str_written = malloc_snprintf(buf, BUF_SIZE, "\"%s\"", str);
+	emitter_printf(
+	    emitter, emitter_gen_fmt(fmt, fmt_size, "%s", justify, width), buf);
+	if (str_written < BUF_SIZE) {
+		return;
+	}
+	/*
+	 * There is no support for long string justification at the moment as
+	 * we output them partially with multiple malloc_snprintf calls and
+	 * justufication will work correctly only withing one call.
+	 * Fortunately this is not a big concern as we don't use justufication
+	 * with long strings right now.
+	 *
+	 * We emitted leading quotation mark and trailing '\0', hence need to
+	 * exclude extra characters from str shift.
+	 */
+	str += BUF_SIZE - 2;
+	do {
+		str_written = malloc_snprintf(buf, BUF_SIZE, "%s\"", str);
+		str += str_written >= BUF_SIZE ? BUF_SIZE - 1 : str_written;
+		emitter_printf(emitter,
+		    emitter_gen_fmt(fmt, fmt_size, "%s", justify, width), buf);
+	} while (str_written >= BUF_SIZE);
+#undef BUF_SIZE
+}
+
 /*
  * Internal.  Emit the given value type in the relevant encoding (so that the
  * bool true gets mapped to json "true", but the string "true" gets mapped to
@@ -124,8 +159,6 @@ emitter_gen_fmt(char *out_fmt, size_t out_size, const char *fmt_specifier,
 static inline void
 emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width,
     emitter_type_t value_type, const void *value) {
-	size_t str_written;
-#define BUF_SIZE 256
 #define FMT_SIZE 10
 	/*
 	 * We dynamically generate a format string to emit, to let us use the
@@ -134,18 +167,17 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width,
 	 * cases.
 	 */
 	char fmt[FMT_SIZE];
-	char buf[BUF_SIZE];
 
-#define EMIT_SIMPLE(type, format)					\
-	emitter_printf(emitter,						\
-	    emitter_gen_fmt(fmt, FMT_SIZE, format, justify, width),	\
+#define EMIT_SIMPLE(type, format)                                              \
+	emitter_printf(emitter,                                                \
+	    emitter_gen_fmt(fmt, FMT_SIZE, format, justify, width),            \
 	    *(const type *)value);
 
 	switch (value_type) {
 	case emitter_type_bool:
 		emitter_printf(emitter,
 		    emitter_gen_fmt(fmt, FMT_SIZE, "%s", justify, width),
-		    *(const bool *)value ?  "true" : "false");
+		    *(const bool *)value ? "true" : "false");
 		break;
 	case emitter_type_int:
 		EMIT_SIMPLE(int, "%d")
@@ -163,15 +195,8 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width,
 		EMIT_SIMPLE(size_t, "%zu")
 		break;
 	case emitter_type_string:
-		str_written = malloc_snprintf(buf, BUF_SIZE, "\"%s\"",
+		emitter_emit_str(emitter, justify, width, fmt, FMT_SIZE,
 		    *(const char *const *)value);
-		/*
-		 * We control the strings we output; we shouldn't get anything
-		 * anywhere near the fmt size.
-		 */
-		assert(str_written < BUF_SIZE);
-		emitter_printf(emitter,
-		    emitter_gen_fmt(fmt, FMT_SIZE, "%s", justify, width), buf);
 		break;
 	case emitter_type_uint32:
 		EMIT_SIMPLE(uint32_t, "%" FMTu32)
@@ -185,11 +210,9 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width,
 	default:
 		unreachable();
 	}
-#undef BUF_SIZE
 #undef FMT_SIZE
 }
 
-
 /* Internal functions.  In json mode, tracks nesting state. */
 static inline void
 emitter_nest_inc(emitter_t *emitter) {
@@ -205,7 +228,7 @@ emitter_nest_dec(emitter_t *emitter) {
 
 static inline void
 emitter_indent(emitter_t *emitter) {
-	int amount = emitter->nesting_depth;
+	int         amount = emitter->nesting_depth;
 	const char *indent_str;
 	assert(emitter->output != emitter_output_json_compact);
 	if (emitter->output == emitter_output_json) {
@@ -267,12 +290,12 @@ emitter_json_key(emitter_t *emitter, const char *json_key) {
 }
 
 static inline void
-emitter_json_value(emitter_t *emitter, emitter_type_t value_type,
-    const void *value) {
+emitter_json_value(
+    emitter_t *emitter, emitter_type_t value_type, const void *value) {
 	if (emitter_outputs_json(emitter)) {
 		emitter_json_key_prefix(emitter);
-		emitter_print_value(emitter, emitter_justify_none, -1,
-		    value_type, value);
+		emitter_print_value(
+		    emitter, emitter_justify_none, -1, value_type, value);
 		emitter->item_at_depth = true;
 	}
 }
@@ -343,7 +366,6 @@ emitter_json_object_end(emitter_t *emitter) {
 	}
 }
 
-
 /******************************************************************************/
 /* Table public API. */
 
@@ -365,14 +387,13 @@ emitter_table_dict_end(emitter_t *emitter) {
 
 static inline void
 emitter_table_kv_note(emitter_t *emitter, const char *table_key,
-    emitter_type_t value_type, const void *value,
-    const char *table_note_key, emitter_type_t table_note_value_type,
-    const void *table_note_value) {
+    emitter_type_t value_type, const void *value, const char *table_note_key,
+    emitter_type_t table_note_value_type, const void *table_note_value) {
 	if (emitter->output == emitter_output_table) {
 		emitter_indent(emitter);
 		emitter_printf(emitter, "%s: ", table_key);
-		emitter_print_value(emitter, emitter_justify_none, -1,
-		    value_type, value);
+		emitter_print_value(
+		    emitter, emitter_justify_none, -1, value_type, value);
 		if (table_note_key != NULL) {
 			emitter_printf(emitter, " (%s: ", table_note_key);
 			emitter_print_value(emitter, emitter_justify_none, -1,
@@ -391,7 +412,6 @@ emitter_table_kv(emitter_t *emitter, const char *table_key,
 	    emitter_type_bool, NULL);
 }
 
-
 /* Write to the emitter the given string, but only in table mode. */
 JEMALLOC_FORMAT_PRINTF(2, 3)
 static inline void
@@ -399,7 +419,8 @@ emitter_table_printf(emitter_t *emitter, const char *format, ...) {
 	if (emitter->output == emitter_output_table) {
 		va_list ap;
 		va_start(ap, format);
-		malloc_vcprintf(emitter->write_cb, emitter->cbopaque, format, ap);
+		malloc_vcprintf(
+		    emitter->write_cb, emitter->cbopaque, format, ap);
 		va_end(ap);
 	}
 }
@@ -410,7 +431,7 @@ emitter_table_row(emitter_t *emitter, emitter_row_t *row) {
 		return;
 	}
 	emitter_col_t *col;
-	ql_foreach(col, &row->cols, link) {
+	ql_foreach (col, &row->cols, link) {
 		emitter_print_value(emitter, col->justify, col->width,
 		    col->type, (const void *)&col->bool_val);
 	}
@@ -428,7 +449,6 @@ emitter_col_init(emitter_col_t *col, emitter_row_t *row) {
 	ql_tail_insert(&row->cols, col, link);
 }
 
-
 /******************************************************************************/
 /*
  * Generalized public API. Emits using either JSON or table, according to
@@ -440,9 +460,8 @@ emitter_col_init(emitter_col_t *col, emitter_row_t *row) {
  */
 static inline void
 emitter_kv_note(emitter_t *emitter, const char *json_key, const char *table_key,
-    emitter_type_t value_type, const void *value,
-    const char *table_note_key, emitter_type_t table_note_value_type,
-    const void *table_note_value) {
+    emitter_type_t value_type, const void *value, const char *table_note_key,
+    emitter_type_t table_note_value_type, const void *table_note_value) {
 	if (emitter_outputs_json(emitter)) {
 		emitter_json_key(emitter, json_key);
 		emitter_json_value(emitter, value_type, value);
@@ -461,8 +480,8 @@ emitter_kv(emitter_t *emitter, const char *json_key, const char *table_key,
 }
 
 static inline void
-emitter_dict_begin(emitter_t *emitter, const char *json_key,
-    const char *table_header) {
+emitter_dict_begin(
+    emitter_t *emitter, const char *json_key, const char *table_header) {
 	if (emitter_outputs_json(emitter)) {
 		emitter_json_key(emitter, json_key);
 		emitter_json_object_begin(emitter);
@@ -502,8 +521,9 @@ emitter_end(emitter_t *emitter) {
 	if (emitter_outputs_json(emitter)) {
 		assert(emitter->nesting_depth == 1);
 		emitter_nest_dec(emitter);
-		emitter_printf(emitter, "%s", emitter->output ==
-		    emitter_output_json_compact ? "}" : "\n}\n");
+		emitter_printf(emitter, "%s",
+		    emitter->output == emitter_output_json_compact ? "}"
+		                                                   : "\n}\n");
 	}
 }
 
diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h
index 4f689b47..9b7c4a89 100644
--- a/include/jemalloc/internal/eset.h
+++ b/include/jemalloc/internal/eset.h
@@ -1,9 +1,10 @@
 #ifndef JEMALLOC_INTERNAL_ESET_H
 #define JEMALLOC_INTERNAL_ESET_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/atomic.h"
-#include "jemalloc/internal/fb.h"
 #include "jemalloc/internal/edata.h"
+#include "jemalloc/internal/fb.h"
 #include "jemalloc/internal/mutex.h"
 
 /*
diff --git a/include/jemalloc/internal/exp_grow.h b/include/jemalloc/internal/exp_grow.h
index 8566b8a4..8206ba85 100644
--- a/include/jemalloc/internal/exp_grow.h
+++ b/include/jemalloc/internal/exp_grow.h
@@ -1,6 +1,8 @@
 #ifndef JEMALLOC_INTERNAL_EXP_GROW_H
 #define JEMALLOC_INTERNAL_EXP_GROW_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/sz.h"
 typedef struct exp_grow_s exp_grow_t;
 struct exp_grow_s {
 	/*
@@ -25,8 +27,7 @@ exp_grow_size_prepare(exp_grow_t *exp_grow, size_t alloc_size_min,
 	*r_alloc_size = sz_pind2sz(exp_grow->next + *r_skip);
 	while (*r_alloc_size < alloc_size_min) {
 		(*r_skip)++;
-		if (exp_grow->next + *r_skip  >=
-		    sz_psz2ind(SC_LARGE_MAXCLASS)) {
+		if (exp_grow->next + *r_skip >= sz_psz2ind(SC_LARGE_MAXCLASS)) {
 			/* Outside legal range. */
 			return true;
 		}
@@ -42,7 +43,6 @@ exp_grow_size_commit(exp_grow_t *exp_grow, pszind_t skip) {
 	} else {
 		exp_grow->next = exp_grow->limit;
 	}
-
 }
 
 void exp_grow_init(exp_grow_t *exp_grow);
diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h
index 1d51d410..a9f81cb7 100644
--- a/include/jemalloc/internal/extent.h
+++ b/include/jemalloc/internal/extent.h
@@ -1,8 +1,10 @@
 #ifndef JEMALLOC_INTERNAL_EXTENT_H
 #define JEMALLOC_INTERNAL_EXTENT_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/ecache.h"
 #include "jemalloc/internal/ehooks.h"
+#include "jemalloc/internal/pac.h"
 #include "jemalloc/internal/ph.h"
 #include "jemalloc/internal/rtree.h"
 
@@ -19,50 +21,58 @@
 #define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6
 extern size_t opt_lg_extent_max_active_fit;
 
+#define PROCESS_MADVISE_MAX_BATCH_DEFAULT 0
+extern size_t opt_process_madvise_max_batch;
+
+#ifdef JEMALLOC_HAVE_PROCESS_MADVISE
+/* The iovec is on stack.  Limit the max batch to avoid stack overflow. */
+#	define PROCESS_MADVISE_MAX_BATCH_LIMIT                                \
+		(VARIABLE_ARRAY_SIZE_MAX / sizeof(struct iovec))
+#else
+#	define PROCESS_MADVISE_MAX_BATCH_LIMIT 0
+#endif
+
 edata_t *ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
     ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment,
     bool zero, bool guarded);
 edata_t *ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
     ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment,
     bool zero, bool guarded);
-void ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
-    ecache_t *ecache, edata_t *edata);
+void ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache,
+    edata_t *edata);
 edata_t *ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
     ecache_t *ecache, size_t npages_min);
 
 void extent_gdump_add(tsdn_t *tsdn, const edata_t *edata);
 void extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache,
     edata_t *edata);
-void extent_dalloc_gap(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
-    edata_t *edata);
+void extent_dalloc_gap(
+    tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata);
 edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
     void *new_addr, size_t size, size_t alignment, bool zero, bool *commit,
     bool growing_retained);
-void extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
-    edata_t *edata);
-void extent_destroy_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
-    edata_t *edata);
-bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
-    size_t offset, size_t length);
-bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
-    size_t offset, size_t length);
+void     extent_dalloc_wrapper(
+        tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata);
+void extent_dalloc_wrapper_purged(
+    tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata);
+void extent_destroy_wrapper(
+    tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata);
 bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
     size_t offset, size_t length);
 bool extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
     size_t offset, size_t length);
-edata_t *extent_split_wrapper(tsdn_t *tsdn, pac_t *pac,
-    ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b,
-    bool holding_core_locks);
-bool extent_merge_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
-    edata_t *a, edata_t *b);
-bool extent_commit_zero(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
-    bool commit, bool zero, bool growing_retained);
+edata_t *extent_split_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
+    edata_t *edata, size_t size_a, size_t size_b, bool holding_core_locks);
+bool     extent_merge_wrapper(
+        tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, edata_t *b);
+bool   extent_commit_zero(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
+      bool commit, bool zero, bool growing_retained);
 size_t extent_sn_next(pac_t *pac);
-bool extent_boot(void);
+bool   extent_boot(void);
 
 JEMALLOC_ALWAYS_INLINE bool
-extent_neighbor_head_state_mergeable(bool edata_is_head,
-    bool neighbor_is_head, bool forward) {
+extent_neighbor_head_state_mergeable(
+    bool edata_is_head, bool neighbor_is_head, bool forward) {
 	/*
 	 * Head states checking: disallow merging if the higher addr extent is a
 	 * head extent.  This helps preserve first-fit, and more importantly
@@ -90,8 +100,8 @@ extent_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents,
 	}
 	/* It's not safe to access *neighbor yet; must verify states first. */
 	bool neighbor_is_head = contents.metadata.is_head;
-	if (!extent_neighbor_head_state_mergeable(edata_is_head_get(edata),
-	    neighbor_is_head, forward)) {
+	if (!extent_neighbor_head_state_mergeable(
+	        edata_is_head_get(edata), neighbor_is_head, forward)) {
 		return false;
 	}
 	extent_state_t neighbor_state = contents.metadata.state;
@@ -100,8 +110,9 @@ extent_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents,
 			return false;
 		}
 		/* From this point, it's safe to access *neighbor. */
-		if (!expanding && (edata_committed_get(edata) !=
-		    edata_committed_get(neighbor))) {
+		if (!expanding
+		    && (edata_committed_get(edata)
+		        != edata_committed_get(neighbor))) {
 			/*
 			 * Some platforms (e.g. Windows) require an explicit
 			 * commit step (and writing to uncommitted memory is not
@@ -121,11 +132,11 @@ extent_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents,
 		return false;
 	}
 	if (opt_retain) {
-		assert(edata_arena_ind_get(edata) ==
-		    edata_arena_ind_get(neighbor));
+		assert(edata_arena_ind_get(edata)
+		    == edata_arena_ind_get(neighbor));
 	} else {
-		if (edata_arena_ind_get(edata) !=
-		    edata_arena_ind_get(neighbor)) {
+		if (edata_arena_ind_get(edata)
+		    != edata_arena_ind_get(neighbor)) {
 			return false;
 		}
 	}
diff --git a/include/jemalloc/internal/extent_dss.h b/include/jemalloc/internal/extent_dss.h
index e8f02ce2..4bb3f51d 100644
--- a/include/jemalloc/internal/extent_dss.h
+++ b/include/jemalloc/internal/extent_dss.h
@@ -1,26 +1,30 @@
 #ifndef JEMALLOC_INTERNAL_EXTENT_DSS_H
 #define JEMALLOC_INTERNAL_EXTENT_DSS_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/arena_types.h"
+#include "jemalloc/internal/tsd_types.h"
+
 typedef enum {
-	dss_prec_disabled  = 0,
-	dss_prec_primary   = 1,
+	dss_prec_disabled = 0,
+	dss_prec_primary = 1,
 	dss_prec_secondary = 2,
 
-	dss_prec_limit     = 3
+	dss_prec_limit = 3
 } dss_prec_t;
 #define DSS_PREC_DEFAULT dss_prec_secondary
 #define DSS_DEFAULT "secondary"
 
-extern const char *dss_prec_names[];
+extern const char *const dss_prec_names[];
 
 extern const char *opt_dss;
 
 dss_prec_t extent_dss_prec_get(void);
-bool extent_dss_prec_set(dss_prec_t dss_prec);
-void *extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr,
-    size_t size, size_t alignment, bool *zero, bool *commit);
-bool extent_in_dss(void *addr);
-bool extent_dss_mergeable(void *addr_a, void *addr_b);
-void extent_dss_boot(void);
+bool       extent_dss_prec_set(dss_prec_t dss_prec);
+void      *extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr,
+         size_t size, size_t alignment, bool *zero, bool *commit);
+bool       extent_in_dss(void *addr);
+bool       extent_dss_mergeable(void *addr_a, void *addr_b);
+void       extent_dss_boot(void);
 
 #endif /* JEMALLOC_INTERNAL_EXTENT_DSS_H */
diff --git a/include/jemalloc/internal/extent_mmap.h b/include/jemalloc/internal/extent_mmap.h
index 55f17ee4..aa469896 100644
--- a/include/jemalloc/internal/extent_mmap.h
+++ b/include/jemalloc/internal/extent_mmap.h
@@ -1,10 +1,12 @@
 #ifndef JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H
 #define JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+
 extern bool opt_retain;
 
-void *extent_alloc_mmap(void *new_addr, size_t size, size_t alignment,
-    bool *zero, bool *commit);
+void *extent_alloc_mmap(
+    void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit);
 bool extent_dalloc_mmap(void *addr, size_t size);
 
 #endif /* JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H */
diff --git a/include/jemalloc/internal/fb.h b/include/jemalloc/internal/fb.h
index 90c4091f..bf76f362 100644
--- a/include/jemalloc/internal/fb.h
+++ b/include/jemalloc/internal/fb.h
@@ -1,6 +1,10 @@
 #ifndef JEMALLOC_INTERNAL_FB_H
 #define JEMALLOC_INTERNAL_FB_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/bit_util.h"
+
 /*
  * The flat bitmap module.  This has a larger API relative to the bitmap module
  * (supporting things like backwards searches, and searching for both set and
@@ -11,8 +15,8 @@
 
 typedef unsigned long fb_group_t;
 #define FB_GROUP_BITS (ZU(1) << (LG_SIZEOF_LONG + 3))
-#define FB_NGROUPS(nbits) ((nbits) / FB_GROUP_BITS \
-    + ((nbits) % FB_GROUP_BITS == 0 ? 0 : 1))
+#define FB_NGROUPS(nbits)                                                      \
+	((nbits) / FB_GROUP_BITS + ((nbits) % FB_GROUP_BITS == 0 ? 0 : 1))
 
 static inline void
 fb_init(fb_group_t *fb, size_t nbits) {
@@ -71,7 +75,6 @@ fb_unset(fb_group_t *fb, size_t nbits, size_t bit) {
 	fb[group_ind] &= ~((fb_group_t)1 << bit_ind);
 }
 
-
 /*
  * Some implementation details.  This visitation function lets us apply a group
  * visitor to each group in the bitmap (potentially modifying it).  The mask
@@ -90,7 +93,8 @@ fb_visit_impl(fb_group_t *fb, size_t nbits, fb_group_visitor_t visit, void *ctx,
 	 * to from bit 0.
 	 */
 	size_t first_group_cnt = (start_bit_ind + cnt > FB_GROUP_BITS
-		? FB_GROUP_BITS - start_bit_ind : cnt);
+	        ? FB_GROUP_BITS - start_bit_ind
+	        : cnt);
 	/*
 	 * We can basically split affected words into:
 	 *   - The first group, where we touch only the high bits
@@ -100,8 +104,8 @@ fb_visit_impl(fb_group_t *fb, size_t nbits, fb_group_visitor_t visit, void *ctx,
 	 * this can lead to bad codegen for those middle words.
 	 */
 	/* First group */
-	fb_group_t mask = ((~(fb_group_t)0)
-	    >> (FB_GROUP_BITS - first_group_cnt))
+	fb_group_t mask =
+	    ((~(fb_group_t)0) >> (FB_GROUP_BITS - first_group_cnt))
 	    << start_bit_ind;
 	visit(ctx, &fb[group_ind], mask);
 
@@ -172,12 +176,12 @@ fb_ucount(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) {
  * Returns the number of bits in the bitmap if no such bit exists.
  */
 JEMALLOC_ALWAYS_INLINE ssize_t
-fb_find_impl(fb_group_t *fb, size_t nbits, size_t start, bool val,
-    bool forward) {
+fb_find_impl(
+    fb_group_t *fb, size_t nbits, size_t start, bool val, bool forward) {
 	assert(start < nbits);
-	size_t ngroups = FB_NGROUPS(nbits);
+	size_t  ngroups = FB_NGROUPS(nbits);
 	ssize_t group_ind = start / FB_GROUP_BITS;
-	size_t bit_ind = start % FB_GROUP_BITS;
+	size_t  bit_ind = start % FB_GROUP_BITS;
 
 	fb_group_t maybe_invert = (val ? 0 : (fb_group_t)-1);
 
@@ -261,8 +265,8 @@ fb_iter_range_impl(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin,
 		return false;
 	}
 	/* Half open range; the set bits are [begin, end). */
-	ssize_t next_range_end = fb_find_impl(fb, nbits, next_range_begin, !val,
-	    forward);
+	ssize_t next_range_end = fb_find_impl(
+	    fb, nbits, next_range_begin, !val, forward);
 	if (forward) {
 		*r_begin = next_range_begin;
 		*r_len = next_range_end - next_range_begin;
@@ -320,8 +324,9 @@ fb_range_longest_impl(fb_group_t *fb, size_t nbits, bool val) {
 	size_t begin = 0;
 	size_t longest_len = 0;
 	size_t len = 0;
-	while (begin < nbits && fb_iter_range_impl(fb, nbits, begin, &begin,
-	    &len, val, /* forward */ true)) {
+	while (begin < nbits
+	    && fb_iter_range_impl(
+	        fb, nbits, begin, &begin, &len, val, /* forward */ true)) {
 		if (len > longest_len) {
 			longest_len = len;
 		}
diff --git a/include/jemalloc/internal/fxp.h b/include/jemalloc/internal/fxp.h
index 415a9828..8ca4f3c6 100644
--- a/include/jemalloc/internal/fxp.h
+++ b/include/jemalloc/internal/fxp.h
@@ -1,6 +1,9 @@
 #ifndef JEMALLOC_INTERNAL_FXP_H
 #define JEMALLOC_INTERNAL_FXP_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/assert.h"
+
 /*
  * A simple fixed-point math implementation, supporting only unsigned values
  * (with overflow being an error).
@@ -86,7 +89,7 @@ fxp_round_down(fxp_t a) {
 
 static inline uint32_t
 fxp_round_nearest(fxp_t a) {
-	uint32_t fractional_part = (a  & ((1U << 16) - 1));
+	uint32_t fractional_part = (a & ((1U << 16) - 1));
 	uint32_t increment = (uint32_t)(fractional_part >= (1U << 15));
 	return (a >> 16) + increment;
 }
diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h
index 7f945679..73e2214e 100644
--- a/include/jemalloc/internal/hash.h
+++ b/include/jemalloc/internal/hash.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_HASH_H
 #define JEMALLOC_INTERNAL_HASH_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/assert.h"
 
 /*
@@ -24,7 +25,7 @@ hash_rotl_64(uint64_t x, int8_t r) {
 static inline uint32_t
 hash_get_block_32(const uint32_t *p, int i) {
 	/* Handle unaligned read. */
-	if (unlikely((uintptr_t)p & (sizeof(uint32_t)-1)) != 0) {
+	if (unlikely((uintptr_t)p & (sizeof(uint32_t) - 1)) != 0) {
 		uint32_t ret;
 
 		memcpy(&ret, (uint8_t *)(p + i), sizeof(uint32_t));
@@ -37,7 +38,7 @@ hash_get_block_32(const uint32_t *p, int i) {
 static inline uint64_t
 hash_get_block_64(const uint64_t *p, int i) {
 	/* Handle unaligned read. */
-	if (unlikely((uintptr_t)p & (sizeof(uint64_t)-1)) != 0) {
+	if (unlikely((uintptr_t)p & (sizeof(uint64_t) - 1)) != 0) {
 		uint64_t ret;
 
 		memcpy(&ret, (uint8_t *)(p + i), sizeof(uint64_t));
@@ -71,8 +72,8 @@ hash_fmix_64(uint64_t k) {
 
 static inline uint32_t
 hash_x86_32(const void *key, int len, uint32_t seed) {
-	const uint8_t *data = (const uint8_t *) key;
-	const int nblocks = len / 4;
+	const uint8_t *data = (const uint8_t *)key;
+	const int      nblocks = len / 4;
 
 	uint32_t h1 = seed;
 
@@ -81,8 +82,8 @@ hash_x86_32(const void *key, int len, uint32_t seed) {
 
 	/* body */
 	{
-		const uint32_t *blocks = (const uint32_t *) (data + nblocks*4);
-		int i;
+		const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4);
+		int             i;
 
 		for (i = -nblocks; i; i++) {
 			uint32_t k1 = hash_get_block_32(blocks, i);
@@ -93,21 +94,29 @@ hash_x86_32(const void *key, int len, uint32_t seed) {
 
 			h1 ^= k1;
 			h1 = hash_rotl_32(h1, 13);
-			h1 = h1*5 + 0xe6546b64;
+			h1 = h1 * 5 + 0xe6546b64;
 		}
 	}
 
 	/* tail */
 	{
-		const uint8_t *tail = (const uint8_t *) (data + nblocks*4);
+		const uint8_t *tail = (const uint8_t *)(data + nblocks * 4);
 
 		uint32_t k1 = 0;
 
 		switch (len & 3) {
-		case 3: k1 ^= tail[2] << 16; JEMALLOC_FALLTHROUGH;
-		case 2: k1 ^= tail[1] << 8; JEMALLOC_FALLTHROUGH;
-		case 1: k1 ^= tail[0]; k1 *= c1; k1 = hash_rotl_32(k1, 15);
-			k1 *= c2; h1 ^= k1;
+		case 3:
+			k1 ^= tail[2] << 16;
+			JEMALLOC_FALLTHROUGH;
+		case 2:
+			k1 ^= tail[1] << 8;
+			JEMALLOC_FALLTHROUGH;
+		case 1:
+			k1 ^= tail[0];
+			k1 *= c1;
+			k1 = hash_rotl_32(k1, 15);
+			k1 *= c2;
+			h1 ^= k1;
 		}
 	}
 
@@ -120,10 +129,9 @@ hash_x86_32(const void *key, int len, uint32_t seed) {
 }
 
 static inline void
-hash_x86_128(const void *key, const int len, uint32_t seed,
-    uint64_t r_out[2]) {
-	const uint8_t * data = (const uint8_t *) key;
-	const int nblocks = len / 16;
+hash_x86_128(const void *key, const int len, uint32_t seed, uint64_t r_out[2]) {
+	const uint8_t *data = (const uint8_t *)key;
+	const int      nblocks = len / 16;
 
 	uint32_t h1 = seed;
 	uint32_t h2 = seed;
@@ -137,95 +145,161 @@ hash_x86_128(const void *key, const int len, uint32_t seed,
 
 	/* body */
 	{
-		const uint32_t *blocks = (const uint32_t *) (data + nblocks*16);
-		int i;
+		const uint32_t *blocks = (const uint32_t *)(data
+		    + nblocks * 16);
+		int             i;
 
 		for (i = -nblocks; i; i++) {
-			uint32_t k1 = hash_get_block_32(blocks, i*4 + 0);
-			uint32_t k2 = hash_get_block_32(blocks, i*4 + 1);
-			uint32_t k3 = hash_get_block_32(blocks, i*4 + 2);
-			uint32_t k4 = hash_get_block_32(blocks, i*4 + 3);
+			uint32_t k1 = hash_get_block_32(blocks, i * 4 + 0);
+			uint32_t k2 = hash_get_block_32(blocks, i * 4 + 1);
+			uint32_t k3 = hash_get_block_32(blocks, i * 4 + 2);
+			uint32_t k4 = hash_get_block_32(blocks, i * 4 + 3);
 
-			k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1;
+			k1 *= c1;
+			k1 = hash_rotl_32(k1, 15);
+			k1 *= c2;
+			h1 ^= k1;
 
-			h1 = hash_rotl_32(h1, 19); h1 += h2;
-			h1 = h1*5 + 0x561ccd1b;
+			h1 = hash_rotl_32(h1, 19);
+			h1 += h2;
+			h1 = h1 * 5 + 0x561ccd1b;
 
-			k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2;
+			k2 *= c2;
+			k2 = hash_rotl_32(k2, 16);
+			k2 *= c3;
+			h2 ^= k2;
 
-			h2 = hash_rotl_32(h2, 17); h2 += h3;
-			h2 = h2*5 + 0x0bcaa747;
+			h2 = hash_rotl_32(h2, 17);
+			h2 += h3;
+			h2 = h2 * 5 + 0x0bcaa747;
 
-			k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3;
+			k3 *= c3;
+			k3 = hash_rotl_32(k3, 17);
+			k3 *= c4;
+			h3 ^= k3;
 
-			h3 = hash_rotl_32(h3, 15); h3 += h4;
-			h3 = h3*5 + 0x96cd1c35;
+			h3 = hash_rotl_32(h3, 15);
+			h3 += h4;
+			h3 = h3 * 5 + 0x96cd1c35;
 
-			k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4;
+			k4 *= c4;
+			k4 = hash_rotl_32(k4, 18);
+			k4 *= c1;
+			h4 ^= k4;
 
-			h4 = hash_rotl_32(h4, 13); h4 += h1;
-			h4 = h4*5 + 0x32ac3b17;
+			h4 = hash_rotl_32(h4, 13);
+			h4 += h1;
+			h4 = h4 * 5 + 0x32ac3b17;
 		}
 	}
 
 	/* tail */
 	{
-		const uint8_t *tail = (const uint8_t *) (data + nblocks*16);
-		uint32_t k1 = 0;
-		uint32_t k2 = 0;
-		uint32_t k3 = 0;
-		uint32_t k4 = 0;
+		const uint8_t *tail = (const uint8_t *)(data + nblocks * 16);
+		uint32_t       k1 = 0;
+		uint32_t       k2 = 0;
+		uint32_t       k3 = 0;
+		uint32_t       k4 = 0;
 
 		switch (len & 15) {
-		case 15: k4 ^= tail[14] << 16; JEMALLOC_FALLTHROUGH;
-		case 14: k4 ^= tail[13] << 8; JEMALLOC_FALLTHROUGH;
-		case 13: k4 ^= tail[12] << 0;
-			k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4;
+		case 15:
+			k4 ^= tail[14] << 16;
 			JEMALLOC_FALLTHROUGH;
-		case 12: k3 ^= (uint32_t) tail[11] << 24; JEMALLOC_FALLTHROUGH;
-		case 11: k3 ^= tail[10] << 16; JEMALLOC_FALLTHROUGH;
-		case 10: k3 ^= tail[ 9] << 8; JEMALLOC_FALLTHROUGH;
-		case  9: k3 ^= tail[ 8] << 0;
-			k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3;
+		case 14:
+			k4 ^= tail[13] << 8;
 			JEMALLOC_FALLTHROUGH;
-		case  8: k2 ^= (uint32_t) tail[ 7] << 24; JEMALLOC_FALLTHROUGH;
-		case  7: k2 ^= tail[ 6] << 16; JEMALLOC_FALLTHROUGH;
-		case  6: k2 ^= tail[ 5] << 8; JEMALLOC_FALLTHROUGH;
-		case  5: k2 ^= tail[ 4] << 0;
-			k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2;
+		case 13:
+			k4 ^= tail[12] << 0;
+			k4 *= c4;
+			k4 = hash_rotl_32(k4, 18);
+			k4 *= c1;
+			h4 ^= k4;
 			JEMALLOC_FALLTHROUGH;
-		case  4: k1 ^= (uint32_t) tail[ 3] << 24; JEMALLOC_FALLTHROUGH;
-		case  3: k1 ^= tail[ 2] << 16; JEMALLOC_FALLTHROUGH;
-		case  2: k1 ^= tail[ 1] << 8; JEMALLOC_FALLTHROUGH;
-		case  1: k1 ^= tail[ 0] << 0;
-			k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1;
+		case 12:
+			k3 ^= (uint32_t)tail[11] << 24;
+			JEMALLOC_FALLTHROUGH;
+		case 11:
+			k3 ^= tail[10] << 16;
+			JEMALLOC_FALLTHROUGH;
+		case 10:
+			k3 ^= tail[9] << 8;
+			JEMALLOC_FALLTHROUGH;
+		case 9:
+			k3 ^= tail[8] << 0;
+			k3 *= c3;
+			k3 = hash_rotl_32(k3, 17);
+			k3 *= c4;
+			h3 ^= k3;
+			JEMALLOC_FALLTHROUGH;
+		case 8:
+			k2 ^= (uint32_t)tail[7] << 24;
+			JEMALLOC_FALLTHROUGH;
+		case 7:
+			k2 ^= tail[6] << 16;
+			JEMALLOC_FALLTHROUGH;
+		case 6:
+			k2 ^= tail[5] << 8;
+			JEMALLOC_FALLTHROUGH;
+		case 5:
+			k2 ^= tail[4] << 0;
+			k2 *= c2;
+			k2 = hash_rotl_32(k2, 16);
+			k2 *= c3;
+			h2 ^= k2;
+			JEMALLOC_FALLTHROUGH;
+		case 4:
+			k1 ^= (uint32_t)tail[3] << 24;
+			JEMALLOC_FALLTHROUGH;
+		case 3:
+			k1 ^= tail[2] << 16;
+			JEMALLOC_FALLTHROUGH;
+		case 2:
+			k1 ^= tail[1] << 8;
+			JEMALLOC_FALLTHROUGH;
+		case 1:
+			k1 ^= tail[0] << 0;
+			k1 *= c1;
+			k1 = hash_rotl_32(k1, 15);
+			k1 *= c2;
+			h1 ^= k1;
 			break;
 		}
 	}
 
 	/* finalization */
-	h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
+	h1 ^= len;
+	h2 ^= len;
+	h3 ^= len;
+	h4 ^= len;
 
-	h1 += h2; h1 += h3; h1 += h4;
-	h2 += h1; h3 += h1; h4 += h1;
+	h1 += h2;
+	h1 += h3;
+	h1 += h4;
+	h2 += h1;
+	h3 += h1;
+	h4 += h1;
 
 	h1 = hash_fmix_32(h1);
 	h2 = hash_fmix_32(h2);
 	h3 = hash_fmix_32(h3);
 	h4 = hash_fmix_32(h4);
 
-	h1 += h2; h1 += h3; h1 += h4;
-	h2 += h1; h3 += h1; h4 += h1;
+	h1 += h2;
+	h1 += h3;
+	h1 += h4;
+	h2 += h1;
+	h3 += h1;
+	h4 += h1;
 
-	r_out[0] = (((uint64_t) h2) << 32) | h1;
-	r_out[1] = (((uint64_t) h4) << 32) | h3;
+	r_out[0] = (((uint64_t)h2) << 32) | h1;
+	r_out[1] = (((uint64_t)h4) << 32) | h3;
 }
 
 static inline void
-hash_x64_128(const void *key, const int len, const uint32_t seed,
-    uint64_t r_out[2]) {
-	const uint8_t *data = (const uint8_t *) key;
-	const int nblocks = len / 16;
+hash_x64_128(
+    const void *key, const int len, const uint32_t seed, uint64_t r_out[2]) {
+	const uint8_t *data = (const uint8_t *)key;
+	const int      nblocks = len / 16;
 
 	uint64_t h1 = seed;
 	uint64_t h2 = seed;
@@ -235,56 +309,99 @@ hash_x64_128(const void *key, const int len, const uint32_t seed,
 
 	/* body */
 	{
-		const uint64_t *blocks = (const uint64_t *) (data);
-		int i;
+		const uint64_t *blocks = (const uint64_t *)(data);
+		int             i;
 
 		for (i = 0; i < nblocks; i++) {
-			uint64_t k1 = hash_get_block_64(blocks, i*2 + 0);
-			uint64_t k2 = hash_get_block_64(blocks, i*2 + 1);
+			uint64_t k1 = hash_get_block_64(blocks, i * 2 + 0);
+			uint64_t k2 = hash_get_block_64(blocks, i * 2 + 1);
 
-			k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1;
+			k1 *= c1;
+			k1 = hash_rotl_64(k1, 31);
+			k1 *= c2;
+			h1 ^= k1;
 
-			h1 = hash_rotl_64(h1, 27); h1 += h2;
-			h1 = h1*5 + 0x52dce729;
+			h1 = hash_rotl_64(h1, 27);
+			h1 += h2;
+			h1 = h1 * 5 + 0x52dce729;
 
-			k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2;
+			k2 *= c2;
+			k2 = hash_rotl_64(k2, 33);
+			k2 *= c1;
+			h2 ^= k2;
 
-			h2 = hash_rotl_64(h2, 31); h2 += h1;
-			h2 = h2*5 + 0x38495ab5;
+			h2 = hash_rotl_64(h2, 31);
+			h2 += h1;
+			h2 = h2 * 5 + 0x38495ab5;
 		}
 	}
 
 	/* tail */
 	{
-		const uint8_t *tail = (const uint8_t*)(data + nblocks*16);
-		uint64_t k1 = 0;
-		uint64_t k2 = 0;
+		const uint8_t *tail = (const uint8_t *)(data + nblocks * 16);
+		uint64_t       k1 = 0;
+		uint64_t       k2 = 0;
 
 		switch (len & 15) {
-		case 15: k2 ^= ((uint64_t)(tail[14])) << 48; JEMALLOC_FALLTHROUGH;
-		case 14: k2 ^= ((uint64_t)(tail[13])) << 40; JEMALLOC_FALLTHROUGH;
-		case 13: k2 ^= ((uint64_t)(tail[12])) << 32; JEMALLOC_FALLTHROUGH;
-		case 12: k2 ^= ((uint64_t)(tail[11])) << 24; JEMALLOC_FALLTHROUGH;
-		case 11: k2 ^= ((uint64_t)(tail[10])) << 16; JEMALLOC_FALLTHROUGH;
-		case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8;  JEMALLOC_FALLTHROUGH;
-		case  9: k2 ^= ((uint64_t)(tail[ 8])) << 0;
-			k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2;
+		case 15:
+			k2 ^= ((uint64_t)(tail[14])) << 48;
 			JEMALLOC_FALLTHROUGH;
-		case  8: k1 ^= ((uint64_t)(tail[ 7])) << 56; JEMALLOC_FALLTHROUGH;
-		case  7: k1 ^= ((uint64_t)(tail[ 6])) << 48; JEMALLOC_FALLTHROUGH;
-		case  6: k1 ^= ((uint64_t)(tail[ 5])) << 40; JEMALLOC_FALLTHROUGH;
-		case  5: k1 ^= ((uint64_t)(tail[ 4])) << 32; JEMALLOC_FALLTHROUGH;
-		case  4: k1 ^= ((uint64_t)(tail[ 3])) << 24; JEMALLOC_FALLTHROUGH;
-		case  3: k1 ^= ((uint64_t)(tail[ 2])) << 16; JEMALLOC_FALLTHROUGH;
-		case  2: k1 ^= ((uint64_t)(tail[ 1])) << 8;  JEMALLOC_FALLTHROUGH;
-		case  1: k1 ^= ((uint64_t)(tail[ 0])) << 0;
-			k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1;
+		case 14:
+			k2 ^= ((uint64_t)(tail[13])) << 40;
+			JEMALLOC_FALLTHROUGH;
+		case 13:
+			k2 ^= ((uint64_t)(tail[12])) << 32;
+			JEMALLOC_FALLTHROUGH;
+		case 12:
+			k2 ^= ((uint64_t)(tail[11])) << 24;
+			JEMALLOC_FALLTHROUGH;
+		case 11:
+			k2 ^= ((uint64_t)(tail[10])) << 16;
+			JEMALLOC_FALLTHROUGH;
+		case 10:
+			k2 ^= ((uint64_t)(tail[9])) << 8;
+			JEMALLOC_FALLTHROUGH;
+		case 9:
+			k2 ^= ((uint64_t)(tail[8])) << 0;
+			k2 *= c2;
+			k2 = hash_rotl_64(k2, 33);
+			k2 *= c1;
+			h2 ^= k2;
+			JEMALLOC_FALLTHROUGH;
+		case 8:
+			k1 ^= ((uint64_t)(tail[7])) << 56;
+			JEMALLOC_FALLTHROUGH;
+		case 7:
+			k1 ^= ((uint64_t)(tail[6])) << 48;
+			JEMALLOC_FALLTHROUGH;
+		case 6:
+			k1 ^= ((uint64_t)(tail[5])) << 40;
+			JEMALLOC_FALLTHROUGH;
+		case 5:
+			k1 ^= ((uint64_t)(tail[4])) << 32;
+			JEMALLOC_FALLTHROUGH;
+		case 4:
+			k1 ^= ((uint64_t)(tail[3])) << 24;
+			JEMALLOC_FALLTHROUGH;
+		case 3:
+			k1 ^= ((uint64_t)(tail[2])) << 16;
+			JEMALLOC_FALLTHROUGH;
+		case 2:
+			k1 ^= ((uint64_t)(tail[1])) << 8;
+			JEMALLOC_FALLTHROUGH;
+		case 1:
+			k1 ^= ((uint64_t)(tail[0])) << 0;
+			k1 *= c1;
+			k1 = hash_rotl_64(k1, 31);
+			k1 *= c2;
+			h1 ^= k1;
 			break;
 		}
 	}
 
 	/* finalization */
-	h1 ^= len; h2 ^= len;
+	h1 ^= len;
+	h2 ^= len;
 
 	h1 += h2;
 	h2 += h1;
diff --git a/include/jemalloc/internal/hook.h b/include/jemalloc/internal/hook.h
index ee246b1e..bbbcb320 100644
--- a/include/jemalloc/internal/hook.h
+++ b/include/jemalloc/internal/hook.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_HOOK_H
 #define JEMALLOC_INTERNAL_HOOK_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/tsd.h"
 
 /*
@@ -55,6 +56,7 @@ enum hook_alloc_e {
 	hook_alloc_calloc,
 	hook_alloc_memalign,
 	hook_alloc_valloc,
+	hook_alloc_pvalloc,
 	hook_alloc_mallocx,
 
 	/* The reallocating functions have both alloc and dalloc variants */
@@ -81,7 +83,6 @@ enum hook_dalloc_e {
 };
 typedef enum hook_dalloc_e hook_dalloc_t;
 
-
 enum hook_expand_e {
 	hook_expand_realloc,
 	hook_expand_rallocx,
@@ -89,23 +90,22 @@ enum hook_expand_e {
 };
 typedef enum hook_expand_e hook_expand_t;
 
-typedef void (*hook_alloc)(
-    void *extra, hook_alloc_t type, void *result, uintptr_t result_raw,
-    uintptr_t args_raw[3]);
+typedef void (*hook_alloc)(void *extra, hook_alloc_t type, void *result,
+    uintptr_t result_raw, uintptr_t args_raw[3]);
 
 typedef void (*hook_dalloc)(
     void *extra, hook_dalloc_t type, void *address, uintptr_t args_raw[3]);
 
-typedef void (*hook_expand)(
-    void *extra, hook_expand_t type, void *address, size_t old_usize,
-    size_t new_usize, uintptr_t result_raw, uintptr_t args_raw[4]);
+typedef void (*hook_expand)(void *extra, hook_expand_t type, void *address,
+    size_t old_usize, size_t new_usize, uintptr_t result_raw,
+    uintptr_t args_raw[4]);
 
 typedef struct hooks_s hooks_t;
 struct hooks_s {
-	hook_alloc alloc_hook;
+	hook_alloc  alloc_hook;
 	hook_dalloc dalloc_hook;
 	hook_expand expand_hook;
-	void *extra;
+	void       *extra;
 };
 
 /*
@@ -143,9 +143,9 @@ struct hook_ralloc_args_s {
  * Returns an opaque handle to be used when removing the hook.  NULL means that
  * we couldn't install the hook.
  */
-bool hook_boot();
+bool hook_boot(void);
 
-void *hook_install(tsdn_t *tsdn, hooks_t *hooks);
+void *hook_install(tsdn_t *tsdn, hooks_t *to_install);
 /* Uninstalls the hook with the handle previously returned from hook_install. */
 void hook_remove(tsdn_t *tsdn, void *opaque);
 
@@ -154,8 +154,8 @@ void hook_remove(tsdn_t *tsdn, void *opaque);
 void hook_invoke_alloc(hook_alloc_t type, void *result, uintptr_t result_raw,
     uintptr_t args_raw[3]);
 
-void hook_invoke_dalloc(hook_dalloc_t type, void *address,
-    uintptr_t args_raw[3]);
+void hook_invoke_dalloc(
+    hook_dalloc_t type, void *address, uintptr_t args_raw[3]);
 
 void hook_invoke_expand(hook_expand_t type, void *address, size_t old_usize,
     size_t new_usize, uintptr_t result_raw, uintptr_t args_raw[4]);
diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h
index f3562853..dc7725b7 100644
--- a/include/jemalloc/internal/hpa.h
+++ b/include/jemalloc/internal/hpa.h
@@ -1,42 +1,18 @@
 #ifndef JEMALLOC_INTERNAL_HPA_H
 #define JEMALLOC_INTERNAL_HPA_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/edata_cache.h"
+#include "jemalloc/internal/emap.h"
 #include "jemalloc/internal/exp_grow.h"
+#include "jemalloc/internal/hpa_central.h"
 #include "jemalloc/internal/hpa_hooks.h"
 #include "jemalloc/internal/hpa_opts.h"
+#include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/pai.h"
 #include "jemalloc/internal/psset.h"
-
-typedef struct hpa_central_s hpa_central_t;
-struct hpa_central_s {
-	/*
-	 * The mutex guarding most of the operations on the central data
-	 * structure.
-	 */
-	malloc_mutex_t mtx;
-	/*
-	 * Guards expansion of eden.  We separate this from the regular mutex so
-	 * that cheaper operations can still continue while we're doing the OS
-	 * call.
-	 */
-	malloc_mutex_t grow_mtx;
-	/*
-	 * Either NULL (if empty), or some integer multiple of a
-	 * hugepage-aligned number of hugepages.  We carve them off one at a
-	 * time to satisfy new pageslab requests.
-	 *
-	 * Guarded by grow_mtx.
-	 */
-	void *eden;
-	size_t eden_len;
-	/* Source for metadata. */
-	base_t *base;
-	/* Number of grow operations done on this hpa_central_t. */
-	uint64_t age_counter;
-
-	/* The HPA hooks. */
-	hpa_hooks_t hooks;
-};
+#include "jemalloc/internal/sec.h"
 
 typedef struct hpa_shard_nonderived_stats_s hpa_shard_nonderived_stats_t;
 struct hpa_shard_nonderived_stats_s {
@@ -61,6 +37,14 @@ struct hpa_shard_nonderived_stats_s {
 	 * Guarded by mtx.
 	 */
 	uint64_t nhugifies;
+
+	/*
+	 * The number of times we've tried to hugify a pageslab, but failed.
+	 *
+	 * Guarded by mtx.
+	 */
+	uint64_t nhugify_failures;
+
 	/*
 	 * The number of times we've dehugified a pageslab.
 	 *
@@ -72,8 +56,9 @@ struct hpa_shard_nonderived_stats_s {
 /* Completely derived; only used by CTL. */
 typedef struct hpa_shard_stats_s hpa_shard_stats_t;
 struct hpa_shard_stats_s {
-	psset_stats_t psset_stats;
+	psset_stats_t                psset_stats;
 	hpa_shard_nonderived_stats_t nonderived_stats;
+	sec_stats_t                  secstats;
 };
 
 typedef struct hpa_shard_s hpa_shard_t;
@@ -86,14 +71,17 @@ struct hpa_shard_s {
 
 	/* The central allocator we get our hugepages from. */
 	hpa_central_t *central;
+
 	/* Protects most of this shard's state. */
 	malloc_mutex_t mtx;
+
 	/*
 	 * Guards the shard's access to the central allocator (preventing
 	 * multiple threads operating on this shard from accessing the central
 	 * allocator).
 	 */
 	malloc_mutex_t grow_mtx;
+
 	/* The base metadata allocator. */
 	base_t *base;
 
@@ -104,6 +92,9 @@ struct hpa_shard_s {
 	 */
 	edata_cache_fast_t ecf;
 
+	/* Small extent cache (not guarded by mtx) */
+	JEMALLOC_ALIGNED(CACHELINE) sec_t sec;
+
 	psset_t psset;
 
 	/*
@@ -141,22 +132,31 @@ struct hpa_shard_s {
 	 * Last time we performed purge on this shard.
 	 */
 	nstime_t last_purge;
+
+	/*
+	 * Last time when we attempted work (purging or hugifying). If deferral
+	 * of the work is allowed (we have background thread), this is the time
+	 * when background thread checked if purging or hugifying needs to be
+	 * done. If deferral is not allowed, this is the time of (hpa_alloc or
+	 * hpa_dalloc) activity in the shard.
+	 */
+	nstime_t last_time_work_attempted;
 };
 
+bool hpa_hugepage_size_exceeds_limit(void);
 /*
  * Whether or not the HPA can be used given the current configuration.  This is
  * is not necessarily a guarantee that it backs its allocations by hugepages,
  * just that it can function properly given the system it's running on.
  */
-bool hpa_supported();
-bool hpa_central_init(hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks);
-bool hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap,
-    base_t *base, edata_cache_t *edata_cache, unsigned ind,
-    const hpa_shard_opts_t *opts);
+bool hpa_supported(void);
+bool hpa_shard_init(tsdn_t *tsdn, hpa_shard_t *shard, hpa_central_t *central,
+    emap_t *emap, base_t *base, edata_cache_t *edata_cache, unsigned ind,
+    const hpa_shard_opts_t *opts, const sec_opts_t *sec_opts);
 
 void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src);
-void hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard,
-    hpa_shard_stats_t *dst);
+void hpa_shard_stats_merge(
+    tsdn_t *tsdn, hpa_shard_t *shard, hpa_shard_stats_t *dst);
 
 /*
  * Notify the shard that we won't use it for allocations much longer.  Due to
@@ -165,15 +165,18 @@ void hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard,
  */
 void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard);
 void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard);
+/* Flush caches that shard may be using */
+void hpa_shard_flush(tsdn_t *tsdn, hpa_shard_t *shard);
 
-void hpa_shard_set_deferral_allowed(tsdn_t *tsdn, hpa_shard_t *shard,
-    bool deferral_allowed);
+void hpa_shard_set_deferral_allowed(
+    tsdn_t *tsdn, hpa_shard_t *shard, bool deferral_allowed);
 void hpa_shard_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard);
 
 /*
  * We share the fork ordering with the PA and arena prefork handling; that's why
- * these are 3 and 4 rather than 0 and 1.
+ * these are 2, 3 and 4 rather than 0 and 1.
  */
+void hpa_shard_prefork2(tsdn_t *tsdn, hpa_shard_t *shard);
 void hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard);
 void hpa_shard_prefork4(tsdn_t *tsdn, hpa_shard_t *shard);
 void hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard);
diff --git a/include/jemalloc/internal/hpa_central.h b/include/jemalloc/internal/hpa_central.h
new file mode 100644
index 00000000..3e0ff7da
--- /dev/null
+++ b/include/jemalloc/internal/hpa_central.h
@@ -0,0 +1,41 @@
+#ifndef JEMALLOC_INTERNAL_HPA_CENTRAL_H
+#define JEMALLOC_INTERNAL_HPA_CENTRAL_H
+
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/hpa_hooks.h"
+#include "jemalloc/internal/hpdata.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/tsd_types.h"
+
+typedef struct hpa_central_s hpa_central_t;
+struct hpa_central_s {
+	/*
+	 * Guards expansion of eden.  We separate this from the regular mutex so
+	 * that cheaper operations can still continue while we're doing the OS
+	 * call.
+	 */
+	malloc_mutex_t grow_mtx;
+	/*
+	 * Either NULL (if empty), or some integer multiple of a
+	 * hugepage-aligned number of hugepages.  We carve them off one at a
+	 * time to satisfy new pageslab requests.
+	 *
+	 * Guarded by grow_mtx.
+	 */
+	void  *eden;
+	size_t eden_len;
+	/* Source for metadata. */
+	base_t *base;
+
+	/* The HPA hooks. */
+	hpa_hooks_t hooks;
+};
+
+bool hpa_central_init(
+    hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks);
+
+hpdata_t *hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size,
+    uint64_t age, bool hugify_eager, bool *oom);
+
+#endif /* JEMALLOC_INTERNAL_HPA_CENTRAL_H */
diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h
index 4ea221cb..f50ff58f 100644
--- a/include/jemalloc/internal/hpa_hooks.h
+++ b/include/jemalloc/internal/hpa_hooks.h
@@ -1,17 +1,21 @@
 #ifndef JEMALLOC_INTERNAL_HPA_HOOKS_H
 #define JEMALLOC_INTERNAL_HPA_HOOKS_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/nstime.h"
+
 typedef struct hpa_hooks_s hpa_hooks_t;
 struct hpa_hooks_s {
 	void *(*map)(size_t size);
 	void (*unmap)(void *ptr, size_t size);
 	void (*purge)(void *ptr, size_t size);
-	void (*hugify)(void *ptr, size_t size);
+	bool (*hugify)(void *ptr, size_t size, bool sync);
 	void (*dehugify)(void *ptr, size_t size);
 	void (*curtime)(nstime_t *r_time, bool first_reading);
 	uint64_t (*ms_since)(nstime_t *r_time);
+	bool (*vectorized_purge)(void *vec, size_t vlen, size_t nbytes);
 };
 
-extern hpa_hooks_t hpa_hooks_default;
+extern const hpa_hooks_t hpa_hooks_default;
 
 #endif /* JEMALLOC_INTERNAL_HPA_HOOKS_H */
diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h
index ee84fea1..6747c2db 100644
--- a/include/jemalloc/internal/hpa_opts.h
+++ b/include/jemalloc/internal/hpa_opts.h
@@ -1,13 +1,66 @@
 #ifndef JEMALLOC_INTERNAL_HPA_OPTS_H
 #define JEMALLOC_INTERNAL_HPA_OPTS_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/fxp.h"
 
 /*
  * This file is morally part of hpa.h, but is split out for header-ordering
  * reasons.
+ *
+ * All of these hpa_shard_opts below are experimental. We are exploring more
+ * efficient packing, hugifying, and purging approaches to make efficient
+ * trade-offs between CPU, memory, latency, and usability. This means all of
+ * them are at the risk of being deprecated and corresponding configurations
+ * should be updated once the final version settles.
  */
 
+/*
+ * This enum controls how jemalloc hugifies/dehugifies pages.  Each style may be
+ * more suitable depending on deployment environments.
+ *
+ * hpa_hugify_style_none
+ * Using this means that jemalloc will not be hugifying or dehugifying pages,
+ * but will let the kernel make those decisions.  This style only makes sense
+ * when deploying on systems where THP are enabled in 'always' mode.  With this
+ * style, you most likely want to have no purging at all (dirty_mult=-1) or
+ * purge_threshold=HUGEPAGE bytes (2097152 for 2Mb page), although other
+ * thresholds may work well depending on kernel settings of your deployment
+ * targets.
+ *
+ * hpa_hugify_style_eager
+ * This style results in jemalloc giving hugepage advice, if needed, to
+ * anonymous memory immediately after it is mapped, so huge pages can be backing
+ * that memory at page-fault time.  This is usually more efficient than doing
+ * it later, and it allows us to benefit from the hugepages from the start.
+ * Same options for purging as for the style 'none' are good starting choices:
+ * no purging, or purge_threshold=HUGEPAGE, some min_purge_delay_ms that allows
+ * for page not to be purged quickly, etc.  This is a good choice if you can
+ * afford extra memory and your application gets performance increase from
+ * transparent hughepages.
+ *
+ * hpa_hugify_style_lazy
+ * This style is suitable when you purge more aggressively (you sacrifice CPU
+ * performance for less memory).  When this style is chosen, jemalloc will
+ * hugify once hugification_threshold is reached, and dehugify before purging.
+ * If the kernel is configured to use direct compaction you may experience some
+ * allocation latency when using this style.  The best is to measure what works
+ * better for your application needs, and in the target deployment environment.
+ * This is a good choice for apps that cannot afford a lot of memory regression,
+ * but would still like to benefit from backing certain memory regions with
+ * hugepages.
+ */
+enum hpa_hugify_style_e {
+	hpa_hugify_style_auto = 0,
+	hpa_hugify_style_none = 1,
+	hpa_hugify_style_eager = 2,
+	hpa_hugify_style_lazy = 3,
+	hpa_hugify_style_limit = hpa_hugify_style_lazy + 1
+};
+typedef enum hpa_hugify_style_e hpa_hugify_style_t;
+
+extern const char *const hpa_hugify_style_names[];
+
 typedef struct hpa_shard_opts_s hpa_shard_opts_t;
 struct hpa_shard_opts_s {
 	/*
@@ -44,12 +97,64 @@ struct hpa_shard_opts_s {
 	 */
 	uint64_t hugify_delay_ms;
 
+	/*
+	 * Hugify pages synchronously (hugify will happen even if hugify_style
+	 * is not hpa_hugify_style_lazy).
+	 */
+	bool hugify_sync;
+
 	/*
 	 * Minimum amount of time between purges.
 	 */
 	uint64_t min_purge_interval_ms;
+
+	/*
+	 * Maximum number of hugepages to purge on each purging attempt.
+	 */
+	ssize_t experimental_max_purge_nhp;
+
+	/*
+	 * Minimum number of inactive bytes needed for a non-empty page to be
+	 * considered purgable.
+	 *
+	 * When the number of touched inactive bytes on non-empty hugepage is
+	 * >= purge_threshold, the page is purgable.  Empty pages are always
+	 * purgable.  Setting this to HUGEPAGE bytes would only purge empty
+	 * pages if using hugify_style_eager and the purges would be exactly
+	 * HUGEPAGE bytes.  Depending on your kernel settings, this may result
+	 * in better performance.
+	 *
+	 * Please note, when threshold is reached, we will purge all the dirty
+	 * bytes, and not just up to the threshold.  If this is PAGE bytes, then
+	 * all the pages that have any dirty bytes are purgable.  We treat
+	 * purgability constraint for purge_threshold as stronger than
+	 * dirty_mult, IOW, if no page meets purge_threshold, we will not purge
+	 * even if we are above dirty_mult.
+	 */
+	size_t purge_threshold;
+
+	/*
+	 * Minimum number of ms that needs to elapse between HP page becoming
+	 * eligible for purging and actually getting purged.
+	 *
+	 * Setting this to a larger number would give better chance of reusing
+	 * that memory.  Setting it to 0 means that page is eligible for purging
+	 * as soon as it meets the purge_threshold.  The clock resets when
+	 * purgability of the page changes (page goes from being non-purgable to
+	 * purgable).  When using eager style you probably want to allow for
+	 * some delay, to avoid purging the page too quickly and give it time to
+	 * be used.
+	 */
+	uint64_t min_purge_delay_ms;
+
+	/*
+	 * Style of hugification/dehugification (see comment at
+	 * hpa_hugify_style_t for options).
+	 */
+	hpa_hugify_style_t hugify_style;
 };
 
+/* clang-format off */
 #define HPA_SHARD_OPTS_DEFAULT {					\
 	/* slab_max_alloc */						\
 	64 * 1024,							\
@@ -67,8 +172,19 @@ struct hpa_shard_opts_s {
 	false,								\
 	/* hugify_delay_ms */						\
 	10 * 1000,							\
+	/* hugify_sync */						\
+	false,								\
 	/* min_purge_interval_ms */					\
-	5 * 1000							\
+	5 * 1000,							\
+	/* experimental_max_purge_nhp */				\
+	-1,      							\
+	/* size_t purge_threshold */					\
+	PAGE,								\
+	/* min_purge_delay_ms */             				\
+	0,  								\
+	/* hugify_style */                				\
+	hpa_hugify_style_lazy						\
 }
+/* clang-format on */
 
 #endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */
diff --git a/include/jemalloc/internal/hpa_utils.h b/include/jemalloc/internal/hpa_utils.h
new file mode 100644
index 00000000..6b006cff
--- /dev/null
+++ b/include/jemalloc/internal/hpa_utils.h
@@ -0,0 +1,161 @@
+#ifndef JEMALLOC_INTERNAL_HPA_UTILS_H
+#define JEMALLOC_INTERNAL_HPA_UTILS_H
+
+#include "jemalloc/internal/hpa.h"
+#include "jemalloc/internal/extent.h"
+
+#define HPA_MIN_VAR_VEC_SIZE 8
+/*
+ * This is used for jemalloc internal tuning and may change in the future based
+ * on production traffic.
+ *
+ * This value protects two things:
+ *    1. Stack size
+ *    2. Number of huge pages that are being purged in a batch as we do not
+ *       allow allocations while making madvise syscall.
+ */
+#define HPA_PURGE_BATCH_MAX 16
+
+#ifdef JEMALLOC_HAVE_PROCESS_MADVISE
+typedef struct iovec hpa_io_vector_t;
+#else
+typedef struct {
+	void  *iov_base;
+	size_t iov_len;
+} hpa_io_vector_t;
+#endif
+
+static inline size_t
+hpa_process_madvise_max_iovec_len(void) {
+	assert(
+	    opt_process_madvise_max_batch <= PROCESS_MADVISE_MAX_BATCH_LIMIT);
+	return opt_process_madvise_max_batch == 0
+	    ? HPA_MIN_VAR_VEC_SIZE
+	    : opt_process_madvise_max_batch;
+}
+
+/* Actually invoke hooks. If we fail vectorized, use single purges */
+static void
+hpa_try_vectorized_purge(
+    hpa_hooks_t *hooks, hpa_io_vector_t *vec, size_t vlen, size_t nbytes) {
+	bool success = opt_process_madvise_max_batch > 0
+	    && !hooks->vectorized_purge(vec, vlen, nbytes);
+	if (!success) {
+		/* On failure, it is safe to purge again (potential perf
+		 * penalty) If kernel can tell exactly which regions
+		 * failed, we could avoid that penalty.
+		 */
+		for (size_t i = 0; i < vlen; ++i) {
+			hooks->purge(vec[i].iov_base, vec[i].iov_len);
+		}
+	}
+}
+
+/*
+ * This structure accumulates the regions for process_madvise. It invokes the
+ * hook when batch limit is reached.
+ */
+typedef struct {
+	hpa_io_vector_t *vp;
+	size_t           cur;
+	size_t           total_bytes;
+	size_t           capacity;
+} hpa_range_accum_t;
+
+static inline void
+hpa_range_accum_init(hpa_range_accum_t *ra, hpa_io_vector_t *v, size_t sz) {
+	ra->vp = v;
+	ra->capacity = sz;
+	ra->total_bytes = 0;
+	ra->cur = 0;
+}
+
+static inline void
+hpa_range_accum_flush(hpa_range_accum_t *ra, hpa_hooks_t *hooks) {
+	assert(ra->total_bytes > 0 && ra->cur > 0);
+	hpa_try_vectorized_purge(hooks, ra->vp, ra->cur, ra->total_bytes);
+	ra->cur = 0;
+	ra->total_bytes = 0;
+}
+
+static inline void
+hpa_range_accum_add(
+    hpa_range_accum_t *ra, void *addr, size_t sz, hpa_hooks_t *hooks) {
+	assert(ra->cur < ra->capacity);
+
+	ra->vp[ra->cur].iov_base = addr;
+	ra->vp[ra->cur].iov_len = sz;
+	ra->total_bytes += sz;
+	ra->cur++;
+
+	if (ra->cur == ra->capacity) {
+		hpa_range_accum_flush(ra, hooks);
+	}
+}
+
+static inline void
+hpa_range_accum_finish(hpa_range_accum_t *ra, hpa_hooks_t *hooks) {
+	if (ra->cur > 0) {
+		hpa_range_accum_flush(ra, hooks);
+	}
+}
+
+/*
+ * For purging more than one page we use batch of these items
+ */
+typedef struct {
+	hpdata_purge_state_t state;
+	hpdata_t            *hp;
+	bool                 dehugify;
+} hpa_purge_item_t;
+
+typedef struct hpa_purge_batch_s hpa_purge_batch_t;
+struct hpa_purge_batch_s {
+	hpa_purge_item_t *items;
+	size_t            items_capacity;
+	/* Number of huge pages to purge in current batch */
+	size_t item_cnt;
+	/* Number of ranges to purge in current batch */
+	size_t nranges;
+	/* Total number of dirty pages in current batch*/
+	size_t ndirty_in_batch;
+
+	/* Max number of huge pages to purge */
+	size_t max_hp;
+	/*
+	 * Once we are above this watermark we should not add more pages
+	 * to the same batch. This is because while we want to minimize
+	 * number of madvise calls we also do not want to be preventing
+	 * allocations from too many huge pages (which we have to do
+	 * while they are being purged)
+	 */
+	size_t range_watermark;
+
+	size_t npurged_hp_total;
+};
+
+static inline bool
+hpa_batch_full(hpa_purge_batch_t *b) {
+	/* It's okay for ranges to go above */
+	return b->npurged_hp_total == b->max_hp
+	    || b->item_cnt == b->items_capacity
+	    || b->nranges >= b->range_watermark;
+}
+
+static inline void
+hpa_batch_pass_start(hpa_purge_batch_t *b) {
+	b->item_cnt = 0;
+	b->nranges = 0;
+	b->ndirty_in_batch = 0;
+}
+
+static inline bool
+hpa_batch_empty(hpa_purge_batch_t *b) {
+	return b->item_cnt == 0;
+}
+
+/* Purge pages in a batch using given hooks */
+void hpa_purge_batch(
+    hpa_hooks_t *hooks, hpa_purge_item_t *batch, size_t batch_sz);
+
+#endif /* JEMALLOC_INTERNAL_HPA_UTILS_H */
diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h
index 1fb534db..a9c507f0 100644
--- a/include/jemalloc/internal/hpdata.h
+++ b/include/jemalloc/internal/hpdata.h
@@ -1,7 +1,10 @@
 #ifndef JEMALLOC_INTERNAL_HPDATA_H
 #define JEMALLOC_INTERNAL_HPDATA_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/fb.h"
+#include "jemalloc/internal/nstime.h"
+#include "jemalloc/internal/pages.h"
 #include "jemalloc/internal/ph.h"
 #include "jemalloc/internal/ql.h"
 #include "jemalloc/internal/typed_list.h"
@@ -17,8 +20,14 @@
  * an observable property of any given region of address space).  It's just
  * hugepage-sized and hugepage-aligned; it's *potentially* huge.
  */
+
+/*
+ * The max enumeration num should not exceed 2^16 - 1, see comments in edata.h
+ * for ESET_ENUMERATE_MAX_NUM for more details.
+ */
+#define PSSET_ENUMERATE_MAX_NUM 32
 typedef struct hpdata_s hpdata_t;
-ph_structs(hpdata_age_heap, hpdata_t);
+ph_structs(hpdata_age_heap, hpdata_t, PSSET_ENUMERATE_MAX_NUM);
 struct hpdata_s {
 	/*
 	 * We likewise follow the edata convention of mangling names and forcing
@@ -64,7 +73,7 @@ struct hpdata_s {
 	bool h_hugify_allowed;
 	/* When we became a hugification candidate. */
 	nstime_t h_time_hugify_allowed;
-	bool h_in_psset_hugify_container;
+	bool     h_in_psset_hugify_container;
 
 	/* Whether or not a purge or hugify is currently happening. */
 	bool h_mid_purge;
@@ -115,6 +124,12 @@ struct hpdata_s {
 
 	/* The touched pages (using the same definition as above). */
 	fb_group_t touched_pages[FB_NGROUPS(HUGEPAGE_PAGES)];
+
+	/* Time when this extent (hpdata) becomes eligible for purging */
+	nstime_t h_time_purge_allowed;
+
+	/* True if the extent was huge and empty last time when it was purged */
+	bool h_purged_when_empty_and_huge;
 };
 
 TYPED_LIST(hpdata_empty_list, hpdata_t, ql_link_empty)
@@ -177,8 +192,8 @@ hpdata_purge_allowed_get(const hpdata_t *hpdata) {
 
 static inline void
 hpdata_purge_allowed_set(hpdata_t *hpdata, bool purge_allowed) {
-       assert(purge_allowed == false || !hpdata->h_mid_purge);
-       hpdata->h_purge_allowed = purge_allowed;
+	assert(purge_allowed == false || !hpdata->h_mid_purge);
+	hpdata->h_purge_allowed = purge_allowed;
 }
 
 static inline bool
@@ -241,7 +256,6 @@ hpdata_changing_state_get(const hpdata_t *hpdata) {
 	return hpdata->h_mid_purge || hpdata->h_mid_hugify;
 }
 
-
 static inline bool
 hpdata_updating_get(const hpdata_t *hpdata) {
 	return hpdata->h_updating;
@@ -276,17 +290,17 @@ hpdata_longest_free_range_set(hpdata_t *hpdata, size_t longest_free_range) {
 }
 
 static inline size_t
-hpdata_nactive_get(hpdata_t *hpdata) {
+hpdata_nactive_get(const hpdata_t *hpdata) {
 	return hpdata->h_nactive;
 }
 
 static inline size_t
-hpdata_ntouched_get(hpdata_t *hpdata) {
+hpdata_ntouched_get(const hpdata_t *hpdata) {
 	return hpdata->h_ntouched;
 }
 
 static inline size_t
-hpdata_ndirty_get(hpdata_t *hpdata) {
+hpdata_ndirty_get(const hpdata_t *hpdata) {
 	return hpdata->h_ntouched - hpdata->h_nactive;
 }
 
@@ -295,6 +309,26 @@ hpdata_nretained_get(hpdata_t *hpdata) {
 	return HUGEPAGE_PAGES - hpdata->h_ntouched;
 }
 
+static inline void
+hpdata_time_purge_allowed_set(hpdata_t *hpdata, const nstime_t *v) {
+	nstime_copy(&hpdata->h_time_purge_allowed, v);
+}
+
+static inline const nstime_t *
+hpdata_time_purge_allowed_get(const hpdata_t *hpdata) {
+	return &hpdata->h_time_purge_allowed;
+}
+
+static inline bool
+hpdata_purged_when_empty_and_huge_get(const hpdata_t *hpdata) {
+	return hpdata->h_purged_when_empty_and_huge;
+}
+
+static inline void
+hpdata_purged_when_empty_and_huge_set(hpdata_t *hpdata, bool v) {
+	hpdata->h_purged_when_empty_and_huge = v;
+}
+
 static inline void
 hpdata_assert_empty(hpdata_t *hpdata) {
 	assert(fb_empty(hpdata->active_pages, HUGEPAGE_PAGES));
@@ -308,58 +342,95 @@ hpdata_assert_empty(hpdata_t *hpdata) {
  */
 static inline bool
 hpdata_consistent(hpdata_t *hpdata) {
-	if(fb_urange_longest(hpdata->active_pages, HUGEPAGE_PAGES)
-	    != hpdata_longest_free_range_get(hpdata)) {
-		return false;
+	bool res = true;
+
+	const size_t active_urange_longest = fb_urange_longest(
+	    hpdata->active_pages, HUGEPAGE_PAGES);
+	const size_t longest_free_range = hpdata_longest_free_range_get(hpdata);
+	if (active_urange_longest != longest_free_range) {
+		malloc_printf(
+		    "<jemalloc>: active_fb_urange_longest=%zu != hpdata_longest_free_range=%zu\n",
+		    active_urange_longest, longest_free_range);
+		res = false;
 	}
-	if (fb_scount(hpdata->active_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)
-	    != hpdata->h_nactive) {
-		return false;
+
+	const size_t active_scount = fb_scount(
+	    hpdata->active_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES);
+	if (active_scount != hpdata->h_nactive) {
+		malloc_printf(
+		    "<jemalloc>: active_fb_scount=%zu != hpdata_nactive=%zu\n",
+		    active_scount, hpdata->h_nactive);
+		res = false;
 	}
-	if (fb_scount(hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)
-	    != hpdata->h_ntouched) {
-		return false;
+
+	const size_t touched_scount = fb_scount(
+	    hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES);
+	if (touched_scount != hpdata->h_ntouched) {
+		malloc_printf(
+		    "<jemalloc>: touched_fb_scount=%zu != hpdata_ntouched=%zu\n",
+		    touched_scount, hpdata->h_ntouched);
+		res = false;
 	}
+
 	if (hpdata->h_ntouched < hpdata->h_nactive) {
-		return false;
+		malloc_printf(
+		    "<jemalloc>: hpdata_ntouched=%zu < hpdata_nactive=%zu\n",
+		    hpdata->h_ntouched, hpdata->h_nactive);
+		res = false;
 	}
-	if (hpdata->h_huge && hpdata->h_ntouched != HUGEPAGE_PAGES) {
-		return false;
+
+	if (hpdata->h_huge && (hpdata->h_ntouched != HUGEPAGE_PAGES)) {
+		malloc_printf(
+		    "<jemalloc>: hpdata_huge=%d && (hpdata_ntouched=%zu != hugepage_pages=%zu)\n",
+		    hpdata->h_huge, hpdata->h_ntouched, HUGEPAGE_PAGES);
+		res = false;
 	}
-	if (hpdata_changing_state_get(hpdata)
-	    && ((hpdata->h_purge_allowed) || hpdata->h_hugify_allowed)) {
-		return false;
+
+	const bool changing_state = hpdata_changing_state_get(hpdata);
+	if (changing_state
+	    && (hpdata->h_purge_allowed || hpdata->h_hugify_allowed)) {
+		malloc_printf(
+		    "<jemalloc>: hpdata_changing_state=%d && (hpdata_purge_allowed=%d || hpdata_hugify_allowed=%d)\n",
+		    changing_state, hpdata->h_purge_allowed,
+		    hpdata->h_hugify_allowed);
+		res = false;
 	}
+
 	if (hpdata_hugify_allowed_get(hpdata)
 	    != hpdata_in_psset_hugify_container_get(hpdata)) {
-		return false;
+		malloc_printf(
+		    "<jemalloc>: hpdata_hugify_allowed=%d != hpdata_in_psset_hugify_container=%d\n",
+		    hpdata_hugify_allowed_get(hpdata),
+		    hpdata_in_psset_hugify_container_get(hpdata));
+		res = false;
 	}
-	return true;
+
+	return res;
 }
 
-static inline void
-hpdata_assert_consistent(hpdata_t *hpdata) {
-	assert(hpdata_consistent(hpdata));
-}
+#define hpdata_assert_consistent(hpdata)                                       \
+	do {                                                                   \
+		assert(hpdata_consistent(hpdata));                             \
+	} while (0)
 
 static inline bool
-hpdata_empty(hpdata_t *hpdata) {
+hpdata_empty(const hpdata_t *hpdata) {
 	return hpdata->h_nactive == 0;
 }
 
 static inline bool
-hpdata_full(hpdata_t *hpdata) {
+hpdata_full(const hpdata_t *hpdata) {
 	return hpdata->h_nactive == HUGEPAGE_PAGES;
 }
 
-void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age);
+void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age, bool is_huge);
 
 /*
  * Given an hpdata which can serve an allocation request, pick and reserve an
  * offset within that allocation.
  */
 void *hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz);
-void hpdata_unreserve(hpdata_t *hpdata, void *begin, size_t sz);
+void  hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz);
 
 /*
  * The hpdata_purge_prepare_t allows grabbing the metadata required to purge
@@ -368,10 +439,10 @@ void hpdata_unreserve(hpdata_t *hpdata, void *begin, size_t sz);
  */
 typedef struct hpdata_purge_state_s hpdata_purge_state_t;
 struct hpdata_purge_state_s {
-	size_t npurged;
-	size_t ndirty_to_purge;
+	size_t     npurged;
+	size_t     ndirty_to_purge;
 	fb_group_t to_purge[FB_NGROUPS(HUGEPAGE_PAGES)];
-	size_t next_purge_search_begin;
+	size_t     next_purge_search_begin;
 };
 
 /*
@@ -386,9 +457,11 @@ struct hpdata_purge_state_s {
  * until you're done, and then end.  Allocating out of an hpdata undergoing
  * purging is not allowed.
  *
- * Returns the number of dirty pages that will be purged.
+ * Returns the number of dirty pages that will be purged and sets nranges
+ * to number of ranges with dirty pages that will be purged.
  */
-size_t hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state);
+size_t hpdata_purge_begin(
+    hpdata_t *hpdata, hpdata_purge_state_t *purge_state, size_t *nranges);
 
 /*
  * If there are more extents to purge, sets *r_purge_addr and *r_purge_size to
diff --git a/include/jemalloc/internal/inspect.h b/include/jemalloc/internal/inspect.h
index 65fef51d..e8ed44d3 100644
--- a/include/jemalloc/internal/inspect.h
+++ b/include/jemalloc/internal/inspect.h
@@ -1,6 +1,9 @@
 #ifndef JEMALLOC_INTERNAL_INSPECT_H
 #define JEMALLOC_INTERNAL_INSPECT_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/tsd_types.h"
+
 /*
  * This module contains the heap introspection capabilities.  For now they are
  * exposed purely through mallctl APIs in the experimental namespace, but this
@@ -23,7 +26,7 @@ typedef struct inspect_extent_util_stats_verbose_s
     inspect_extent_util_stats_verbose_t;
 
 struct inspect_extent_util_stats_verbose_s {
-	void *slabcur_addr;
+	void  *slabcur_addr;
 	size_t nfree;
 	size_t nregs;
 	size_t size;
@@ -31,10 +34,10 @@ struct inspect_extent_util_stats_verbose_s {
 	size_t bin_nregs;
 };
 
-void inspect_extent_util_stats_get(tsdn_t *tsdn, const void *ptr,
-    size_t *nfree, size_t *nregs, size_t *size);
+void inspect_extent_util_stats_get(
+    tsdn_t *tsdn, const void *ptr, size_t *nfree, size_t *nregs, size_t *size);
 void inspect_extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr,
-    size_t *nfree, size_t *nregs, size_t *size,
-    size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr);
+    size_t *nfree, size_t *nregs, size_t *size, size_t *bin_nfree,
+    size_t *bin_nregs, void **slabcur_addr);
 
 #endif /* JEMALLOC_INTERNAL_INSPECT_H */
diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h
index 983027c8..2ca12c4a 100644
--- a/include/jemalloc/internal/jemalloc_internal_decls.h
+++ b/include/jemalloc/internal/jemalloc_internal_decls.h
@@ -3,64 +3,65 @@
 
 #include <math.h>
 #ifdef _WIN32
-#  include <windows.h>
-#  include "msvc_compat/windows_extra.h"
-#  include "msvc_compat/strings.h"
-#  ifdef _WIN64
-#    if LG_VADDR <= 32
-#      error Generate the headers using x64 vcargs
-#    endif
-#  else
-#    if LG_VADDR > 32
-#      undef LG_VADDR
-#      define LG_VADDR 32
-#    endif
-#  endif
+#	include <windows.h>
+#	include "msvc_compat/windows_extra.h"
+#	include "msvc_compat/strings.h"
+#	ifdef _WIN64
+#		if LG_VADDR <= 32
+#			error Generate the headers using x64 vcargs
+#		endif
+#	else
+#		if LG_VADDR > 32
+#			undef LG_VADDR
+#			define LG_VADDR 32
+#		endif
+#	endif
 #else
-#  include <sys/param.h>
-#  include <sys/mman.h>
-#  if !defined(__pnacl__) && !defined(__native_client__)
-#    include <sys/syscall.h>
-#    if !defined(SYS_write) && defined(__NR_write)
-#      define SYS_write __NR_write
-#    endif
-#    if defined(SYS_open) && defined(__aarch64__)
-       /* Android headers may define SYS_open to __NR_open even though
+#	include <sys/param.h>
+#	include <sys/mman.h>
+#	if !defined(__pnacl__) && !defined(__native_client__)
+#		include <sys/syscall.h>
+#		if !defined(SYS_write) && defined(__NR_write)
+#			define SYS_write __NR_write
+#		endif
+#		if defined(SYS_open) && defined(__aarch64__)
+/* Android headers may define SYS_open to __NR_open even though
         * __NR_open may not exist on AArch64 (superseded by __NR_openat). */
-#      undef SYS_open
-#    endif
-#    include <sys/uio.h>
-#  endif
-#  include <pthread.h>
-#  if defined(__FreeBSD__) || defined(__DragonFly__)
-#  include <pthread_np.h>
-#  include <sched.h>
-#  if defined(__FreeBSD__)
-#    define cpu_set_t cpuset_t
-#  endif
-#  endif
-#  include <signal.h>
-#  ifdef JEMALLOC_OS_UNFAIR_LOCK
-#    include <os/lock.h>
-#  endif
-#  ifdef JEMALLOC_GLIBC_MALLOC_HOOK
-#    include <sched.h>
-#  endif
-#  include <errno.h>
-#  include <sys/time.h>
-#  include <time.h>
-#  ifdef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME
-#    include <mach/mach_time.h>
-#  endif
+#			undef SYS_open
+#		endif
+#		include <sys/uio.h>
+#	endif
+#	include <pthread.h>
+#	if defined(__FreeBSD__) || defined(__DragonFly__)                     \
+	    || defined(__OpenBSD__)
+#		include <pthread_np.h>
+#		include <sched.h>
+#		if defined(__FreeBSD__)
+#			define cpu_set_t cpuset_t
+#		endif
+#	endif
+#	include <signal.h>
+#	ifdef JEMALLOC_OS_UNFAIR_LOCK
+#		include <os/lock.h>
+#	endif
+#	ifdef JEMALLOC_GLIBC_MALLOC_HOOK
+#		include <sched.h>
+#	endif
+#	include <errno.h>
+#	include <sys/time.h>
+#	include <time.h>
+#	ifdef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME
+#		include <mach/mach_time.h>
+#	endif
 #endif
 #include <sys/types.h>
 
 #include <limits.h>
 #ifndef SIZE_T_MAX
-#  define SIZE_T_MAX	SIZE_MAX
+#	define SIZE_T_MAX SIZE_MAX
 #endif
 #ifndef SSIZE_MAX
-#  define SSIZE_MAX	((ssize_t)(SIZE_T_MAX >> 1))
+#	define SSIZE_MAX ((ssize_t)(SIZE_T_MAX >> 1))
 #endif
 #include <stdarg.h>
 #include <stdbool.h>
@@ -69,30 +70,30 @@
 #include <stdint.h>
 #include <stddef.h>
 #ifndef offsetof
-#  define offsetof(type, member)	((size_t)&(((type *)NULL)->member))
+#	define offsetof(type, member) ((size_t) & (((type *)NULL)->member))
 #endif
 #include <string.h>
 #include <strings.h>
 #include <ctype.h>
 #ifdef _MSC_VER
-#  include <io.h>
+#	include <io.h>
 typedef intptr_t ssize_t;
-#  define PATH_MAX 1024
-#  define STDERR_FILENO 2
-#  define __func__ __FUNCTION__
-#  ifdef JEMALLOC_HAS_RESTRICT
-#    define restrict __restrict
-#  endif
+#	define PATH_MAX 1024
+#	define STDERR_FILENO 2
+#	define __func__ __FUNCTION__
+#	ifdef JEMALLOC_HAS_RESTRICT
+#		define restrict __restrict
+#	endif
 /* Disable warnings about deprecated system functions. */
-#  pragma warning(disable: 4996)
-#if _MSC_VER < 1800
+#	pragma warning(disable : 4996)
+#	if _MSC_VER < 1800
 static int
 isblank(int c) {
 	return (c == '\t' || c == ' ');
 }
-#endif
+#	endif
 #else
-#  include <unistd.h>
+#	include <unistd.h>
 #endif
 #include <fcntl.h>
 
@@ -102,7 +103,24 @@ isblank(int c) {
  * classes.
  */
 #ifdef small
-#  undef small
+#	undef small
 #endif
 
+/*
+ * Oftentimes we'd like to perform some kind of arithmetic to obtain
+ * a pointer from another pointer but with some offset or mask applied.
+ * Naively you would accomplish this by casting the source pointer to
+ * `uintptr_t`, performing all of the relevant arithmetic, and then casting
+ * the result to the desired pointer type. However, this has the unfortunate
+ * side-effect of concealing pointer provenance, hiding useful information for
+ * optimization from the compiler (see here for details:
+ * https://clang.llvm.org/extra/clang-tidy/checks/performance/no-int-to-ptr.html
+ * )
+ * Instead what one should do is cast the source pointer to `char *` and perform
+ * the equivalent arithmetic (since `char` of course represents one byte). But
+ * because `char *` has the semantic meaning of "string", we define this typedef
+ * simply to make it clearer where we are performing such pointer arithmetic.
+ */
+typedef char byte_t;
+
 #endif /* JEMALLOC_INTERNAL_H */
diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in
index 3588072f..31ae2e8e 100644
--- a/include/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in
@@ -14,10 +14,13 @@
  */
 #undef JEMALLOC_OVERRIDE___LIBC_CALLOC
 #undef JEMALLOC_OVERRIDE___LIBC_FREE
+#undef JEMALLOC_OVERRIDE___LIBC_FREE_SIZED
+#undef JEMALLOC_OVERRIDE___LIBC_FREE_ALIGNED_SIZED
 #undef JEMALLOC_OVERRIDE___LIBC_MALLOC
 #undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN
 #undef JEMALLOC_OVERRIDE___LIBC_REALLOC
 #undef JEMALLOC_OVERRIDE___LIBC_VALLOC
+#undef JEMALLOC_OVERRIDE___LIBC_PVALLOC
 #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN
 
 /*
@@ -88,6 +91,9 @@
 /* Defined if pthread_getname_np(3) is available. */
 #undef JEMALLOC_HAVE_PTHREAD_GETNAME_NP
 
+/* Defined if pthread_set_name_np(3) is available. */
+#undef JEMALLOC_HAVE_PTHREAD_SET_NAME_NP
+
 /* Defined if pthread_get_name_np(3) is available. */
 #undef JEMALLOC_HAVE_PTHREAD_GET_NAME_NP
 
@@ -111,6 +117,11 @@
  */
 #undef JEMALLOC_HAVE_CLOCK_REALTIME
 
+/*
+ * Defined if clock_gettime_nsec_np(CLOCK_UPTIME_RAW) is available.
+ */
+#undef JEMALLOC_HAVE_CLOCK_GETTIME_NSEC_NP
+
 /*
  * Defined if _malloc_thread_cleanup() exists.  At least in the case of
  * FreeBSD, pthread_key_create() allocates, which if used during malloc
@@ -161,6 +172,15 @@
 /* Use gcc intrinsics for profile backtracing if defined. */
 #undef JEMALLOC_PROF_GCC
 
+/* Use frame pointer for profile backtracing if defined. Linux only. */
+#undef JEMALLOC_PROF_FRAME_POINTER
+
+/* JEMALLOC_PAGEID enabled page id */
+#undef JEMALLOC_PAGEID
+
+/* JEMALLOC_HAVE_PRCTL checks prctl */
+#undef JEMALLOC_HAVE_PRCTL
+
 /*
  * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage
  * segment (DSS).
@@ -259,6 +279,12 @@
  */
 #undef JEMALLOC_READLINKAT
 
+/*
+ * If defined, use getenv() (instead of secure_getenv() or
+ * alternatives) to access MALLOC_CONF.
+ */
+#undef JEMALLOC_FORCE_GETENV
+
 /*
  * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
  */
@@ -282,6 +308,13 @@
  */
 #undef JEMALLOC_HAVE_MADVISE_HUGE
 
+/*
+ * Defined if best-effort synchronous collapse of the native
+ * pages mapped by the memory range into transparent huge pages is supported
+ * via MADV_COLLAPSE arguments to madvise(2).
+ */
+#undef JEMALLOC_HAVE_MADVISE_COLLAPSE
+
 /*
  * Methods for purging unused pages differ between operating systems.
  *
@@ -312,9 +345,23 @@
  */
 #undef JEMALLOC_MADVISE_NOCORE
 
+/* Defined if process_madvise(2) is available. */
+#undef JEMALLOC_HAVE_PROCESS_MADVISE
+
+#undef EXPERIMENTAL_SYS_PROCESS_MADVISE_NR
+
 /* Defined if mprotect(2) is available. */
 #undef JEMALLOC_HAVE_MPROTECT
 
+/* Defined if sys/sdt.h is available and sdt tracing enabled */
+#undef JEMALLOC_EXPERIMENTAL_USDT_STAP
+
+/*
+ * Defined if sys/sdt.h is unavailable, sdt tracing enabled, and
+ * platform is supported
+ */
+#undef JEMALLOC_EXPERIMENTAL_USDT_CUSTOM
+
 /*
  * Defined if transparent huge pages (THPs) are supported via the
  * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled.
@@ -378,12 +425,18 @@
 /* Adaptive mutex support in pthreads. */
 #undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP
 
+/* gettid() support */
+#undef JEMALLOC_HAVE_GETTID
+
 /* GNU specific sched_getcpu support */
 #undef JEMALLOC_HAVE_SCHED_GETCPU
 
 /* GNU specific sched_setaffinity support */
 #undef JEMALLOC_HAVE_SCHED_SETAFFINITY
 
+/* pthread_setaffinity_np support */
+#undef JEMALLOC_HAVE_PTHREAD_SETAFFINITY_NP
+
 /*
  * If defined, all the features necessary for background threads are present.
  */
@@ -424,4 +477,18 @@
 /* If defined, realloc(ptr, 0) defaults to "free" instead of "alloc". */
 #undef JEMALLOC_ZERO_REALLOC_DEFAULT_FREE
 
+/* If defined, use volatile asm during benchmarks. */
+#undef JEMALLOC_HAVE_ASM_VOLATILE
+
+/*
+ * If defined, support the use of rdtscp to get the time stamp counter
+ * and the processor ID.
+ */
+#undef JEMALLOC_HAVE_RDTSCP
+
+/* If defined, use __int128 for optimization. */
+#undef JEMALLOC_HAVE_INT128
+
+#include "jemalloc/internal/jemalloc_internal_overrides.h"
+
 #endif /* JEMALLOC_INTERNAL_DEFS_H_ */
diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h
index fc834c67..9911c199 100644
--- a/include/jemalloc/internal/jemalloc_internal_externs.h
+++ b/include/jemalloc/internal/jemalloc_internal_externs.h
@@ -1,39 +1,52 @@
 #ifndef JEMALLOC_INTERNAL_EXTERNS_H
 #define JEMALLOC_INTERNAL_EXTERNS_H
 
+#include "jemalloc/internal/arena_types.h"
 #include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/fxp.h"
 #include "jemalloc/internal/hpa_opts.h"
+#include "jemalloc/internal/nstime.h"
 #include "jemalloc/internal/sec_opts.h"
 #include "jemalloc/internal/tsd_types.h"
-#include "jemalloc/internal/nstime.h"
 
 /* TSD checks this to set thread local slow state accordingly. */
 extern bool malloc_slow;
 
 /* Run-time options. */
-extern bool opt_abort;
-extern bool opt_abort_conf;
-extern bool opt_trust_madvise;
-extern bool opt_confirm_conf;
-extern bool opt_hpa;
+extern bool             opt_abort;
+extern bool             opt_abort_conf;
+extern bool             opt_trust_madvise;
+extern bool             opt_experimental_hpa_start_huge_if_thp_always;
+extern bool             opt_experimental_hpa_enforce_hugify;
+extern bool             opt_confirm_conf;
+extern bool             opt_hpa;
 extern hpa_shard_opts_t opt_hpa_opts;
-extern sec_opts_t opt_hpa_sec_opts;
+extern sec_opts_t       opt_hpa_sec_opts;
 
 extern const char *opt_junk;
-extern bool opt_junk_alloc;
-extern bool opt_junk_free;
-extern void (*junk_free_callback)(void *ptr, size_t size);
-extern void (*junk_alloc_callback)(void *ptr, size_t size);
-extern bool opt_utrace;
-extern bool opt_xmalloc;
-extern bool opt_experimental_infallible_new;
-extern bool opt_zero;
-extern unsigned opt_narenas;
+extern bool        opt_junk_alloc;
+extern bool        opt_junk_free;
+extern void (*JET_MUTABLE junk_free_callback)(void *ptr, size_t size);
+extern void (*JET_MUTABLE junk_alloc_callback)(void *ptr, size_t size);
+extern void (*JET_MUTABLE invalid_conf_abort)(void);
+extern bool                  opt_utrace;
+extern bool                  opt_xmalloc;
+extern bool                  opt_experimental_infallible_new;
+extern bool                  opt_experimental_tcache_gc;
+extern bool                  opt_zero;
+extern unsigned              opt_narenas;
+extern fxp_t                 opt_narenas_ratio;
 extern zero_realloc_action_t opt_zero_realloc_action;
-extern malloc_init_t malloc_init_state;
-extern const char *zero_realloc_mode_names[];
-extern atomic_zu_t zero_realloc_count;
-extern bool opt_cache_oblivious;
+extern malloc_init_t         malloc_init_state;
+extern const char *const     zero_realloc_mode_names[];
+extern atomic_zu_t           zero_realloc_count;
+extern bool                  opt_cache_oblivious;
+extern unsigned              opt_debug_double_free_max_scan;
+extern size_t                opt_calloc_madvise_threshold;
+extern bool                  opt_disable_large_size_classes;
+
+extern const char *opt_malloc_conf_symlink;
+extern const char *opt_malloc_conf_env_var;
 
 /* Escape free-fastpath when ptr & mask == 0 (for sanitization purpose). */
 extern uintptr_t san_cache_bin_nonfast_mask;
@@ -53,23 +66,26 @@ extern unsigned manual_arena_base;
  */
 extern atomic_p_t arenas[];
 
-void *a0malloc(size_t size);
-void a0dalloc(void *ptr);
-void *bootstrap_malloc(size_t size);
-void *bootstrap_calloc(size_t num, size_t size);
-void bootstrap_free(void *ptr);
-void arena_set(unsigned ind, arena_t *arena);
+extern unsigned huge_arena_ind;
+
+void    *a0malloc(size_t size);
+void     a0dalloc(void *ptr);
+void    *bootstrap_malloc(size_t size);
+void    *bootstrap_calloc(size_t num, size_t size);
+void     bootstrap_free(void *ptr);
+void     arena_set(unsigned ind, arena_t *arena);
 unsigned narenas_total_get(void);
 arena_t *arena_init(tsdn_t *tsdn, unsigned ind, const arena_config_t *config);
 arena_t *arena_choose_hard(tsd_t *tsd, bool internal);
-void arena_migrate(tsd_t *tsd, arena_t *oldarena, arena_t *newarena);
-void iarena_cleanup(tsd_t *tsd);
-void arena_cleanup(tsd_t *tsd);
-size_t batch_alloc(void **ptrs, size_t num, size_t size, int flags);
-void jemalloc_prefork(void);
-void jemalloc_postfork_parent(void);
-void jemalloc_postfork_child(void);
-void je_sdallocx_noflags(void *ptr, size_t size);
-void *malloc_default(size_t size);
+void     arena_migrate(tsd_t *tsd, arena_t *oldarena, arena_t *newarena);
+void     iarena_cleanup(tsd_t *tsd);
+void     arena_cleanup(tsd_t *tsd);
+size_t   batch_alloc(void **ptrs, size_t num, size_t size, int flags);
+void     jemalloc_prefork(void);
+void     jemalloc_postfork_parent(void);
+void     jemalloc_postfork_child(void);
+void     sdallocx_default(void *ptr, size_t size, int flags);
+void     free_default(void *ptr);
+void    *malloc_default(size_t size);
 
 #endif /* JEMALLOC_INTERNAL_EXTERNS_H */
diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h
index 9e27cc30..8513effd 100644
--- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h
+++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h
@@ -1,10 +1,14 @@
 #ifndef JEMALLOC_INTERNAL_INLINES_A_H
 #define JEMALLOC_INTERNAL_INLINES_A_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/arena_externs.h"
+#include "jemalloc/internal/arena_types.h"
 #include "jemalloc/internal/atomic.h"
 #include "jemalloc/internal/bit_util.h"
 #include "jemalloc/internal/jemalloc_internal_types.h"
 #include "jemalloc/internal/sc.h"
+#include "jemalloc/internal/tcache_externs.h"
 #include "jemalloc/internal/ticker.h"
 
 JEMALLOC_ALWAYS_INLINE malloc_cpuid_t
@@ -14,6 +18,15 @@ malloc_getcpu(void) {
 	return GetCurrentProcessorNumber();
 #elif defined(JEMALLOC_HAVE_SCHED_GETCPU)
 	return (malloc_cpuid_t)sched_getcpu();
+#elif defined(JEMALLOC_HAVE_RDTSCP)
+	unsigned int ecx;
+	asm volatile("rdtscp" : "=c"(ecx)::"eax", "edx");
+	return (malloc_cpuid_t)(ecx & 0xfff);
+#elif defined(__aarch64__) && defined(__APPLE__)
+	/* Other oses most likely use tpidr_el0 instead */
+	uintptr_t c;
+	asm volatile("mrs %x0, tpidrro_el0" : "=r"(c)::"memory");
+	return (malloc_cpuid_t)(c & (1 << 3) - 1);
 #else
 	not_reached();
 	return -1;
@@ -29,8 +42,8 @@ percpu_arena_choose(void) {
 	assert(cpuid >= 0);
 
 	unsigned arena_ind;
-	if ((opt_percpu_arena == percpu_arena) || ((unsigned)cpuid < ncpus /
-	    2)) {
+	if ((opt_percpu_arena == percpu_arena)
+	    || ((unsigned)cpuid < ncpus / 2)) {
 		arena_ind = cpuid;
 	} else {
 		assert(opt_percpu_arena == per_phycpu_arena);
diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h
index 152f8a03..dad37a9c 100644
--- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h
+++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h
@@ -1,7 +1,10 @@
 #ifndef JEMALLOC_INTERNAL_INLINES_B_H
 #define JEMALLOC_INTERNAL_INLINES_B_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/arena_inlines_a.h"
 #include "jemalloc/internal/extent.h"
+#include "jemalloc/internal/jemalloc_internal_inlines_a.h"
 
 static inline void
 percpu_arena_update(tsd_t *tsd, unsigned cpu) {
@@ -20,13 +23,13 @@ percpu_arena_update(tsd_t *tsd, unsigned cpu) {
 		tcache_t *tcache = tcache_get(tsd);
 		if (tcache != NULL) {
 			tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd);
-			tcache_arena_reassociate(tsd_tsdn(tsd), tcache_slow,
-			    tcache, newarena);
+			assert(tcache_slow->arena != NULL);
+			tcache_arena_reassociate(
+			    tsd_tsdn(tsd), tcache_slow, tcache, newarena);
 		}
 	}
 }
 
-
 /* Choose an arena based on a per-thread value. */
 static inline arena_t *
 arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) {
@@ -47,18 +50,18 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) {
 		assert(ret);
 		if (tcache_available(tsd)) {
 			tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd);
-			tcache_t *tcache = tsd_tcachep_get(tsd);
+			tcache_t      *tcache = tsd_tcachep_get(tsd);
 			if (tcache_slow->arena != NULL) {
 				/* See comments in tsd_tcache_data_init().*/
-				assert(tcache_slow->arena ==
-				    arena_get(tsd_tsdn(tsd), 0, false));
+				assert(tcache_slow->arena
+				    == arena_get(tsd_tsdn(tsd), 0, false));
 				if (tcache_slow->arena != ret) {
 					tcache_arena_reassociate(tsd_tsdn(tsd),
 					    tcache_slow, tcache, ret);
 				}
 			} else {
-				tcache_arena_associate(tsd_tsdn(tsd),
-				    tcache_slow, tcache, ret);
+				tcache_arena_associate(
+				    tsd_tsdn(tsd), tcache_slow, tcache, ret);
 			}
 		}
 	}
@@ -68,10 +71,10 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) {
 	 * auto percpu arena range, (i.e. thread is assigned to a manually
 	 * managed arena), then percpu arena is skipped.
 	 */
-	if (have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena) &&
-	    !internal && (arena_ind_get(ret) <
-	    percpu_arena_ind_limit(opt_percpu_arena)) && (ret->last_thd !=
-	    tsd_tsdn(tsd))) {
+	if (have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena)
+	    && !internal
+	    && (arena_ind_get(ret) < percpu_arena_ind_limit(opt_percpu_arena))
+	    && (ret->last_thd != tsd_tsdn(tsd))) {
 		unsigned ind = percpu_arena_choose();
 		if (arena_ind_get(ret) != ind) {
 			percpu_arena_update(tsd, ind);
diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h
index b0868b7d..2c61f8c4 100644
--- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h
+++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h
@@ -1,6 +1,10 @@
 #ifndef JEMALLOC_INTERNAL_INLINES_C_H
 #define JEMALLOC_INTERNAL_INLINES_C_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/arena_externs.h"
+#include "jemalloc/internal/arena_inlines_b.h"
+#include "jemalloc/internal/emap.h"
 #include "jemalloc/internal/hook.h"
 #include "jemalloc/internal/jemalloc_internal_types.h"
 #include "jemalloc/internal/log.h"
@@ -8,6 +12,15 @@
 #include "jemalloc/internal/thread_event.h"
 #include "jemalloc/internal/witness.h"
 
+/*
+ * These correspond to the macros in jemalloc/jemalloc_macros.h.  Broadly, we
+ * should have one constant here per magic value there.  Note however that the
+ * representations need not be related.
+ */
+#define TCACHE_IND_NONE ((unsigned)-1)
+#define TCACHE_IND_AUTOMATIC ((unsigned)-2)
+#define ARENA_IND_AUTOMATIC ((unsigned)-1)
+
 /*
  * Translating the names of the 'i' functions:
  *   Abbreviations used in the first part of the function name (before
@@ -41,24 +54,35 @@ isalloc(tsdn_t *tsdn, const void *ptr) {
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache,
-    bool is_internal, arena_t *arena, bool slow_path) {
+iallocztm_explicit_slab(tsdn_t *tsdn, size_t size, szind_t ind, bool zero,
+    bool slab, tcache_t *tcache, bool is_internal, arena_t *arena,
+    bool slow_path) {
 	void *ret;
 
+	assert(!slab || sz_can_use_slab(size)); /* slab && large is illegal */
 	assert(!is_internal || tcache == NULL);
 	assert(!is_internal || arena == NULL || arena_is_auto(arena));
 	if (!tsdn_null(tsdn) && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) == 0) {
-		witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-		    WITNESS_RANK_CORE, 0);
+		witness_assert_depth_to_rank(
+		    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0);
 	}
 
-	ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path);
+	ret = arena_malloc(
+	    tsdn, arena, size, ind, zero, slab, tcache, slow_path);
 	if (config_stats && is_internal && likely(ret != NULL)) {
 		arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret));
 	}
 	return ret;
 }
 
+JEMALLOC_ALWAYS_INLINE void *
+iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache,
+    bool is_internal, arena_t *arena, bool slow_path) {
+	bool slab = sz_can_use_slab(size);
+	return iallocztm_explicit_slab(
+	    tsdn, size, ind, zero, slab, tcache, is_internal, arena, slow_path);
+}
+
 JEMALLOC_ALWAYS_INLINE void *
 ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) {
 	return iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd), false,
@@ -66,18 +90,19 @@ ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) {
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
-    tcache_t *tcache, bool is_internal, arena_t *arena) {
+ipallocztm_explicit_slab(tsdn_t *tsdn, size_t usize, size_t alignment,
+    bool zero, bool slab, tcache_t *tcache, bool is_internal, arena_t *arena) {
 	void *ret;
 
+	assert(!slab || sz_can_use_slab(usize)); /* slab && large is illegal */
 	assert(usize != 0);
 	assert(usize == sz_sa2u(usize, alignment));
 	assert(!is_internal || tcache == NULL);
 	assert(!is_internal || arena == NULL || arena_is_auto(arena));
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
+	witness_assert_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0);
 
-	ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache);
+	ret = arena_palloc(tsdn, arena, usize, alignment, zero, slab, tcache);
 	assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret);
 	if (config_stats && is_internal && likely(ret != NULL)) {
 		arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret));
@@ -85,12 +110,26 @@ ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
 	return ret;
 }
 
+JEMALLOC_ALWAYS_INLINE void *
+ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
+    tcache_t *tcache, bool is_internal, arena_t *arena) {
+	return ipallocztm_explicit_slab(tsdn, usize, alignment, zero,
+	    sz_can_use_slab(usize), tcache, is_internal, arena);
+}
+
 JEMALLOC_ALWAYS_INLINE void *
 ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
     tcache_t *tcache, arena_t *arena) {
 	return ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena);
 }
 
+JEMALLOC_ALWAYS_INLINE void *
+ipalloct_explicit_slab(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
+    bool slab, tcache_t *tcache, arena_t *arena) {
+	return ipallocztm_explicit_slab(
+	    tsdn, usize, alignment, zero, slab, tcache, false, arena);
+}
+
 JEMALLOC_ALWAYS_INLINE void *
 ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) {
 	return ipallocztm(tsd_tsdn(tsd), usize, alignment, zero,
@@ -108,13 +147,13 @@ idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
 	assert(ptr != NULL);
 	assert(!is_internal || tcache == NULL);
 	assert(!is_internal || arena_is_auto(iaalloc(tsdn, ptr)));
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
+	witness_assert_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0);
 	if (config_stats && is_internal) {
 		arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, ptr));
 	}
-	if (!is_internal && !tsdn_null(tsdn) &&
-	    tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) {
+	if (!is_internal && !tsdn_null(tsdn)
+	    && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) {
 		assert(tcache == NULL);
 	}
 	arena_dalloc(tsdn, ptr, tcache, alloc_ctx, slow_path);
@@ -128,25 +167,26 @@ idalloc(tsd_t *tsd, void *ptr) {
 JEMALLOC_ALWAYS_INLINE void
 isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
     emap_alloc_ctx_t *alloc_ctx, bool slow_path) {
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
+	witness_assert_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0);
 	arena_sdalloc(tsdn, ptr, size, tcache, alloc_ctx, slow_path);
 }
 
 JEMALLOC_ALWAYS_INLINE void *
 iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
-    size_t alignment, bool zero, tcache_t *tcache, arena_t *arena,
+    size_t alignment, bool zero, bool slab, tcache_t *tcache, arena_t *arena,
     hook_ralloc_args_t *hook_args) {
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
-	void *p;
+	witness_assert_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0);
+	void  *p;
 	size_t usize, copysize;
 
 	usize = sz_sa2u(size, alignment);
 	if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) {
 		return NULL;
 	}
-	p = ipalloct(tsdn, usize, alignment, zero, tcache, arena);
+	p = ipalloct_explicit_slab(
+	    tsdn, usize, alignment, zero, slab, tcache, arena);
 	if (p == NULL) {
 		return NULL;
 	}
@@ -156,11 +196,12 @@ iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
 	 */
 	copysize = (size < oldsize) ? size : oldsize;
 	memcpy(p, ptr, copysize);
-	hook_invoke_alloc(hook_args->is_realloc
-	    ? hook_alloc_realloc : hook_alloc_rallocx, p, (uintptr_t)p,
-	    hook_args->args);
-	hook_invoke_dalloc(hook_args->is_realloc
-	    ? hook_dalloc_realloc : hook_dalloc_rallocx, ptr, hook_args->args);
+	hook_invoke_alloc(
+	    hook_args->is_realloc ? hook_alloc_realloc : hook_alloc_rallocx, p,
+	    (uintptr_t)p, hook_args->args);
+	hook_invoke_dalloc(
+	    hook_args->is_realloc ? hook_dalloc_realloc : hook_dalloc_rallocx,
+	    ptr, hook_args->args);
 	isdalloct(tsdn, ptr, oldsize, tcache, NULL, true);
 	return p;
 }
@@ -173,33 +214,42 @@ iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
  * passed-around anywhere.
  */
 JEMALLOC_ALWAYS_INLINE void *
-iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment,
-    bool zero, tcache_t *tcache, arena_t *arena, hook_ralloc_args_t *hook_args)
-{
+iralloct_explicit_slab(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
+    size_t alignment, bool zero, bool slab, tcache_t *tcache, arena_t *arena,
+    hook_ralloc_args_t *hook_args) {
 	assert(ptr != NULL);
 	assert(size != 0);
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
+	witness_assert_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0);
 
-	if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
-	    != 0) {
+	if (alignment != 0
+	    && ((uintptr_t)ptr & ((uintptr_t)alignment - 1)) != 0) {
 		/*
 		 * Existing object alignment is inadequate; allocate new space
 		 * and copy.
 		 */
 		return iralloct_realign(tsdn, ptr, oldsize, size, alignment,
-		    zero, tcache, arena, hook_args);
+		    zero, slab, tcache, arena, hook_args);
 	}
 
 	return arena_ralloc(tsdn, arena, ptr, oldsize, size, alignment, zero,
-	    tcache, hook_args);
+	    slab, tcache, hook_args);
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment,
+    size_t usize, bool zero, tcache_t *tcache, arena_t *arena,
+    hook_ralloc_args_t *hook_args) {
+	bool slab = sz_can_use_slab(usize);
+	return iralloct_explicit_slab(tsdn, ptr, oldsize, size, alignment, zero,
+	    slab, tcache, arena, hook_args);
 }
 
 JEMALLOC_ALWAYS_INLINE void *
 iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment,
-    bool zero, hook_ralloc_args_t *hook_args) {
-	return iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, zero,
-	    tcache_get(tsd), NULL, hook_args);
+    size_t usize, bool zero, hook_ralloc_args_t *hook_args) {
+	return iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, usize,
+	    zero, tcache_get(tsd), NULL, hook_args);
 }
 
 JEMALLOC_ALWAYS_INLINE bool
@@ -207,29 +257,27 @@ ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra,
     size_t alignment, bool zero, size_t *newsize) {
 	assert(ptr != NULL);
 	assert(size != 0);
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
+	witness_assert_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0);
 
-	if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
-	    != 0) {
+	if (alignment != 0
+	    && ((uintptr_t)ptr & ((uintptr_t)alignment - 1)) != 0) {
 		/* Existing object alignment is inadequate. */
 		*newsize = oldsize;
 		return true;
 	}
 
-	return arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero,
-	    newsize);
+	return arena_ralloc_no_move(
+	    tsdn, ptr, oldsize, size, extra, zero, newsize);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-fastpath_success_finish(tsd_t *tsd, uint64_t allocated_after,
-    cache_bin_t *bin, void *ret) {
+fastpath_success_finish(
+    tsd_t *tsd, uint64_t allocated_after, cache_bin_t *bin, void *ret) {
 	thread_allocated_set(tsd, allocated_after);
 	if (config_stats) {
 		bin->tstats.nrequests++;
 	}
-
-	LOG("core.malloc.exit", "result: %p", ret);
 }
 
 JEMALLOC_ALWAYS_INLINE bool
@@ -256,7 +304,6 @@ malloc_initialized(void) {
  */
 JEMALLOC_ALWAYS_INLINE void *
 imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) {
-	LOG("core.malloc.entry", "size: %zu", size);
 	if (tsd_get_allocates() && unlikely(!malloc_initialized())) {
 		return fallback_alloc(size);
 	}
@@ -284,8 +331,8 @@ imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) {
 	sz_size2index_usize_fastpath(size, &ind, &usize);
 	/* Fast path relies on size being a bin. */
 	assert(ind < SC_NBINS);
-	assert((SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS) &&
-	    (size <= SC_SMALL_MAXCLASS));
+	assert((SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS)
+	    && (size <= SC_SMALL_MAXCLASS));
 
 	uint64_t allocated, threshold;
 	te_malloc_fastpath_ctx(tsd, &allocated, &threshold);
@@ -314,7 +361,9 @@ imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) {
 	tcache_t *tcache = tsd_tcachep_get(tsd);
 	assert(tcache == tcache_get(tsd));
 	cache_bin_t *bin = &tcache->bins[ind];
-	bool tcache_success;
+	/* Suppress spurious warning from static analysis */
+	assert(bin != NULL);
+	bool  tcache_success;
 	void *ret;
 
 	/*
@@ -337,4 +386,215 @@ imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) {
 	return fallback_alloc(size);
 }
 
+JEMALLOC_ALWAYS_INLINE tcache_t *
+tcache_get_from_ind(tsd_t *tsd, unsigned tcache_ind, bool slow, bool is_alloc) {
+	tcache_t *tcache;
+	if (tcache_ind == TCACHE_IND_AUTOMATIC) {
+		if (likely(!slow)) {
+			/* Getting tcache ptr unconditionally. */
+			tcache = tsd_tcachep_get(tsd);
+			assert(tcache == tcache_get(tsd));
+		} else if (is_alloc
+		    || likely(tsd_reentrancy_level_get(tsd) == 0)) {
+			tcache = tcache_get(tsd);
+		} else {
+			tcache = NULL;
+		}
+	} else {
+		/*
+                 * Should not specify tcache on deallocation path when being
+                 * reentrant.
+                 */
+		assert(is_alloc || tsd_reentrancy_level_get(tsd) == 0
+		    || tsd_state_nocleanup(tsd));
+		if (tcache_ind == TCACHE_IND_NONE) {
+			tcache = NULL;
+		} else {
+			tcache = tcaches_get(tsd, tcache_ind);
+		}
+	}
+	return tcache;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) {
+	if (config_opt_size_checks) {
+		emap_alloc_ctx_t dbg_ctx;
+		emap_alloc_ctx_lookup(
+		    tsd_tsdn(tsd), &arena_emap_global, ptr, &dbg_ctx);
+		if (alloc_ctx->szind != dbg_ctx.szind) {
+			safety_check_fail_sized_dealloc(
+			    /* current_dealloc */ true, ptr,
+			    /* true_size */ emap_alloc_ctx_usize_get(&dbg_ctx),
+			    /* input_size */
+			    emap_alloc_ctx_usize_get(alloc_ctx));
+			return true;
+		}
+		if (alloc_ctx->slab != dbg_ctx.slab) {
+			safety_check_fail(
+			    "Internal heap corruption detected: "
+			    "mismatch in slab bit");
+			return true;
+		}
+	}
+	return false;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+prof_sample_aligned(const void *ptr) {
+	return ((uintptr_t)ptr & PROF_SAMPLE_ALIGNMENT_MASK) == 0;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+free_fastpath_nonfast_aligned(void *ptr, bool check_prof) {
+	/*
+         * free_fastpath do not handle two uncommon cases: 1) sampled profiled
+         * objects and 2) sampled junk & stash for use-after-free detection.
+         * Both have special alignments which are used to escape the fastpath.
+         *
+         * prof_sample is page-aligned, which covers the UAF check when both
+         * are enabled (the assertion below).  Avoiding redundant checks since
+         * this is on the fastpath -- at most one runtime branch from this.
+         */
+	if (config_debug && cache_bin_nonfast_aligned(ptr)) {
+		assert(prof_sample_aligned(ptr));
+	}
+
+	if (config_prof && check_prof) {
+		/* When prof is enabled, the prof_sample alignment is enough. */
+		if (prof_sample_aligned(ptr)) {
+			return true;
+		} else {
+			return false;
+		}
+	}
+
+	if (config_uaf_detection) {
+		if (cache_bin_nonfast_aligned(ptr)) {
+			return true;
+		} else {
+			return false;
+		}
+	}
+
+	return false;
+}
+
+/* Returns whether or not the free attempt was successful. */
+JEMALLOC_ALWAYS_INLINE
+bool
+free_fastpath(void *ptr, size_t size, bool size_hint) {
+	tsd_t *tsd = tsd_get(false);
+	/* The branch gets optimized away unless tsd_get_allocates(). */
+	if (unlikely(tsd == NULL)) {
+		return false;
+	}
+	/*
+         *  The tsd_fast() / initialized checks are folded into the branch
+         *  testing (deallocated_after >= threshold) later in this function.
+         *  The threshold will be set to 0 when !tsd_fast.
+         */
+	assert(tsd_fast(tsd)
+	    || *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0);
+
+	emap_alloc_ctx_t alloc_ctx JEMALLOC_CC_SILENCE_INIT({0, 0, false});
+	size_t                     usize;
+	if (!size_hint) {
+		bool err = emap_alloc_ctx_try_lookup_fast(
+		    tsd, &arena_emap_global, ptr, &alloc_ctx);
+
+		/* Note: profiled objects will have alloc_ctx.slab set */
+		if (unlikely(err || !alloc_ctx.slab
+		        || free_fastpath_nonfast_aligned(ptr,
+		            /* check_prof */ false))) {
+			return false;
+		}
+		assert(alloc_ctx.szind != SC_NSIZES);
+		usize = sz_index2size(alloc_ctx.szind);
+	} else {
+		/*
+                 * Check for both sizes that are too large, and for sampled /
+                 * special aligned objects.  The alignment check will also check
+                 * for null ptr.
+                 */
+		if (unlikely(size > SC_LOOKUP_MAXCLASS
+		        || free_fastpath_nonfast_aligned(ptr,
+		            /* check_prof */ true))) {
+			return false;
+		}
+		sz_size2index_usize_fastpath(size, &alloc_ctx.szind, &usize);
+		/* Max lookup class must be small. */
+		assert(alloc_ctx.szind < SC_NBINS);
+		/* This is a dead store, except when opt size checking is on. */
+		alloc_ctx.slab = true;
+	}
+	/*
+         * Currently the fastpath only handles small sizes.  The branch on
+         * SC_LOOKUP_MAXCLASS makes sure of it.  This lets us avoid checking
+         * tcache szind upper limit (i.e. tcache_max) as well.
+         */
+	assert(alloc_ctx.slab);
+
+	uint64_t deallocated, threshold;
+	te_free_fastpath_ctx(tsd, &deallocated, &threshold);
+
+	uint64_t deallocated_after = deallocated + usize;
+	/*
+         * Check for events and tsd non-nominal (fast_threshold will be set to
+         * 0) in a single branch.  Note that this handles the uninitialized case
+         * as well (TSD init will be triggered on the non-fastpath).  Therefore
+         * anything depends on a functional TSD (e.g. the alloc_ctx sanity check
+         * below) needs to be after this branch.
+         */
+	if (unlikely(deallocated_after >= threshold)) {
+		return false;
+	}
+	assert(tsd_fast(tsd));
+	bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx);
+	if (fail) {
+		/* See the comment in isfree. */
+		return true;
+	}
+
+	tcache_t    *tcache = tcache_get_from_ind(tsd, TCACHE_IND_AUTOMATIC,
+	       /* slow */ false, /* is_alloc */ false);
+	cache_bin_t *bin = &tcache->bins[alloc_ctx.szind];
+
+	/*
+         * If junking were enabled, this is where we would do it.  It's not
+         * though, since we ensured above that we're on the fast path.  Assert
+         * that to double-check.
+         */
+	assert(!opt_junk_free);
+
+	if (!cache_bin_dalloc_easy(bin, ptr)) {
+		return false;
+	}
+
+	*tsd_thread_deallocatedp_get(tsd) = deallocated_after;
+
+	return true;
+}
+
+JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW
+je_sdallocx_noflags(void *ptr, size_t size) {
+	if (!free_fastpath(ptr, size, true)) {
+		sdallocx_default(ptr, size, 0);
+	}
+}
+
+JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW
+je_sdallocx_impl(void *ptr, size_t size, int flags) {
+	if (flags != 0 || !free_fastpath(ptr, size, true)) {
+		sdallocx_default(ptr, size, flags);
+	}
+}
+
+JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW
+je_free_impl(void *ptr) {
+	if (!free_fastpath(ptr, 0, false)) {
+		free_default(ptr);
+	}
+}
+
 #endif /* JEMALLOC_INTERNAL_INLINES_C_H */
diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h
index e97b5f90..eb1ca119 100644
--- a/include/jemalloc/internal/jemalloc_internal_macros.h
+++ b/include/jemalloc/internal/jemalloc_internal_macros.h
@@ -2,43 +2,46 @@
 #define JEMALLOC_INTERNAL_MACROS_H
 
 #ifdef JEMALLOC_DEBUG
-#  define JEMALLOC_ALWAYS_INLINE static inline
+#	define JEMALLOC_ALWAYS_INLINE static inline
 #else
-#  ifdef _MSC_VER
-#    define JEMALLOC_ALWAYS_INLINE static __forceinline
-#  else
-#    define JEMALLOC_ALWAYS_INLINE JEMALLOC_ATTR(always_inline) static inline
-#  endif
+#	ifdef _MSC_VER
+#		define JEMALLOC_ALWAYS_INLINE static __forceinline
+#	else
+#		define JEMALLOC_ALWAYS_INLINE                                 \
+			JEMALLOC_ATTR(always_inline) static inline
+#	endif
 #endif
 #ifdef _MSC_VER
-#  define inline _inline
+#	define inline _inline
 #endif
 
 #define UNUSED JEMALLOC_ATTR(unused)
 
-#define ZU(z)	((size_t)z)
-#define ZD(z)	((ssize_t)z)
-#define QU(q)	((uint64_t)q)
-#define QD(q)	((int64_t)q)
+#define ZU(z) ((size_t)z)
+#define ZD(z) ((ssize_t)z)
+#define QU(q) ((uint64_t)q)
+#define QD(q) ((int64_t)q)
 
-#define KZU(z)	ZU(z##ULL)
-#define KZD(z)	ZD(z##LL)
-#define KQU(q)	QU(q##ULL)
-#define KQD(q)	QI(q##LL)
+#define KZU(z) ZU(z##ULL)
+#define KZD(z) ZD(z##LL)
+#define KQU(q) QU(q##ULL)
+#define KQD(q) QI(q##LL)
 
 #ifndef __DECONST
-#  define	__DECONST(type, var)	((type)(uintptr_t)(const void *)(var))
+#	define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var))
 #endif
 
 #if !defined(JEMALLOC_HAS_RESTRICT) || defined(__cplusplus)
-#  define restrict
+#	define restrict
 #endif
 
 /* Various function pointers are static and immutable except during testing. */
 #ifdef JEMALLOC_JET
-#  define JET_MUTABLE
+#	define JET_MUTABLE
+#	define JET_EXTERN extern
 #else
-#  define JET_MUTABLE const
+#	define JET_MUTABLE const
+#	define JET_EXTERN static
 #endif
 
 #define JEMALLOC_VA_ARGS_HEAD(head, ...) head
@@ -46,62 +49,94 @@
 
 /* Diagnostic suppression macros */
 #if defined(_MSC_VER) && !defined(__clang__)
-#  define JEMALLOC_DIAGNOSTIC_PUSH __pragma(warning(push))
-#  define JEMALLOC_DIAGNOSTIC_POP __pragma(warning(pop))
-#  define JEMALLOC_DIAGNOSTIC_IGNORE(W) __pragma(warning(disable:W))
-#  define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
-#  define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS
-#  define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN
-#  define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS
+#	define JEMALLOC_DIAGNOSTIC_PUSH __pragma(warning(push))
+#	define JEMALLOC_DIAGNOSTIC_POP __pragma(warning(pop))
+#	define JEMALLOC_DIAGNOSTIC_IGNORE(W) __pragma(warning(disable : W))
+#	define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
+#	define JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS
+#	define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS
+#	define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN
+#	define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED
+#	define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS
 /* #pragma GCC diagnostic first appeared in gcc 4.6. */
-#elif (defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && \
-  (__GNUC_MINOR__ > 5)))) || defined(__clang__)
+#elif (defined(__GNUC__)                                                       \
+    && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 5))))          \
+    || defined(__clang__)
 /*
  * The JEMALLOC_PRAGMA__ macro is an implementation detail of the GCC and Clang
  * diagnostic suppression macros and should not be used anywhere else.
  */
-#  define JEMALLOC_PRAGMA__(X) _Pragma(#X)
-#  define JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_PRAGMA__(GCC diagnostic push)
-#  define JEMALLOC_DIAGNOSTIC_POP JEMALLOC_PRAGMA__(GCC diagnostic pop)
-#  define JEMALLOC_DIAGNOSTIC_IGNORE(W) \
-     JEMALLOC_PRAGMA__(GCC diagnostic ignored W)
+#	define JEMALLOC_PRAGMA__(X) _Pragma(#X)
+#	define JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_PRAGMA__(GCC diagnostic push)
+#	define JEMALLOC_DIAGNOSTIC_POP JEMALLOC_PRAGMA__(GCC diagnostic pop)
+#	define JEMALLOC_DIAGNOSTIC_IGNORE(W)                                  \
+		JEMALLOC_PRAGMA__(GCC diagnostic ignored W)
 
 /*
  * The -Wmissing-field-initializers warning is buggy in GCC versions < 5.1 and
  * all clang versions up to version 7 (currently trunk, unreleased).  This macro
  * suppresses the warning for the affected compiler versions only.
  */
-#  if ((defined(__GNUC__) && !defined(__clang__)) && (__GNUC__ < 5)) || \
-     defined(__clang__)
-#    define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS  \
-          JEMALLOC_DIAGNOSTIC_IGNORE("-Wmissing-field-initializers")
-#  else
-#    define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
-#  endif
+#	if ((defined(__GNUC__) && !defined(__clang__)) && (__GNUC__ < 5))     \
+	    || defined(__clang__)
+#		define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS \
+			JEMALLOC_DIAGNOSTIC_IGNORE(                                  \
+			    "-Wmissing-field-initializers")
+#	else
+#		define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
+#	endif
 
-#  define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS  \
-     JEMALLOC_DIAGNOSTIC_IGNORE("-Wtype-limits")
-#  define JEMALLOC_DIAGNOSTIC_IGNORE_UNUSED_PARAMETER \
-     JEMALLOC_DIAGNOSTIC_IGNORE("-Wunused-parameter")
-#  if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ >= 7)
-#    define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN \
-       JEMALLOC_DIAGNOSTIC_IGNORE("-Walloc-size-larger-than=")
-#  else
-#    define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN
-#  endif
-#  define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS \
-  JEMALLOC_DIAGNOSTIC_PUSH \
-  JEMALLOC_DIAGNOSTIC_IGNORE_UNUSED_PARAMETER
+#	define JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS                       \
+		JEMALLOC_DIAGNOSTIC_IGNORE("-Wframe-address")
+#	define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS                         \
+		JEMALLOC_DIAGNOSTIC_IGNORE("-Wtype-limits")
+#	define JEMALLOC_DIAGNOSTIC_IGNORE_UNUSED_PARAMETER                    \
+		JEMALLOC_DIAGNOSTIC_IGNORE("-Wunused-parameter")
+#	if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ >= 7)
+#		define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN      \
+			JEMALLOC_DIAGNOSTIC_IGNORE("-Walloc-size-larger-than=")
+#	else
+#		define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN
+#	endif
+#	ifdef JEMALLOC_HAVE_ATTR_DEPRECATED
+#		define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED                  \
+			JEMALLOC_DIAGNOSTIC_IGNORE("-Wdeprecated-declarations")
+#	else
+#		define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED
+#	endif
+#	define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS                           \
+		JEMALLOC_DIAGNOSTIC_PUSH                                       \
+		JEMALLOC_DIAGNOSTIC_IGNORE_UNUSED_PARAMETER
 #else
-#  define JEMALLOC_DIAGNOSTIC_PUSH
-#  define JEMALLOC_DIAGNOSTIC_POP
-#  define JEMALLOC_DIAGNOSTIC_IGNORE(W)
-#  define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
-#  define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS
-#  define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN
-#  define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS
+#	define JEMALLOC_DIAGNOSTIC_PUSH
+#	define JEMALLOC_DIAGNOSTIC_POP
+#	define JEMALLOC_DIAGNOSTIC_IGNORE(W)
+#	define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
+#	define JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS
+#	define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS
+#	define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN
+#	define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED
+#	define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS
 #endif
 
+#ifdef __clang_analyzer__
+#	define JEMALLOC_CLANG_ANALYZER
+#endif
+
+#ifdef JEMALLOC_CLANG_ANALYZER
+#	define JEMALLOC_CLANG_ANALYZER_SUPPRESS __attribute__((suppress))
+#	define JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(v) = v
+#else
+#	define JEMALLOC_CLANG_ANALYZER_SUPPRESS
+#	define JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(v)
+#endif
+
+#define JEMALLOC_SUPPRESS_WARN_ON_USAGE(...)                                   \
+	JEMALLOC_DIAGNOSTIC_PUSH                                               \
+	JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED                                  \
+	__VA_ARGS__                                                            \
+	JEMALLOC_DIAGNOSTIC_POP
+
 /*
  * Disables spurious diagnostics for all headers.  Since these headers are not
  * included by users directly, it does not affect their diagnostic settings.
diff --git a/include/jemalloc/internal/jemalloc_internal_overrides.h b/include/jemalloc/internal/jemalloc_internal_overrides.h
new file mode 100644
index 00000000..bf74a612
--- /dev/null
+++ b/include/jemalloc/internal/jemalloc_internal_overrides.h
@@ -0,0 +1,22 @@
+#ifndef JEMALLOC_INTERNAL_OVERRIDES_H
+#define JEMALLOC_INTERNAL_OVERRIDES_H
+
+/*
+ * Under normal circumstances this header serves no purpose, as these settings
+ * can be customized via the corresponding autoconf options at configure-time.
+ * Overriding in this fashion is useful when the header files generated by
+ * autoconf are used as input for another build system.
+ */
+
+#ifdef JEMALLOC_OVERRIDE_LG_PAGE
+#	undef LG_PAGE
+#	define LG_PAGE JEMALLOC_OVERRIDE_LG_PAGE
+#endif
+
+#ifdef JEMALLOC_OVERRIDE_JEMALLOC_CONFIG_MALLOC_CONF
+#	undef JEMALLOC_CONFIG_MALLOC_CONF
+#	define JEMALLOC_CONFIG_MALLOC_CONF                                    \
+		JEMALLOC_OVERRIDE_JEMALLOC_CONFIG_MALLOC_CONF
+#endif
+
+#endif /* JEMALLOC_INTERNAL_OVERRIDES_H */
diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h
index 62c2b59c..0ade5461 100644
--- a/include/jemalloc/internal/jemalloc_internal_types.h
+++ b/include/jemalloc/internal/jemalloc_internal_types.h
@@ -18,13 +18,13 @@ enum zero_realloc_action_e {
 typedef enum zero_realloc_action_e zero_realloc_action_t;
 
 /* Signature of write callback. */
-typedef void (write_cb_t)(void *, const char *);
+typedef void(write_cb_t)(void *, const char *);
 
 enum malloc_init_e {
-	malloc_init_uninitialized	= 3,
-	malloc_init_a0_initialized	= 2,
-	malloc_init_recursible		= 1,
-	malloc_init_initialized		= 0 /* Common case --> jnz. */
+	malloc_init_uninitialized = 3,
+	malloc_init_a0_initialized = 2,
+	malloc_init_recursible = 1,
+	malloc_init_initialized = 0 /* Common case --> jnz. */
 };
 typedef enum malloc_init_e malloc_init_t;
 
@@ -39,48 +39,46 @@ typedef enum malloc_init_e malloc_init_t;
  *
  * aaaaaaaa aaaatttt tttttttt 0znnnnnn
  */
-#define MALLOCX_ARENA_BITS	12
-#define MALLOCX_TCACHE_BITS	12
-#define MALLOCX_LG_ALIGN_BITS	6
-#define MALLOCX_ARENA_SHIFT	20
-#define MALLOCX_TCACHE_SHIFT	8
-#define MALLOCX_ARENA_MASK \
-    (((1 << MALLOCX_ARENA_BITS) - 1) << MALLOCX_ARENA_SHIFT)
+#define MALLOCX_ARENA_BITS 12
+#define MALLOCX_TCACHE_BITS 12
+#define MALLOCX_LG_ALIGN_BITS 6
+#define MALLOCX_ARENA_SHIFT 20
+#define MALLOCX_TCACHE_SHIFT 8
+#define MALLOCX_ARENA_MASK                                                     \
+	((unsigned)(((1U << MALLOCX_ARENA_BITS) - 1) << MALLOCX_ARENA_SHIFT))
 /* NB: Arena index bias decreases the maximum number of arenas by 1. */
-#define MALLOCX_ARENA_LIMIT	((1 << MALLOCX_ARENA_BITS) - 1)
-#define MALLOCX_TCACHE_MASK \
-    (((1 << MALLOCX_TCACHE_BITS) - 1) << MALLOCX_TCACHE_SHIFT)
-#define MALLOCX_TCACHE_MAX	((1 << MALLOCX_TCACHE_BITS) - 3)
-#define MALLOCX_LG_ALIGN_MASK	((1 << MALLOCX_LG_ALIGN_BITS) - 1)
+#define MALLOCX_ARENA_LIMIT ((unsigned)((1U << MALLOCX_ARENA_BITS) - 1))
+#define MALLOCX_TCACHE_MASK                                                    \
+	((unsigned)(((1U << MALLOCX_TCACHE_BITS) - 1) << MALLOCX_TCACHE_SHIFT))
+#define MALLOCX_TCACHE_MAX ((unsigned)((1U << MALLOCX_TCACHE_BITS) - 3))
+#define MALLOCX_LG_ALIGN_MASK ((1 << MALLOCX_LG_ALIGN_BITS) - 1)
 /* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. */
-#define MALLOCX_ALIGN_GET_SPECIFIED(flags)				\
-    (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK))
-#define MALLOCX_ALIGN_GET(flags)					\
-    (MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX-1))
-#define MALLOCX_ZERO_GET(flags)						\
-    ((bool)(flags & MALLOCX_ZERO))
+#define MALLOCX_ALIGN_GET_SPECIFIED(flags)                                     \
+	(ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK))
+#define MALLOCX_ALIGN_GET(flags)                                               \
+	(MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX - 1))
+#define MALLOCX_ZERO_GET(flags) ((bool)(flags & MALLOCX_ZERO))
 
-#define MALLOCX_TCACHE_GET(flags)					\
-    (((unsigned)((flags & MALLOCX_TCACHE_MASK) >> MALLOCX_TCACHE_SHIFT)) - 2)
-#define MALLOCX_ARENA_GET(flags)					\
-    (((unsigned)(((unsigned)flags) >> MALLOCX_ARENA_SHIFT)) - 1)
+#define MALLOCX_TCACHE_GET(flags)                                              \
+	(((unsigned)((flags & MALLOCX_TCACHE_MASK) >> MALLOCX_TCACHE_SHIFT))   \
+	    - 2)
+#define MALLOCX_ARENA_GET(flags)                                               \
+	(((unsigned)(((unsigned)flags) >> MALLOCX_ARENA_SHIFT)) - 1)
 
 /* Smallest size class to support. */
-#define TINY_MIN		(1U << LG_TINY_MIN)
+#define TINY_MIN (1U << LG_TINY_MIN)
 
-#define LONG			((size_t)(1U << LG_SIZEOF_LONG))
-#define LONG_MASK		(LONG - 1)
+#define LONG ((size_t)(1U << LG_SIZEOF_LONG))
+#define LONG_MASK (LONG - 1)
 
 /* Return the smallest long multiple that is >= a. */
-#define LONG_CEILING(a)							\
-	(((a) + LONG_MASK) & ~LONG_MASK)
+#define LONG_CEILING(a) (((a) + LONG_MASK) & ~LONG_MASK)
 
-#define SIZEOF_PTR		(1U << LG_SIZEOF_PTR)
-#define PTR_MASK		(SIZEOF_PTR - 1)
+#define SIZEOF_PTR (1U << LG_SIZEOF_PTR)
+#define PTR_MASK (SIZEOF_PTR - 1)
 
 /* Return the smallest (void *) multiple that is >= a. */
-#define PTR_CEILING(a)							\
-	(((a) + PTR_MASK) & ~PTR_MASK)
+#define PTR_CEILING(a) (((a) + PTR_MASK) & ~PTR_MASK)
 
 /*
  * Maximum size of L1 cache line.  This is used to avoid cache line aliasing.
@@ -89,42 +87,62 @@ typedef enum malloc_init_e malloc_init_t;
  * CACHELINE cannot be based on LG_CACHELINE because __declspec(align()) can
  * only handle raw constants.
  */
-#define LG_CACHELINE		6
-#define CACHELINE		64
-#define CACHELINE_MASK		(CACHELINE - 1)
+#define LG_CACHELINE 6
+#define CACHELINE 64
+#define CACHELINE_MASK (CACHELINE - 1)
 
 /* Return the smallest cacheline multiple that is >= s. */
-#define CACHELINE_CEILING(s)						\
-	(((s) + CACHELINE_MASK) & ~CACHELINE_MASK)
+#define CACHELINE_CEILING(s) (((s) + CACHELINE_MASK) & ~CACHELINE_MASK)
 
 /* Return the nearest aligned address at or below a. */
-#define ALIGNMENT_ADDR2BASE(a, alignment)				\
-	((void *)((uintptr_t)(a) & ((~(alignment)) + 1)))
+#define ALIGNMENT_ADDR2BASE(a, alignment)                                      \
+	((void *)(((byte_t *)(a))                                              \
+	    - (((uintptr_t)(a)) - ((uintptr_t)(a) & ((~(alignment)) + 1)))))
 
 /* Return the offset between a and the nearest aligned address at or below a. */
-#define ALIGNMENT_ADDR2OFFSET(a, alignment)				\
+#define ALIGNMENT_ADDR2OFFSET(a, alignment)                                    \
 	((size_t)((uintptr_t)(a) & (alignment - 1)))
 
 /* Return the smallest alignment multiple that is >= s. */
-#define ALIGNMENT_CEILING(s, alignment)					\
+#define ALIGNMENT_CEILING(s, alignment)                                        \
 	(((s) + (alignment - 1)) & ((~(alignment)) + 1))
 
+/*
+ * Return the nearest aligned address at or above a.
+ *
+ * While at first glance this would appear to be merely a more complicated
+ * way to perform the same computation as `ALIGNMENT_CEILING`,
+ * this has the important additional property of not concealing pointer
+ * provenance from the compiler. See the block-comment on the
+ * definition of `byte_t` for more details.
+ */
+#define ALIGNMENT_ADDR2CEILING(a, alignment)                                   \
+	((void *)(((byte_t *)(a))                                              \
+	    + (((((uintptr_t)(a)) + (alignment - 1)) & ((~(alignment)) + 1))   \
+	        - ((uintptr_t)(a)))))
+
 /* Declare a variable-length array. */
-#if __STDC_VERSION__ < 199901L
-#  ifdef _MSC_VER
-#    include <malloc.h>
-#    define alloca _alloca
-#  else
-#    ifdef JEMALLOC_HAS_ALLOCA_H
-#      include <alloca.h>
-#    else
-#      include <stdlib.h>
-#    endif
-#  endif
-#  define VARIABLE_ARRAY(type, name, count) \
-	type *name = alloca(sizeof(type) * (count))
+#if __STDC_VERSION__ < 199901L || defined(__STDC_NO_VLA__)
+#	ifdef _MSC_VER
+#		include <malloc.h>
+#		define alloca _alloca
+#	else
+#		ifdef JEMALLOC_HAS_ALLOCA_H
+#			include <alloca.h>
+#		else
+#			include <stdlib.h>
+#		endif
+#	endif
+#	define VARIABLE_ARRAY_UNSAFE(type, name, count)                       \
+		type *name = alloca(sizeof(type) * (count))
 #else
-#  define VARIABLE_ARRAY(type, name, count) type name[(count)]
+#	define VARIABLE_ARRAY_UNSAFE(type, name, count) type name[(count)]
 #endif
+#define VARIABLE_ARRAY_SIZE_MAX 2048
+#define VARIABLE_ARRAY(type, name, count)                                      \
+	assert(sizeof(type) * (count) <= VARIABLE_ARRAY_SIZE_MAX);             \
+	VARIABLE_ARRAY_UNSAFE(type, name, count)
+
+#define CALLOC_MADVISE_THRESHOLD_DEFAULT (((size_t)1) << 23) /* 8 MB */
 
 #endif /* JEMALLOC_INTERNAL_TYPES_H */
diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in
index 5ce77d96..bbfe2513 100644
--- a/include/jemalloc/internal/jemalloc_preamble.h.in
+++ b/include/jemalloc/internal/jemalloc_preamble.h.in
@@ -1,7 +1,7 @@
 #ifndef JEMALLOC_PREAMBLE_H
 #define JEMALLOC_PREAMBLE_H
 
-#include "jemalloc_internal_defs.h"
+#include "jemalloc/internal/jemalloc_internal_defs.h"
 #include "jemalloc/internal/jemalloc_internal_decls.h"
 
 #if defined(JEMALLOC_UTRACE) || defined(JEMALLOC_UTRACE_LABEL)
@@ -57,6 +57,15 @@
 #  define JEMALLOC_MADV_FREE 8
 #endif
 
+/*
+ * Can be defined at compile time, in cases, when it is known
+ * madvise(..., MADV_COLLAPSE) feature is supported, but MADV_COLLAPSE
+ * constant is not defined.
+ */
+#ifdef JEMALLOC_DEFINE_MADVISE_COLLAPSE
+#  define JEMALLOC_MADV_COLLAPSE 25
+#endif
+
 static const bool config_debug =
 #ifdef JEMALLOC_DEBUG
     true
@@ -78,6 +87,13 @@ static const bool have_madvise_huge =
     false
 #endif
     ;
+static const bool have_process_madvise =
+#ifdef JEMALLOC_HAVE_PROCESS_MADVISE
+    true
+#else
+    false
+#endif
+    ;
 static const bool config_fill =
 #ifdef JEMALLOC_FILL
     true
@@ -114,6 +130,13 @@ static const bool config_prof_libunwind =
     false
 #endif
     ;
+static const bool config_prof_frameptr =
+#ifdef JEMALLOC_PROF_FRAME_POINTER
+    true
+#else
+    false
+#endif
+    ;
 static const bool maps_coalesce =
 #ifdef JEMALLOC_MAPS_COALESCE
     true
@@ -215,7 +238,7 @@ static const bool config_enable_cxx =
 #endif
 ;
 
-#if defined(_WIN32) || defined(JEMALLOC_HAVE_SCHED_GETCPU)
+#if defined(_WIN32) || defined(__APPLE__) || defined(JEMALLOC_HAVE_SCHED_GETCPU)
 /* Currently percpu_arena depends on sched_getcpu. */
 #define JEMALLOC_PERCPU_ARENA
 #endif
diff --git a/include/jemalloc/internal/jemalloc_probe.h b/include/jemalloc/internal/jemalloc_probe.h
new file mode 100644
index 00000000..8ef3105d
--- /dev/null
+++ b/include/jemalloc/internal/jemalloc_probe.h
@@ -0,0 +1,49 @@
+#ifndef JEMALLOC_INTERNAL_JEMALLOC_PROBE_H
+#define JEMALLOC_INTERNAL_JEMALLOC_PROBE_H
+
+#include <jemalloc/internal/jemalloc_preamble.h>
+
+#ifdef JEMALLOC_EXPERIMENTAL_USDT_STAP
+#include <jemalloc/internal/jemalloc_probe_stap.h>
+#elif defined(JEMALLOC_EXPERIMENTAL_USDT_CUSTOM)
+#include <jemalloc/internal/jemalloc_probe_custom.h>
+#elif defined(_MSC_VER)
+#define JE_USDT(name, N, ...) /* Nothing */
+#else /*  no USDT, just check the args */
+
+#define JE_USDT(name, N, ...) _JE_USDT_CHECK_ARG##N(__VA_ARGS__)
+
+#define _JE_USDT_CHECK_ARG1(a)						\
+	do {								\
+		(void)(a);						\
+	} while (0)
+#define _JE_USDT_CHECK_ARG2(a, b)					\
+	do {								\
+		(void)(a);						\
+		(void)(b);						\
+	} while (0)
+#define _JE_USDT_CHECK_ARG3(a, b, c)					\
+	do {								\
+		(void)(a);						\
+		(void)(b);						\
+		(void)(c);						\
+	} while (0)
+#define _JE_USDT_CHECK_ARG4(a, b, c, d)					\
+	do {								\
+		(void)(a);						\
+		(void)(b);						\
+		(void)(c);						\
+		(void)(d);						\
+	} while (0)
+#define _JE_USDT_CHECK_ARG5(a, b, c, d, e)				\
+	do {								\
+		(void)(a);						\
+		(void)(b);						\
+		(void)(c);						\
+		(void)(d);						\
+		(void)(e);						\
+	} while (0)
+
+#endif /* JEMALLOC_EXPERIMENTAL_USDT_* */
+
+#endif /* JEMALLOC_INTERNAL_JEMALLOC_PROBE_H */
diff --git a/include/jemalloc/internal/jemalloc_probe_custom.h b/include/jemalloc/internal/jemalloc_probe_custom.h
new file mode 100644
index 00000000..3c22749f
--- /dev/null
+++ b/include/jemalloc/internal/jemalloc_probe_custom.h
@@ -0,0 +1,148 @@
+#ifndef JEMALLOC_INTERNAL_JEMALLOC_PROBE_CUSTOM_H
+#define JEMALLOC_INTERNAL_JEMALLOC_PROBE_CUSTOM_H
+
+/* clang-format off */
+
+/*
+ * This section is based on sys/sdt.h and
+ * https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
+ */
+
+/* Emit NOP for the probe. */
+#if (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \
+     defined(__arm__)) && defined(__linux__)
+#define JE_SDT_NOP nop
+#else
+#error "Architecture not supported"
+#endif
+
+/* Assembly macros */
+#define JE_SDT_S(x) #x
+
+#define JE_SDT_ASM_1(x) JE_SDT_S(x) "\n"
+
+#define JE_SDT_ASM_2(x, y)			\
+	JE_SDT_S(x) "," JE_SDT_S(y) "\n"
+
+#define JE_SDT_ASM_3(x, y, z)					\
+	JE_SDT_S(x) "," JE_SDT_S(y) ","  JE_SDT_S(z) "\n"
+
+#define JE_SDT_ASM_3(x, y, z)					\
+	JE_SDT_S(x) "," JE_SDT_S(y) ","  JE_SDT_S(z) "\n"
+
+#define JE_SDT_ASM_4(x, y, z, p)					\
+	JE_SDT_S(x) "," JE_SDT_S(y) "," JE_SDT_S(z) "," JE_SDT_S(p) "\n"
+
+#define JE_SDT_ASM_5(x, y, z, p, q)					\
+	JE_SDT_S(x) "," JE_SDT_S(y) ","	JE_SDT_S(z) "," JE_SDT_S(p) ","	\
+		JE_SDT_S(q) "\n"
+
+/* Arg size */
+#ifdef __LP64__
+#define JE_SDT_ASM_ADDR            .8byte
+#else
+#define JE_SDT_ASM_ADDR            .4byte
+#endif
+
+#define JE_SDT_NOTE_NAME  "stapsdt"
+#define JE_SDT_NOTE_TYPE  3
+
+#define JE_SDT_SEMAPHORE_NONE(provider, name)			\
+	JE_SDT_ASM_1(JE_SDT_ASM_ADDR 0) /* No Semaphore support */
+#define JE_SDT_SEMAPHORE_OPERAND(provider, name)	\
+	[__sdt_semaphore] "ip" (0) /* No Semaphore */
+
+#define JE_SDT_ASM_STRING(x)     JE_SDT_ASM_1(.asciz JE_SDT_S(x))
+
+#define JE_SDT_NOTE(provider, name, arg_template)			\
+	JE_SDT_ASM_1(990: JE_SDT_NOP)					\
+	JE_SDT_ASM_3(     .pushsection .note.stapsdt,"?","note")	\
+	JE_SDT_ASM_1(     .balign 4)					\
+	JE_SDT_ASM_3(     .4byte 992f-991f, 994f-993f, JE_SDT_NOTE_TYPE) \
+	JE_SDT_ASM_1(991: .asciz JE_SDT_NOTE_NAME)			\
+	JE_SDT_ASM_1(992: .balign 4)					\
+	JE_SDT_ASM_1(993: JE_SDT_ASM_ADDR 990b)				\
+	JE_SDT_ASM_1(     JE_SDT_ASM_ADDR _.stapsdt.base)		\
+	JE_SDT_SEMAPHORE_NONE(provider, name)				\
+	JE_SDT_ASM_STRING(provider)					\
+	JE_SDT_ASM_STRING(name)						\
+	JE_SDT_ASM_STRING(arg_template)					\
+	JE_SDT_ASM_1(994: .balign 4)					\
+	JE_SDT_ASM_1(     .popsection)
+
+#define JE_SDT_BASE							\
+	JE_SDT_ASM_1(     .ifndef _.stapsdt.base)			\
+	JE_SDT_ASM_5(     .pushsection .stapsdt.base, "aG", "progbits",	\
+		    .stapsdt.base,comdat)				\
+	JE_SDT_ASM_1(     .weak _.stapsdt.base)				\
+	JE_SDT_ASM_1(     .hidden _.stapsdt.base)			\
+	JE_SDT_ASM_1(     _.stapsdt.base: .space 1)			\
+	JE_SDT_ASM_2(     .size _.stapsdt.base, 1)			\
+	JE_SDT_ASM_1(     .popsection)					\
+	JE_SDT_ASM_1(     .endif)
+
+
+/*
+ * Default constraint for probes arguments.
+ * See https://gcc.gnu.org/onlinedocs/gcc/Constraints.html
+ */
+#ifndef JE_SDT_ARG_CONSTRAINT
+#define JE_SDT_ARG_CONSTRAINT      "nor"
+#endif
+
+#define JE_SDT_ARGARRAY(x)  ((__builtin_classify_type(x) == 14) ||  \
+			     (__builtin_classify_type(x) == 5))
+#define JE_SDT_ARGSIZE(x)   (JE_SDT_ARGARRAY(x) ? sizeof(void*) : sizeof(x))
+
+/*
+ * Format of each probe argument as operand.  Size tagged with JE_SDT_Sn,
+ * with "n" constraint.  Value is tagged with JE_SDT_An with configured
+ * constraint.
+ */
+#define JE_SDT_ARG(n, x)						\
+	[JE_SDT_S##n] "n"                ((size_t)JE_SDT_ARGSIZE(x)),	\
+		[JE_SDT_A##n] JE_SDT_ARG_CONSTRAINT(x)
+
+/* Templates to append arguments as operands. */
+#define JE_SDT_OPERANDS_0()     [__sdt_dummy] "g" (0)
+#define JE_SDT_OPERANDS_1(_1)      JE_SDT_ARG(1, _1)
+#define JE_SDT_OPERANDS_2(_1, _2)  JE_SDT_OPERANDS_1(_1), JE_SDT_ARG(2, _2)
+#define JE_SDT_OPERANDS_3(_1, _2, _3) JE_SDT_OPERANDS_2(_1, _2), JE_SDT_ARG(3, _3)
+#define JE_SDT_OPERANDS_4(_1, _2, _3, _4)			\
+	JE_SDT_OPERANDS_3(_1, _2, _3), JE_SDT_ARG(4, _4)
+#define JE_SDT_OPERANDS_5(_1, _2, _3, _4, _5)			\
+	JE_SDT_OPERANDS_4(_1, _2, _3, _4), JE_SDT_ARG(5, _5)
+#define JE_SDT_OPERANDS_6(_1, _2, _3, _4, _5, _6)			\
+	JE_SDT_OPERANDS_5(_1, _2, _3, _4, _5), JE_SDT_ARG(6, _6)
+#define JE_SDT_OPERANDS_7(_1, _2, _3, _4, _5, _6, _7)		\
+	JE_SDT_OPERANDS_6(_1, _2, _3, _4, _5, _6), JE_SDT_ARG(7, _7)
+
+/* Templates to reference the arguments from operands. */
+#define JE_SDT_ARGFMT(num)        %n[JE_SDT_S##num]@%[JE_SDT_A##num]
+#define JE_SDT_ARG_TEMPLATE_0    /* No args */
+#define JE_SDT_ARG_TEMPLATE_1    JE_SDT_ARGFMT(1)
+#define JE_SDT_ARG_TEMPLATE_2    JE_SDT_ARG_TEMPLATE_1 JE_SDT_ARGFMT(2)
+#define JE_SDT_ARG_TEMPLATE_3    JE_SDT_ARG_TEMPLATE_2 JE_SDT_ARGFMT(3)
+#define JE_SDT_ARG_TEMPLATE_4    JE_SDT_ARG_TEMPLATE_3 JE_SDT_ARGFMT(4)
+#define JE_SDT_ARG_TEMPLATE_5    JE_SDT_ARG_TEMPLATE_4 JE_SDT_ARGFMT(5)
+#define JE_SDT_ARG_TEMPLATE_6    JE_SDT_ARG_TEMPLATE_5 JE_SDT_ARGFMT(6)
+#define JE_SDT_ARG_TEMPLATE_7    JE_SDT_ARG_TEMPLATE_6 JE_SDT_ARGFMT(7)
+
+#define JE_SDT_PROBE(							\
+	provider, name, n, arglist)					\
+	do {								\
+		__asm__ __volatile__(					\
+			JE_SDT_NOTE(provider, name,			\
+				    JE_SDT_ARG_TEMPLATE_##n)		\
+			:: JE_SDT_SEMAPHORE_OPERAND(provider, name),	\
+			JE_SDT_OPERANDS_##n arglist);			\
+		__asm__ __volatile__(JE_SDT_BASE);			\
+	} while (0)
+
+#define JE_USDT(name, N, ...)						\
+  JE_SDT_PROBE(jemalloc, name, N, (__VA_ARGS__))
+
+
+#endif /* JEMALLOC_INTERNAL_JEMALLOC_PROBE_CUSTOM_H */
+
+/* clang-format on */
diff --git a/include/jemalloc/internal/jemalloc_probe_stap.h b/include/jemalloc/internal/jemalloc_probe_stap.h
new file mode 100644
index 00000000..302b6cbb
--- /dev/null
+++ b/include/jemalloc/internal/jemalloc_probe_stap.h
@@ -0,0 +1,11 @@
+#ifndef JEMALLOC_INTERNAL_JEMALLOC_PROBE_STAP_H
+#define JEMALLOC_INTERNAL_JEMALLOC_PROBE_STAP_H
+
+#include <sys/sdt.h>
+
+#define JE_USDT(name, N, ...) JE_USDT_PROBE_N(name, N, ##__VA_ARGS__)
+
+#define JE_USDT_PROBE_N(name, N, ...)                                          \
+	STAP_PROBE##N(jemalloc, name, ##__VA_ARGS__)
+
+#endif /* JEMALLOC_INTERNAL_JEMALLOC_PROBE_STAP_H */
diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h
index 8e09122d..84c6c5d6 100644
--- a/include/jemalloc/internal/large_externs.h
+++ b/include/jemalloc/internal/large_externs.h
@@ -1,23 +1,24 @@
 #ifndef JEMALLOC_INTERNAL_LARGE_EXTERNS_H
 #define JEMALLOC_INTERNAL_LARGE_EXTERNS_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/edata.h"
 #include "jemalloc/internal/hook.h"
 
 void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero);
-void *large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
-    bool zero);
-bool large_ralloc_no_move(tsdn_t *tsdn, edata_t *edata, size_t usize_min,
-    size_t usize_max, bool zero);
+void *large_palloc(
+    tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero);
+bool  large_ralloc_no_move(tsdn_t *tsdn, edata_t *edata, size_t usize_min,
+     size_t usize_max, bool zero);
 void *large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize,
     size_t alignment, bool zero, tcache_t *tcache,
     hook_ralloc_args_t *hook_args);
 
-void large_dalloc_prep_locked(tsdn_t *tsdn, edata_t *edata);
-void large_dalloc_finish(tsdn_t *tsdn, edata_t *edata);
-void large_dalloc(tsdn_t *tsdn, edata_t *edata);
-size_t large_salloc(tsdn_t *tsdn, const edata_t *edata);
-void large_prof_info_get(tsd_t *tsd, edata_t *edata, prof_info_t *prof_info,
-    bool reset_recent);
+void   large_dalloc_prep_locked(tsdn_t *tsdn, edata_t *edata);
+void   large_dalloc_finish(tsdn_t *tsdn, edata_t *edata);
+void   large_dalloc(tsdn_t *tsdn, edata_t *edata);
+void   large_prof_info_get(
+      tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, bool reset_recent);
 void large_prof_tctx_reset(edata_t *edata);
 void large_prof_info_set(edata_t *edata, prof_tctx_t *tctx, size_t size);
 
diff --git a/include/jemalloc/internal/lockedint.h b/include/jemalloc/internal/lockedint.h
index d020ebec..46aba8ff 100644
--- a/include/jemalloc/internal/lockedint.h
+++ b/include/jemalloc/internal/lockedint.h
@@ -1,6 +1,11 @@
 #ifndef JEMALLOC_INTERNAL_LOCKEDINT_H
 #define JEMALLOC_INTERNAL_LOCKEDINT_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/tsd_types.h"
+
 /*
  * In those architectures that support 64-bit atomics, we use atomic updates for
  * our 64-bit values.  Otherwise, we use a plain uint64_t and synchronize
@@ -25,33 +30,34 @@ struct locked_zu_s {
 };
 
 #ifndef JEMALLOC_ATOMIC_U64
-#  define LOCKEDINT_MTX_DECLARE(name) malloc_mutex_t name;
-#  define LOCKEDINT_MTX_INIT(mu, name, rank, rank_mode)			\
-    malloc_mutex_init(&(mu), name, rank, rank_mode)
-#  define LOCKEDINT_MTX(mtx) (&(mtx))
-#  define LOCKEDINT_MTX_LOCK(tsdn, mu) malloc_mutex_lock(tsdn, &(mu))
-#  define LOCKEDINT_MTX_UNLOCK(tsdn, mu) malloc_mutex_unlock(tsdn, &(mu))
-#  define LOCKEDINT_MTX_PREFORK(tsdn, mu) malloc_mutex_prefork(tsdn, &(mu))
-#  define LOCKEDINT_MTX_POSTFORK_PARENT(tsdn, mu)			\
-    malloc_mutex_postfork_parent(tsdn, &(mu))
-#  define LOCKEDINT_MTX_POSTFORK_CHILD(tsdn, mu)			\
-    malloc_mutex_postfork_child(tsdn, &(mu))
+#	define LOCKEDINT_MTX_DECLARE(name) malloc_mutex_t name;
+#	define LOCKEDINT_MTX_INIT(mu, name, rank, rank_mode)                  \
+		malloc_mutex_init(&(mu), name, rank, rank_mode)
+#	define LOCKEDINT_MTX(mtx) (&(mtx))
+#	define LOCKEDINT_MTX_LOCK(tsdn, mu) malloc_mutex_lock(tsdn, &(mu))
+#	define LOCKEDINT_MTX_UNLOCK(tsdn, mu) malloc_mutex_unlock(tsdn, &(mu))
+#	define LOCKEDINT_MTX_PREFORK(tsdn, mu)                                \
+		malloc_mutex_prefork(tsdn, &(mu))
+#	define LOCKEDINT_MTX_POSTFORK_PARENT(tsdn, mu)                        \
+		malloc_mutex_postfork_parent(tsdn, &(mu))
+#	define LOCKEDINT_MTX_POSTFORK_CHILD(tsdn, mu)                         \
+		malloc_mutex_postfork_child(tsdn, &(mu))
 #else
-#  define LOCKEDINT_MTX_DECLARE(name)
-#  define LOCKEDINT_MTX(mtx) NULL
-#  define LOCKEDINT_MTX_INIT(mu, name, rank, rank_mode) false
-#  define LOCKEDINT_MTX_LOCK(tsdn, mu)
-#  define LOCKEDINT_MTX_UNLOCK(tsdn, mu)
-#  define LOCKEDINT_MTX_PREFORK(tsdn, mu)
-#  define LOCKEDINT_MTX_POSTFORK_PARENT(tsdn, mu)
-#  define LOCKEDINT_MTX_POSTFORK_CHILD(tsdn, mu)
+#	define LOCKEDINT_MTX_DECLARE(name)
+#	define LOCKEDINT_MTX(mtx) NULL
+#	define LOCKEDINT_MTX_INIT(mu, name, rank, rank_mode) false
+#	define LOCKEDINT_MTX_LOCK(tsdn, mu)
+#	define LOCKEDINT_MTX_UNLOCK(tsdn, mu)
+#	define LOCKEDINT_MTX_PREFORK(tsdn, mu)
+#	define LOCKEDINT_MTX_POSTFORK_PARENT(tsdn, mu)
+#	define LOCKEDINT_MTX_POSTFORK_CHILD(tsdn, mu)
 #endif
 
 #ifdef JEMALLOC_ATOMIC_U64
-#  define LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx) assert((mtx) == NULL)
+#	define LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx) assert((mtx) == NULL)
 #else
-#  define LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx)			\
-    malloc_mutex_assert_owner(tsdn, (mtx))
+#	define LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx)                       \
+		malloc_mutex_assert_owner(tsdn, (mtx))
 #endif
 
 static inline uint64_t
@@ -65,8 +71,7 @@ locked_read_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p) {
 }
 
 static inline void
-locked_inc_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p,
-    uint64_t x) {
+locked_inc_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, uint64_t x) {
 	LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx);
 #ifdef JEMALLOC_ATOMIC_U64
 	atomic_fetch_add_u64(&p->val, x, ATOMIC_RELAXED);
@@ -76,8 +81,7 @@ locked_inc_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p,
 }
 
 static inline void
-locked_dec_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p,
-    uint64_t x) {
+locked_dec_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, uint64_t x) {
 	LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx);
 #ifdef JEMALLOC_ATOMIC_U64
 	uint64_t r = atomic_fetch_sub_u64(&p->val, x, ATOMIC_RELAXED);
@@ -94,7 +98,7 @@ locked_inc_mod_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p,
     const uint64_t x, const uint64_t modulus) {
 	LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx);
 	uint64_t before, after;
-	bool overflow;
+	bool     overflow;
 #ifdef JEMALLOC_ATOMIC_U64
 	before = atomic_load_u64(&p->val, ATOMIC_RELAXED);
 	do {
@@ -104,8 +108,8 @@ locked_inc_mod_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p,
 		if (overflow) {
 			after %= modulus;
 		}
-	} while (!atomic_compare_exchange_weak_u64(&p->val, &before, after,
-	    ATOMIC_RELAXED, ATOMIC_RELAXED));
+	} while (!atomic_compare_exchange_weak_u64(
+	    &p->val, &before, after, ATOMIC_RELAXED, ATOMIC_RELAXED));
 #else
 	before = p->val;
 	after = before + x;
@@ -162,8 +166,7 @@ locked_read_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p) {
 }
 
 static inline void
-locked_inc_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p,
-    size_t x) {
+locked_inc_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p, size_t x) {
 	LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx);
 #ifdef JEMALLOC_ATOMIC_U64
 	atomic_fetch_add_zu(&p->val, x, ATOMIC_RELAXED);
@@ -174,8 +177,7 @@ locked_inc_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p,
 }
 
 static inline void
-locked_dec_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p,
-    size_t x) {
+locked_dec_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p, size_t x) {
 	LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx);
 #ifdef JEMALLOC_ATOMIC_U64
 	size_t r = atomic_fetch_sub_zu(&p->val, x, ATOMIC_RELAXED);
diff --git a/include/jemalloc/internal/log.h b/include/jemalloc/internal/log.h
index 64208586..f213beda 100644
--- a/include/jemalloc/internal/log.h
+++ b/include/jemalloc/internal/log.h
@@ -1,14 +1,15 @@
 #ifndef JEMALLOC_INTERNAL_LOG_H
 #define JEMALLOC_INTERNAL_LOG_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/atomic.h"
 #include "jemalloc/internal/malloc_io.h"
 #include "jemalloc/internal/mutex.h"
 
 #ifdef JEMALLOC_LOG
-#  define JEMALLOC_LOG_VAR_BUFSIZE 1000
+#	define JEMALLOC_LOG_VAR_BUFSIZE 1000
 #else
-#  define JEMALLOC_LOG_VAR_BUFSIZE 1
+#	define JEMALLOC_LOG_VAR_BUFSIZE 1
 #endif
 
 #define JEMALLOC_LOG_BUFSIZE 4096
@@ -26,16 +27,16 @@
  * log("extent.a", "log msg for extent.a"); // 5
  * log("extent.b", "log msg for extent.b"); // 6
  *
- * And your malloc_conf option is "log=arena.a|extent", then lines 2, 4, 5, and
+ * And your malloc_conf option is "log:arena.a|extent", then lines 2, 4, 5, and
  * 6 will print at runtime.  You can enable logging from all log vars by
- * writing "log=.".
+ * writing "log:.".
  *
  * None of this should be regarded as a stable API for right now.  It's intended
  * as a debugging interface, to let us keep around some of our printf-debugging
  * statements.
  */
 
-extern char log_var_names[JEMALLOC_LOG_VAR_BUFSIZE];
+extern char       log_var_names[JEMALLOC_LOG_VAR_BUFSIZE];
 extern atomic_b_t log_init_done;
 
 typedef struct log_var_s log_var_t;
@@ -44,7 +45,7 @@ struct log_var_s {
 	 * Lowest bit is "inited", second lowest is "enabled".  Putting them in
 	 * a single word lets us avoid any fences on weak architectures.
 	 */
-	atomic_u_t state;
+	atomic_u_t  state;
 	const char *name;
 };
 
@@ -52,7 +53,8 @@ struct log_var_s {
 #define LOG_INITIALIZED_NOT_ENABLED 1U
 #define LOG_ENABLED 2U
 
-#define LOG_VAR_INIT(name_str) {ATOMIC_INIT(LOG_NOT_INITIALIZED), name_str}
+#define LOG_VAR_INIT(name_str)                                                 \
+	{ ATOMIC_INIT(LOG_NOT_INITIALIZED), name_str }
 
 /*
  * Returns the value we should assume for state (which is not necessarily
@@ -62,21 +64,21 @@ struct log_var_s {
 unsigned log_var_update_state(log_var_t *log_var);
 
 /* We factor out the metadata management to allow us to test more easily. */
-#define log_do_begin(log_var)						\
-if (config_log) {							\
-	unsigned log_state = atomic_load_u(&(log_var).state,		\
-	    ATOMIC_RELAXED);						\
-	if (unlikely(log_state == LOG_NOT_INITIALIZED)) {		\
-		log_state = log_var_update_state(&(log_var));		\
-		assert(log_state != LOG_NOT_INITIALIZED);		\
-	}								\
-	if (log_state == LOG_ENABLED) {					\
-		{
-			/* User code executes here. */
-#define log_do_end(log_var)						\
-		}							\
-	}								\
-}
+#define log_do_begin(log_var)                                                  \
+	if (config_log) {                                                      \
+		unsigned log_state = atomic_load_u(                            \
+		    &(log_var).state, ATOMIC_RELAXED);                         \
+		if (unlikely(log_state == LOG_NOT_INITIALIZED)) {              \
+			log_state = log_var_update_state(&(log_var));          \
+			assert(log_state != LOG_NOT_INITIALIZED);              \
+		}                                                              \
+		if (log_state == LOG_ENABLED) {                                \
+			{
+/* User code executes here. */
+#define log_do_end(log_var)                                                    \
+	}                                                                      \
+	}                                                                      \
+	}
 
 /*
  * MSVC has some preprocessor bugs in its expansion of __VA_ARGS__ during
@@ -87,29 +89,29 @@ if (config_log) {							\
  */
 static inline void
 log_impl_varargs(const char *name, ...) {
-	char buf[JEMALLOC_LOG_BUFSIZE];
+	char    buf[JEMALLOC_LOG_BUFSIZE];
 	va_list ap;
 
 	va_start(ap, name);
 	const char *format = va_arg(ap, const char *);
-	size_t dst_offset = 0;
+	size_t      dst_offset = 0;
 	dst_offset += malloc_snprintf(buf, JEMALLOC_LOG_BUFSIZE, "%s: ", name);
-	dst_offset += malloc_vsnprintf(buf + dst_offset,
-	    JEMALLOC_LOG_BUFSIZE - dst_offset, format, ap);
-	dst_offset += malloc_snprintf(buf + dst_offset,
-	    JEMALLOC_LOG_BUFSIZE - dst_offset, "\n");
+	dst_offset += malloc_vsnprintf(
+	    buf + dst_offset, JEMALLOC_LOG_BUFSIZE - dst_offset, format, ap);
+	malloc_snprintf(
+	    buf + dst_offset, JEMALLOC_LOG_BUFSIZE - dst_offset, "\n");
 	va_end(ap);
 
 	malloc_write(buf);
 }
 
 /* Call as log("log.var.str", "format_string %d", arg_for_format_string); */
-#define LOG(log_var_str, ...)						\
-do {									\
-	static log_var_t log_var = LOG_VAR_INIT(log_var_str);		\
-	log_do_begin(log_var)						\
-		log_impl_varargs((log_var).name, __VA_ARGS__);		\
-	log_do_end(log_var)						\
-} while (0)
+#define LOG(log_var_str, ...)                                                  \
+	do {                                                                   \
+		static log_var_t log_var = LOG_VAR_INIT(log_var_str);          \
+		log_do_begin(log_var)                                          \
+		    log_impl_varargs((log_var).name, __VA_ARGS__);             \
+		log_do_end(log_var)                                            \
+	} while (0)
 
 #endif /* JEMALLOC_INTERNAL_LOG_H */
diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h
index a375bdae..0f70c3c3 100644
--- a/include/jemalloc/internal/malloc_io.h
+++ b/include/jemalloc/internal/malloc_io.h
@@ -1,105 +1,101 @@
 #ifndef JEMALLOC_INTERNAL_MALLOC_IO_H
 #define JEMALLOC_INTERNAL_MALLOC_IO_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/jemalloc_internal_types.h"
 
 #ifdef _WIN32
-#  ifdef _WIN64
-#    define FMT64_PREFIX "ll"
-#    define FMTPTR_PREFIX "ll"
-#  else
-#    define FMT64_PREFIX "ll"
-#    define FMTPTR_PREFIX ""
-#  endif
-#  define FMTd32 "d"
-#  define FMTu32 "u"
-#  define FMTx32 "x"
-#  define FMTd64 FMT64_PREFIX "d"
-#  define FMTu64 FMT64_PREFIX "u"
-#  define FMTx64 FMT64_PREFIX "x"
-#  define FMTdPTR FMTPTR_PREFIX "d"
-#  define FMTuPTR FMTPTR_PREFIX "u"
-#  define FMTxPTR FMTPTR_PREFIX "x"
+#	ifdef _WIN64
+#		define FMT64_PREFIX "ll"
+#		define FMTPTR_PREFIX "ll"
+#	else
+#		define FMT64_PREFIX "ll"
+#		define FMTPTR_PREFIX ""
+#	endif
+#	define FMTd32 "d"
+#	define FMTu32 "u"
+#	define FMTx32 "x"
+#	define FMTd64 FMT64_PREFIX "d"
+#	define FMTu64 FMT64_PREFIX "u"
+#	define FMTx64 FMT64_PREFIX "x"
+#	define FMTdPTR FMTPTR_PREFIX "d"
+#	define FMTuPTR FMTPTR_PREFIX "u"
+#	define FMTxPTR FMTPTR_PREFIX "x"
 #else
-#  include <inttypes.h>
-#  define FMTd32 PRId32
-#  define FMTu32 PRIu32
-#  define FMTx32 PRIx32
-#  define FMTd64 PRId64
-#  define FMTu64 PRIu64
-#  define FMTx64 PRIx64
-#  define FMTdPTR PRIdPTR
-#  define FMTuPTR PRIuPTR
-#  define FMTxPTR PRIxPTR
+#	include <inttypes.h>
+#	define FMTd32 PRId32
+#	define FMTu32 PRIu32
+#	define FMTx32 PRIx32
+#	define FMTd64 PRId64
+#	define FMTu64 PRIu64
+#	define FMTx64 PRIx64
+#	define FMTdPTR PRIdPTR
+#	define FMTuPTR PRIuPTR
+#	define FMTxPTR PRIxPTR
 #endif
 
 /* Size of stack-allocated buffer passed to buferror(). */
-#define BUFERROR_BUF		64
+#define BUFERROR_BUF 64
 
 /*
  * Size of stack-allocated buffer used by malloc_{,v,vc}printf().  This must be
  * large enough for all possible uses within jemalloc.
  */
-#define MALLOC_PRINTF_BUFSIZE	4096
+#define MALLOC_PRINTF_BUFSIZE 4096
 
 write_cb_t wrtmessage;
-int buferror(int err, char *buf, size_t buflen);
-uintmax_t malloc_strtoumax(const char *restrict nptr, char **restrict endptr,
-    int base);
+int        buferror(int err, char *buf, size_t buflen);
+uintmax_t  malloc_strtoumax(
+     const char *restrict nptr, char **restrict endptr, int base);
 void malloc_write(const char *s);
 
 /*
  * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating
  * point math.
  */
-size_t malloc_vsnprintf(char *str, size_t size, const char *format,
-    va_list ap);
+size_t malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap);
 size_t malloc_snprintf(char *str, size_t size, const char *format, ...)
     JEMALLOC_FORMAT_PRINTF(3, 4);
 /*
  * The caller can set write_cb to null to choose to print with the
  * je_malloc_message hook.
  */
-void malloc_vcprintf(write_cb_t *write_cb, void *cbopaque, const char *format,
-    va_list ap);
+void malloc_vcprintf(
+    write_cb_t *write_cb, void *cbopaque, const char *format, va_list ap);
 void malloc_cprintf(write_cb_t *write_cb, void *cbopaque, const char *format,
     ...) JEMALLOC_FORMAT_PRINTF(3, 4);
 void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2);
 
-static inline ssize_t
-malloc_write_fd(int fd, const void *buf, size_t count) {
-#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write)
-	/*
-	 * Use syscall(2) rather than write(2) when possible in order to avoid
-	 * the possibility of memory allocation within libc.  This is necessary
-	 * on FreeBSD; most operating systems do not have this problem though.
-	 *
-	 * syscall() returns long or int, depending on platform, so capture the
-	 * result in the widest plausible type to avoid compiler warnings.
-	 */
-	long result = syscall(SYS_write, fd, buf, count);
+ssize_t malloc_write_fd(int fd, const void *buf, size_t count);
+ssize_t malloc_read_fd(int fd, void *buf, size_t count);
+
+static inline int
+malloc_open(const char *path, int flags) {
+#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
+	return (int)syscall(SYS_open, path, flags);
+#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
+	return (int)syscall(SYS_openat, AT_FDCWD, path, flags);
 #else
-	ssize_t result = (ssize_t)write(fd, buf,
-#ifdef _WIN32
-	    (unsigned int)
+	return open(path, flags);
 #endif
-	    count);
-#endif
-	return (ssize_t)result;
 }
 
-static inline ssize_t
-malloc_read_fd(int fd, void *buf, size_t count) {
-#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read)
-	long result = syscall(SYS_read, fd, buf, count);
+static inline int
+malloc_close(int fd) {
+#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
+	return (int)syscall(SYS_close, fd);
 #else
-	ssize_t result = read(fd, buf,
-#ifdef _WIN32
-	    (unsigned int)
+	return close(fd);
 #endif
-	    count);
+}
+
+static inline off_t
+malloc_lseek(int fd, off_t offset, int whence) {
+#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_lseek)
+	return (off_t)syscall(SYS_lseek, fd, offset, whence);
+#else
+	return lseek(fd, offset, whence);
 #endif
-	return (ssize_t)result;
 }
 
 #endif /* JEMALLOC_INTERNAL_MALLOC_IO_H */
diff --git a/include/jemalloc/internal/mpsc_queue.h b/include/jemalloc/internal/mpsc_queue.h
index 316ea9b1..86f4898f 100644
--- a/include/jemalloc/internal/mpsc_queue.h
+++ b/include/jemalloc/internal/mpsc_queue.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_MPSC_QUEUE_H
 #define JEMALLOC_INTERNAL_MPSC_QUEUE_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/atomic.h"
 
 /*
@@ -25,6 +26,7 @@
  * two-stack tricks reverses orders in the lock-free first stack).
  */
 
+/* clang-format off */
 #define mpsc_queue(a_type)						\
 struct {								\
 	atomic_p_t tail;						\
@@ -130,5 +132,6 @@ a_prefix##pop_batch(a_queue_type *queue, a_list_type *dst) {		\
 	}								\
 	ql_concat(dst, &reversed, a_link);				\
 }
+/* clang-format on */
 
 #endif /* JEMALLOC_INTERNAL_MPSC_QUEUE_H */
diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h
index 63a0b1b3..943c7928 100644
--- a/include/jemalloc/internal/mutex.h
+++ b/include/jemalloc/internal/mutex.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_MUTEX_H
 #define JEMALLOC_INTERNAL_MUTEX_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/atomic.h"
 #include "jemalloc/internal/mutex_prof.h"
 #include "jemalloc/internal/tsd.h"
@@ -30,26 +31,29 @@ struct malloc_mutex_s {
 			 * avoid prefetching a modified cacheline (for the
 			 * unlocking thread).
 			 */
-			mutex_prof_data_t	prof_data;
-#ifdef _WIN32
-#  if _WIN32_WINNT >= 0x0600
-			SRWLOCK         	lock;
-#  else
-			CRITICAL_SECTION	lock;
-#  endif
-#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
-			os_unfair_lock		lock;
-#elif (defined(JEMALLOC_MUTEX_INIT_CB))
-			pthread_mutex_t		lock;
-			malloc_mutex_t		*postponed_next;
-#else
-			pthread_mutex_t		lock;
-#endif
+			mutex_prof_data_t prof_data;
 			/*
 			 * Hint flag to avoid exclusive cache line contention
-			 * during spin waiting
+			 * during spin waiting.  Placed along with prof_data
+			 * since it's always modified even with no contention.
+			 * Modified by the lock owner only (after acquired, and
+			 * before release), and may be read by other threads.
 			 */
-			atomic_b_t		locked;
+			atomic_b_t locked;
+#ifdef _WIN32
+#	if _WIN32_WINNT >= 0x0600
+			SRWLOCK lock;
+#	else
+			CRITICAL_SECTION lock;
+#	endif
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
+			os_unfair_lock lock;
+#elif (defined(JEMALLOC_MUTEX_INIT_CB))
+			pthread_mutex_t lock;
+			malloc_mutex_t *postponed_next;
+#else
+			pthread_mutex_t lock;
+#endif
 		};
 		/*
 		 * We only touch witness when configured w/ debug.  However we
@@ -58,82 +62,118 @@ struct malloc_mutex_s {
 		 * memory cost.
 		 */
 #if !defined(JEMALLOC_DEBUG)
-		witness_t			witness;
-		malloc_mutex_lock_order_t	lock_order;
+		witness_t                 witness;
+		malloc_mutex_lock_order_t lock_order;
 #endif
 	};
 
 #if defined(JEMALLOC_DEBUG)
-	witness_t			witness;
-	malloc_mutex_lock_order_t	lock_order;
+	witness_t                 witness;
+	malloc_mutex_lock_order_t lock_order;
 #endif
 };
 
 #ifdef _WIN32
-#  if _WIN32_WINNT >= 0x0600
-#    define MALLOC_MUTEX_LOCK(m)    AcquireSRWLockExclusive(&(m)->lock)
-#    define MALLOC_MUTEX_UNLOCK(m)  ReleaseSRWLockExclusive(&(m)->lock)
-#    define MALLOC_MUTEX_TRYLOCK(m) (!TryAcquireSRWLockExclusive(&(m)->lock))
-#  else
-#    define MALLOC_MUTEX_LOCK(m)    EnterCriticalSection(&(m)->lock)
-#    define MALLOC_MUTEX_UNLOCK(m)  LeaveCriticalSection(&(m)->lock)
-#    define MALLOC_MUTEX_TRYLOCK(m) (!TryEnterCriticalSection(&(m)->lock))
-#  endif
+#	if _WIN32_WINNT >= 0x0600
+#		define MALLOC_MUTEX_LOCK(m) AcquireSRWLockExclusive(&(m)->lock)
+#		define MALLOC_MUTEX_UNLOCK(m)                                 \
+			ReleaseSRWLockExclusive(&(m)->lock)
+#		define MALLOC_MUTEX_TRYLOCK(m)                                \
+			(!TryAcquireSRWLockExclusive(&(m)->lock))
+#	else
+#		define MALLOC_MUTEX_LOCK(m) EnterCriticalSection(&(m)->lock)
+#		define MALLOC_MUTEX_UNLOCK(m) LeaveCriticalSection(&(m)->lock)
+#		define MALLOC_MUTEX_TRYLOCK(m)                                \
+			(!TryEnterCriticalSection(&(m)->lock))
+#	endif
 #elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
-#    define MALLOC_MUTEX_LOCK(m)    os_unfair_lock_lock(&(m)->lock)
-#    define MALLOC_MUTEX_UNLOCK(m)  os_unfair_lock_unlock(&(m)->lock)
-#    define MALLOC_MUTEX_TRYLOCK(m) (!os_unfair_lock_trylock(&(m)->lock))
+#	define MALLOC_MUTEX_LOCK(m) os_unfair_lock_lock(&(m)->lock)
+#	define MALLOC_MUTEX_UNLOCK(m) os_unfair_lock_unlock(&(m)->lock)
+#	define MALLOC_MUTEX_TRYLOCK(m) (!os_unfair_lock_trylock(&(m)->lock))
 #else
-#    define MALLOC_MUTEX_LOCK(m)    pthread_mutex_lock(&(m)->lock)
-#    define MALLOC_MUTEX_UNLOCK(m)  pthread_mutex_unlock(&(m)->lock)
-#    define MALLOC_MUTEX_TRYLOCK(m) (pthread_mutex_trylock(&(m)->lock) != 0)
+#	define MALLOC_MUTEX_LOCK(m) pthread_mutex_lock(&(m)->lock)
+#	define MALLOC_MUTEX_UNLOCK(m) pthread_mutex_unlock(&(m)->lock)
+#	define MALLOC_MUTEX_TRYLOCK(m) (pthread_mutex_trylock(&(m)->lock) != 0)
 #endif
 
-#define LOCK_PROF_DATA_INITIALIZER					\
-    {NSTIME_ZERO_INITIALIZER, NSTIME_ZERO_INITIALIZER, 0, 0, 0,		\
-	    ATOMIC_INIT(0), 0, NULL, 0}
+#define LOCK_PROF_DATA_INITIALIZER                                             \
+	{                                                                      \
+		NSTIME_ZERO_INITIALIZER, NSTIME_ZERO_INITIALIZER, 0, 0, 0,     \
+		    ATOMIC_INIT(0), 0, NULL, 0                                 \
+	}
 
 #ifdef _WIN32
-#  define MALLOC_MUTEX_INITIALIZER
+#	define MALLOC_MUTEX_INITIALIZER
 #elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
-#  if defined(JEMALLOC_DEBUG)
-#    define MALLOC_MUTEX_INITIALIZER					\
-  {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT, ATOMIC_INIT(false)}}, \
-         WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0}
-#  else
-#    define MALLOC_MUTEX_INITIALIZER                      \
-  {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT, ATOMIC_INIT(false)}},  \
-      WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
-#  endif
+#	if defined(JEMALLOC_DEBUG)
+#		define MALLOC_MUTEX_INITIALIZER                               \
+			{                                                      \
+				{{LOCK_PROF_DATA_INITIALIZER,                  \
+				    ATOMIC_INIT(false), OS_UNFAIR_LOCK_INIT}}, \
+				    WITNESS_INITIALIZER(                       \
+				        "mutex", WITNESS_RANK_OMIT),           \
+				    0                                          \
+			}
+#	else
+#		define MALLOC_MUTEX_INITIALIZER                               \
+			{                                                      \
+				{{LOCK_PROF_DATA_INITIALIZER,                  \
+				    ATOMIC_INIT(false), OS_UNFAIR_LOCK_INIT}}, \
+				    WITNESS_INITIALIZER(                       \
+				        "mutex", WITNESS_RANK_OMIT)            \
+			}
+#	endif
 #elif (defined(JEMALLOC_MUTEX_INIT_CB))
-#  if (defined(JEMALLOC_DEBUG))
-#     define MALLOC_MUTEX_INITIALIZER					\
-      {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL, ATOMIC_INIT(false)}},	\
-           WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0}
-#  else
-#     define MALLOC_MUTEX_INITIALIZER					\
-      {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL, ATOMIC_INIT(false)}},	\
-           WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
-#  endif
+#	if (defined(JEMALLOC_DEBUG))
+#		define MALLOC_MUTEX_INITIALIZER                               \
+			{                                                      \
+				{{LOCK_PROF_DATA_INITIALIZER,                  \
+				    ATOMIC_INIT(false),                        \
+				    PTHREAD_MUTEX_INITIALIZER, NULL}},         \
+				    WITNESS_INITIALIZER(                       \
+				        "mutex", WITNESS_RANK_OMIT),           \
+				    0                                          \
+			}
+#	else
+#		define MALLOC_MUTEX_INITIALIZER                               \
+			{                                                      \
+				{{LOCK_PROF_DATA_INITIALIZER,                  \
+				    ATOMIC_INIT(false),                        \
+				    PTHREAD_MUTEX_INITIALIZER, NULL}},         \
+				    WITNESS_INITIALIZER(                       \
+				        "mutex", WITNESS_RANK_OMIT)            \
+			}
+#	endif
 
 #else
-#    define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT
-#  if defined(JEMALLOC_DEBUG)
-#    define MALLOC_MUTEX_INITIALIZER					\
-     {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, ATOMIC_INIT(false)}}, \
-           WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0}
-#  else
-#    define MALLOC_MUTEX_INITIALIZER                          \
-     {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, ATOMIC_INIT(false)}},	\
-      WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
-#  endif
+#	define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT
+#	if defined(JEMALLOC_DEBUG)
+#		define MALLOC_MUTEX_INITIALIZER                               \
+			{                                                      \
+				{{LOCK_PROF_DATA_INITIALIZER,                  \
+				    ATOMIC_INIT(false),                        \
+				    PTHREAD_MUTEX_INITIALIZER}},               \
+				    WITNESS_INITIALIZER(                       \
+				        "mutex", WITNESS_RANK_OMIT),           \
+				    0                                          \
+			}
+#	else
+#		define MALLOC_MUTEX_INITIALIZER                               \
+			{                                                      \
+				{{LOCK_PROF_DATA_INITIALIZER,                  \
+				    ATOMIC_INIT(false),                        \
+				    PTHREAD_MUTEX_INITIALIZER}},               \
+				    WITNESS_INITIALIZER(                       \
+				        "mutex", WITNESS_RANK_OMIT)            \
+			}
+#	endif
 #endif
 
 #ifdef JEMALLOC_LAZY_LOCK
 extern bool isthreaded;
 #else
-#  undef isthreaded /* Undo private_namespace.h definition. */
-#  define isthreaded true
+#	undef isthreaded /* Undo private_namespace.h definition. */
+#	define isthreaded true
 #endif
 
 bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name,
@@ -154,7 +194,12 @@ malloc_mutex_lock_final(malloc_mutex_t *mutex) {
 
 static inline bool
 malloc_mutex_trylock_final(malloc_mutex_t *mutex) {
-	return MALLOC_MUTEX_TRYLOCK(mutex);
+	bool failed = MALLOC_MUTEX_TRYLOCK(mutex);
+	if (!failed) {
+		atomic_store_b(&mutex->locked, true, ATOMIC_RELAXED);
+	}
+
+	return failed;
 }
 
 static inline void
@@ -169,15 +214,21 @@ mutex_owner_stats_update(tsdn_t *tsdn, malloc_mutex_t *mutex) {
 	}
 }
 
+static inline bool
+malloc_mutex_is_locked(malloc_mutex_t *mutex) {
+	/* Used for sanity checking only. */
+	return atomic_load_b(&mutex->locked, ATOMIC_RELAXED);
+}
+
 /* Trylock: return false if the lock is successfully acquired. */
 static inline bool
 malloc_mutex_trylock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
 	witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
 	if (isthreaded) {
 		if (malloc_mutex_trylock_final(mutex)) {
-			atomic_store_b(&mutex->locked, true, ATOMIC_RELAXED);
 			return true;
 		}
+		assert(malloc_mutex_is_locked(mutex));
 		mutex_owner_stats_update(tsdn, mutex);
 	}
 	witness_lock(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
@@ -199,12 +250,12 @@ malloc_mutex_prof_merge(mutex_prof_data_t *sum, mutex_prof_data_t *data) {
 	if (sum->max_n_thds < data->max_n_thds) {
 		sum->max_n_thds = data->max_n_thds;
 	}
-	uint32_t cur_n_waiting_thds = atomic_load_u32(&sum->n_waiting_thds,
-	    ATOMIC_RELAXED);
-	uint32_t new_n_waiting_thds = cur_n_waiting_thds + atomic_load_u32(
-	    &data->n_waiting_thds, ATOMIC_RELAXED);
-	atomic_store_u32(&sum->n_waiting_thds, new_n_waiting_thds,
-	    ATOMIC_RELAXED);
+	uint32_t cur_n_waiting_thds = atomic_load_u32(
+	    &sum->n_waiting_thds, ATOMIC_RELAXED);
+	uint32_t new_n_waiting_thds = cur_n_waiting_thds
+	    + atomic_load_u32(&data->n_waiting_thds, ATOMIC_RELAXED);
+	atomic_store_u32(
+	    &sum->n_waiting_thds, new_n_waiting_thds, ATOMIC_RELAXED);
 	sum->n_owner_switches += data->n_owner_switches;
 	sum->n_lock_ops += data->n_lock_ops;
 }
@@ -215,8 +266,8 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
 	if (isthreaded) {
 		if (malloc_mutex_trylock_final(mutex)) {
 			malloc_mutex_lock_slow(mutex);
-			atomic_store_b(&mutex->locked, true, ATOMIC_RELAXED);
 		}
+		assert(malloc_mutex_is_locked(mutex));
 		mutex_owner_stats_update(tsdn, mutex);
 	}
 	witness_lock(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
@@ -224,9 +275,10 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
 
 static inline void
 malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
-	atomic_store_b(&mutex->locked, false, ATOMIC_RELAXED);
 	witness_unlock(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
 	if (isthreaded) {
+		assert(malloc_mutex_is_locked(mutex));
+		atomic_store_b(&mutex->locked, false, ATOMIC_RELAXED);
 		MALLOC_MUTEX_UNLOCK(mutex);
 	}
 }
@@ -234,6 +286,9 @@ malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
 static inline void
 malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) {
 	witness_assert_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
+	if (isthreaded) {
+		assert(malloc_mutex_is_locked(mutex));
+	}
 }
 
 static inline void
@@ -255,16 +310,16 @@ malloc_mutex_prof_copy(mutex_prof_data_t *dst, mutex_prof_data_t *source) {
 
 /* Copy the prof data from mutex for processing. */
 static inline void
-malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data,
-    malloc_mutex_t *mutex) {
+malloc_mutex_prof_read(
+    tsdn_t *tsdn, mutex_prof_data_t *data, malloc_mutex_t *mutex) {
 	/* Can only read holding the mutex. */
 	malloc_mutex_assert_owner(tsdn, mutex);
 	malloc_mutex_prof_copy(data, &mutex->prof_data);
 }
 
 static inline void
-malloc_mutex_prof_accum(tsdn_t *tsdn, mutex_prof_data_t *data,
-    malloc_mutex_t *mutex) {
+malloc_mutex_prof_accum(
+    tsdn_t *tsdn, mutex_prof_data_t *data, malloc_mutex_t *mutex) {
 	mutex_prof_data_t *source = &mutex->prof_data;
 	/* Can only read holding the mutex. */
 	malloc_mutex_assert_owner(tsdn, mutex);
@@ -286,8 +341,8 @@ malloc_mutex_prof_accum(tsdn_t *tsdn, mutex_prof_data_t *data,
 
 /* Compare the prof data and update to the maximum. */
 static inline void
-malloc_mutex_prof_max_update(tsdn_t *tsdn, mutex_prof_data_t *data,
-    malloc_mutex_t *mutex) {
+malloc_mutex_prof_max_update(
+    tsdn_t *tsdn, mutex_prof_data_t *data, malloc_mutex_t *mutex) {
 	mutex_prof_data_t *source = &mutex->prof_data;
 	/* Can only read holding the mutex. */
 	malloc_mutex_assert_owner(tsdn, mutex);
diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h
index 4a526a5a..572200f3 100644
--- a/include/jemalloc/internal/mutex_prof.h
+++ b/include/jemalloc/internal/mutex_prof.h
@@ -1,80 +1,81 @@
 #ifndef JEMALLOC_INTERNAL_MUTEX_PROF_H
 #define JEMALLOC_INTERNAL_MUTEX_PROF_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/atomic.h"
 #include "jemalloc/internal/nstime.h"
 #include "jemalloc/internal/tsd_types.h"
 
-#define MUTEX_PROF_GLOBAL_MUTEXES					\
-    OP(background_thread)						\
-    OP(max_per_bg_thd)							\
-    OP(ctl)								\
-    OP(prof)								\
-    OP(prof_thds_data)							\
-    OP(prof_dump)							\
-    OP(prof_recent_alloc)						\
-    OP(prof_recent_dump)						\
-    OP(prof_stats)
+#define MUTEX_PROF_GLOBAL_MUTEXES                                              \
+	OP(background_thread)                                                  \
+	OP(max_per_bg_thd)                                                     \
+	OP(ctl)                                                                \
+	OP(prof)                                                               \
+	OP(prof_thds_data)                                                     \
+	OP(prof_dump)                                                          \
+	OP(prof_recent_alloc)                                                  \
+	OP(prof_recent_dump)                                                   \
+	OP(prof_stats)
 
 typedef enum {
 #define OP(mtx) global_prof_mutex_##mtx,
 	MUTEX_PROF_GLOBAL_MUTEXES
 #undef OP
-	mutex_prof_num_global_mutexes
+	    mutex_prof_num_global_mutexes
 } mutex_prof_global_ind_t;
 
-#define MUTEX_PROF_ARENA_MUTEXES					\
-    OP(large)								\
-    OP(extent_avail)							\
-    OP(extents_dirty)							\
-    OP(extents_muzzy)							\
-    OP(extents_retained)						\
-    OP(decay_dirty)							\
-    OP(decay_muzzy)							\
-    OP(base)								\
-    OP(tcache_list)							\
-    OP(hpa_shard)							\
-    OP(hpa_shard_grow)							\
-    OP(hpa_sec)
+#define MUTEX_PROF_ARENA_MUTEXES                                               \
+	OP(large)                                                              \
+	OP(extent_avail)                                                       \
+	OP(extents_dirty)                                                      \
+	OP(extents_muzzy)                                                      \
+	OP(extents_retained)                                                   \
+	OP(decay_dirty)                                                        \
+	OP(decay_muzzy)                                                        \
+	OP(base)                                                               \
+	OP(tcache_list)                                                        \
+	OP(hpa_shard)                                                          \
+	OP(hpa_shard_grow)                                                     \
+	OP(hpa_sec)
 
 typedef enum {
 #define OP(mtx) arena_prof_mutex_##mtx,
 	MUTEX_PROF_ARENA_MUTEXES
 #undef OP
-	mutex_prof_num_arena_mutexes
+	    mutex_prof_num_arena_mutexes
 } mutex_prof_arena_ind_t;
 
 /*
  * The forth parameter is a boolean value that is true for derived rate counters
  * and false for real ones.
  */
-#define MUTEX_PROF_UINT64_COUNTERS					\
-    OP(num_ops, uint64_t, "n_lock_ops", false, num_ops)					\
-    OP(num_ops_ps, uint64_t, "(#/sec)", true, num_ops)				\
-    OP(num_wait, uint64_t, "n_waiting", false, num_wait)				\
-    OP(num_wait_ps, uint64_t, "(#/sec)", true, num_wait)				\
-    OP(num_spin_acq, uint64_t, "n_spin_acq", false, num_spin_acq)			\
-    OP(num_spin_acq_ps, uint64_t, "(#/sec)", true, num_spin_acq)			\
-    OP(num_owner_switch, uint64_t, "n_owner_switch", false, num_owner_switch)		\
-    OP(num_owner_switch_ps, uint64_t, "(#/sec)", true, num_owner_switch)	\
-    OP(total_wait_time, uint64_t, "total_wait_ns", false, total_wait_time)		\
-    OP(total_wait_time_ps, uint64_t, "(#/sec)", true, total_wait_time)		\
-    OP(max_wait_time, uint64_t, "max_wait_ns", false, max_wait_time)
+#define MUTEX_PROF_UINT64_COUNTERS                                             \
+	OP(num_ops, uint64_t, "n_lock_ops", false, num_ops)                    \
+	OP(num_ops_ps, uint64_t, "(#/sec)", true, num_ops)                     \
+	OP(num_wait, uint64_t, "n_waiting", false, num_wait)                   \
+	OP(num_wait_ps, uint64_t, "(#/sec)", true, num_wait)                   \
+	OP(num_spin_acq, uint64_t, "n_spin_acq", false, num_spin_acq)          \
+	OP(num_spin_acq_ps, uint64_t, "(#/sec)", true, num_spin_acq)           \
+	OP(num_owner_switch, uint64_t, "n_owner_switch", false,                \
+	    num_owner_switch)                                                  \
+	OP(num_owner_switch_ps, uint64_t, "(#/sec)", true, num_owner_switch)   \
+	OP(total_wait_time, uint64_t, "total_wait_ns", false, total_wait_time) \
+	OP(total_wait_time_ps, uint64_t, "(#/sec)", true, total_wait_time)     \
+	OP(max_wait_time, uint64_t, "max_wait_ns", false, max_wait_time)
 
-#define MUTEX_PROF_UINT32_COUNTERS					\
-    OP(max_num_thds, uint32_t, "max_n_thds", false, max_num_thds)
+#define MUTEX_PROF_UINT32_COUNTERS                                             \
+	OP(max_num_thds, uint32_t, "max_n_thds", false, max_num_thds)
 
-#define MUTEX_PROF_COUNTERS						\
-		MUTEX_PROF_UINT64_COUNTERS				\
-		MUTEX_PROF_UINT32_COUNTERS
+#define MUTEX_PROF_COUNTERS                                                    \
+	MUTEX_PROF_UINT64_COUNTERS                                             \
+	MUTEX_PROF_UINT32_COUNTERS
 
 #define OP(counter, type, human, derived, base_counter) mutex_counter_##counter,
 
-#define COUNTER_ENUM(counter_list, t)					\
-		typedef enum {						\
-			counter_list					\
-			mutex_prof_num_##t##_counters			\
-		} mutex_prof_##t##_counter_ind_t;
+#define COUNTER_ENUM(counter_list, t)                                          \
+	typedef enum {                                                         \
+		counter_list mutex_prof_num_##t##_counters                     \
+	} mutex_prof_##t##_counter_ind_t;
 
 COUNTER_ENUM(MUTEX_PROF_UINT64_COUNTERS, uint64_t)
 COUNTER_ENUM(MUTEX_PROF_UINT32_COUNTERS, uint32_t)
@@ -88,17 +89,17 @@ typedef struct {
 	 * contention.  We update them once we have the lock.
 	 */
 	/* Total time (in nano seconds) spent waiting on this mutex. */
-	nstime_t		tot_wait_time;
+	nstime_t tot_wait_time;
 	/* Max time (in nano seconds) spent on a single lock operation. */
-	nstime_t		max_wait_time;
+	nstime_t max_wait_time;
 	/* # of times have to wait for this mutex (after spinning). */
-	uint64_t		n_wait_times;
+	uint64_t n_wait_times;
 	/* # of times acquired the mutex through local spinning. */
-	uint64_t		n_spin_acquired;
+	uint64_t n_spin_acquired;
 	/* Max # of threads waiting for the mutex at the same time. */
-	uint32_t		max_n_thds;
+	uint32_t max_n_thds;
 	/* Current # of threads waiting on the lock.  Atomic synced. */
-	atomic_u32_t		n_waiting_thds;
+	atomic_u32_t n_waiting_thds;
 
 	/*
 	 * Data touched on the fast path.  These are modified right after we
@@ -107,11 +108,11 @@ typedef struct {
 	 * cacheline.
 	 */
 	/* # of times the mutex holder is different than the previous one. */
-	uint64_t		n_owner_switches;
+	uint64_t n_owner_switches;
 	/* Previous mutex holder, to facilitate n_owner_switches. */
-	tsdn_t			*prev_owner;
+	tsdn_t *prev_owner;
 	/* # of lock() operations in total. */
-	uint64_t		n_lock_ops;
+	uint64_t n_lock_ops;
 } mutex_prof_data_t;
 
 #endif /* JEMALLOC_INTERNAL_MUTEX_PROF_H */
diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h
index 486e5cca..0848b9d0 100644
--- a/include/jemalloc/internal/nstime.h
+++ b/include/jemalloc/internal/nstime.h
@@ -1,14 +1,19 @@
 #ifndef JEMALLOC_INTERNAL_NSTIME_H
 #define JEMALLOC_INTERNAL_NSTIME_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/assert.h"
+
 /* Maximum supported number of seconds (~584 years). */
 #define NSTIME_SEC_MAX KQU(18446744072)
 
 #define NSTIME_MAGIC ((uint32_t)0xb8a9ce37)
 #ifdef JEMALLOC_DEBUG
-#  define NSTIME_ZERO_INITIALIZER {0, NSTIME_MAGIC}
+#	define NSTIME_ZERO_INITIALIZER                                        \
+		{ 0, NSTIME_MAGIC }
 #else
-#  define NSTIME_ZERO_INITIALIZER {0}
+#	define NSTIME_ZERO_INITIALIZER                                        \
+		{ 0 }
 #endif
 
 typedef struct {
@@ -20,43 +25,43 @@ typedef struct {
 
 static const nstime_t nstime_zero = NSTIME_ZERO_INITIALIZER;
 
-void nstime_init(nstime_t *time, uint64_t ns);
-void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec);
+void     nstime_init(nstime_t *time, uint64_t ns);
+void     nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec);
 uint64_t nstime_ns(const nstime_t *time);
+uint64_t nstime_ms(const nstime_t *time);
 uint64_t nstime_sec(const nstime_t *time);
-uint64_t nstime_msec(const nstime_t *time);
 uint64_t nstime_nsec(const nstime_t *time);
-void nstime_copy(nstime_t *time, const nstime_t *source);
-int nstime_compare(const nstime_t *a, const nstime_t *b);
-void nstime_add(nstime_t *time, const nstime_t *addend);
-void nstime_iadd(nstime_t *time, uint64_t addend);
-void nstime_subtract(nstime_t *time, const nstime_t *subtrahend);
-void nstime_isubtract(nstime_t *time, uint64_t subtrahend);
-void nstime_imultiply(nstime_t *time, uint64_t multiplier);
-void nstime_idivide(nstime_t *time, uint64_t divisor);
+void     nstime_copy(nstime_t *time, const nstime_t *source);
+int      nstime_compare(const nstime_t *a, const nstime_t *b);
+void     nstime_add(nstime_t *time, const nstime_t *addend);
+void     nstime_iadd(nstime_t *time, uint64_t addend);
+void     nstime_subtract(nstime_t *time, const nstime_t *subtrahend);
+void     nstime_isubtract(nstime_t *time, uint64_t subtrahend);
+void     nstime_imultiply(nstime_t *time, uint64_t multiplier);
+void     nstime_idivide(nstime_t *time, uint64_t divisor);
 uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor);
+uint64_t nstime_ns_between(const nstime_t *earlier, const nstime_t *later);
+uint64_t nstime_ms_between(const nstime_t *earlier, const nstime_t *later);
 uint64_t nstime_ns_since(const nstime_t *past);
+uint64_t nstime_ms_since(const nstime_t *past);
 
-typedef bool (nstime_monotonic_t)(void);
+typedef bool(nstime_monotonic_t)(void);
 extern nstime_monotonic_t *JET_MUTABLE nstime_monotonic;
 
-typedef void (nstime_update_t)(nstime_t *);
+typedef void(nstime_update_t)(nstime_t *);
 extern nstime_update_t *JET_MUTABLE nstime_update;
 
-typedef void (nstime_prof_update_t)(nstime_t *);
+typedef void(nstime_prof_update_t)(nstime_t *);
 extern nstime_prof_update_t *JET_MUTABLE nstime_prof_update;
 
 void nstime_init_update(nstime_t *time);
 void nstime_prof_init_update(nstime_t *time);
 
-enum prof_time_res_e {
-	prof_time_res_default = 0,
-	prof_time_res_high = 1
-};
+enum prof_time_res_e { prof_time_res_default = 0, prof_time_res_high = 1 };
 typedef enum prof_time_res_e prof_time_res_t;
 
-extern prof_time_res_t opt_prof_time_res;
-extern const char *prof_time_res_mode_names[];
+extern prof_time_res_t   opt_prof_time_res;
+extern const char *const prof_time_res_mode_names[];
 
 JEMALLOC_ALWAYS_INLINE void
 nstime_init_zero(nstime_t *time) {
@@ -64,7 +69,7 @@ nstime_init_zero(nstime_t *time) {
 }
 
 JEMALLOC_ALWAYS_INLINE bool
-nstime_equals_zero(nstime_t *time) {
+nstime_equals_zero(const nstime_t *time) {
 	int diff = nstime_compare(time, &nstime_zero);
 	assert(diff >= 0);
 	return diff == 0;
diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h
index 4748a05b..f3910ad8 100644
--- a/include/jemalloc/internal/pa.h
+++ b/include/jemalloc/internal/pa.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_PA_H
 #define JEMALLOC_INTERNAL_PA_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/base.h"
 #include "jemalloc/internal/decay.h"
 #include "jemalloc/internal/ecache.h"
@@ -95,12 +96,6 @@ struct pa_shard_s {
 	/* Allocates from a PAC. */
 	pac_t pac;
 
-	/*
-	 * We place a small extent cache in front of the HPA, since we intend
-	 * these configurations to use many fewer arenas, and therefore have a
-	 * higher risk of hot locks.
-	 */
-	sec_t hpa_sec;
 	hpa_shard_t hpa_shard;
 
 	/* The source of edata_t objects. */
@@ -108,7 +103,7 @@ struct pa_shard_s {
 
 	unsigned ind;
 
-	malloc_mutex_t *stats_mtx;
+	malloc_mutex_t   *stats_mtx;
 	pa_shard_stats_t *stats;
 
 	/* The emap this shard is tied to. */
@@ -120,8 +115,8 @@ struct pa_shard_s {
 
 static inline bool
 pa_shard_dont_decay_muzzy(pa_shard_t *shard) {
-	return ecache_npages_get(&shard->pac.ecache_muzzy) == 0 &&
-	    pac_decay_ms_get(&shard->pac, extent_state_muzzy) <= 0;
+	return ecache_npages_get(&shard->pac.ecache_muzzy) == 0
+	    && pac_decay_ms_get(&shard->pac, extent_state_muzzy) <= 0;
 }
 
 static inline ehooks_t *
@@ -131,7 +126,7 @@ pa_shard_ehooks_get(pa_shard_t *shard) {
 
 /* Returns true on error. */
 bool pa_central_init(pa_central_t *central, base_t *base, bool hpa,
-    hpa_hooks_t *hpa_hooks);
+    const hpa_hooks_t *hpa_hooks);
 
 /* Returns true on error. */
 bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, pa_central_t *central,
@@ -165,6 +160,9 @@ void pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard);
  */
 void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard);
 
+/* Flush any caches used by shard */
+void pa_shard_flush(tsdn_t *tsdn, pa_shard_t *shard);
+
 /* Gets an edata for the given allocation. */
 edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size,
     size_t alignment, bool slab, szind_t szind, bool zero, bool guarded,
@@ -185,10 +183,10 @@ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size,
  * (We could make generated_dirty the return value of course, but this is more
  * consistent with the shrink pathway and our error codes here).
  */
-void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata,
-    bool *deferred_work_generated);
-bool pa_decay_ms_set(tsdn_t *tsdn, pa_shard_t *shard, extent_state_t state,
-    ssize_t decay_ms, pac_purge_eagerness_t eagerness);
+void    pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata,
+       bool *deferred_work_generated);
+bool    pa_decay_ms_set(tsdn_t *tsdn, pa_shard_t *shard, extent_state_t state,
+       ssize_t decay_ms, pac_purge_eagerness_t eagerness);
 ssize_t pa_decay_ms_get(pa_shard_t *shard, extent_state_t state);
 
 /*
@@ -198,10 +196,10 @@ ssize_t pa_decay_ms_get(pa_shard_t *shard, extent_state_t state);
  * though, the arena, background thread, and PAC modules are tightly interwoven
  * in a way that's tricky to extricate, so we only do the HPA-specific parts.
  */
-void pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard,
-    bool deferral_allowed);
-void pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard);
-void pa_shard_try_deferred_work(tsdn_t *tsdn, pa_shard_t *shard);
+void pa_shard_set_deferral_allowed(
+    tsdn_t *tsdn, pa_shard_t *shard, bool deferral_allowed);
+void     pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard);
+void     pa_shard_try_deferred_work(tsdn_t *tsdn, pa_shard_t *shard);
 uint64_t pa_shard_time_until_deferred_work(tsdn_t *tsdn, pa_shard_t *shard);
 
 /******************************************************************************/
@@ -223,13 +221,16 @@ void pa_shard_prefork5(tsdn_t *tsdn, pa_shard_t *shard);
 void pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard);
 void pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard);
 
-void pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive,
-    size_t *ndirty, size_t *nmuzzy);
+size_t pa_shard_nactive(pa_shard_t *shard);
+size_t pa_shard_ndirty(pa_shard_t *shard);
+size_t pa_shard_nmuzzy(pa_shard_t *shard);
+
+void pa_shard_basic_stats_merge(
+    pa_shard_t *shard, size_t *nactive, size_t *ndirty, size_t *nmuzzy);
 
 void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard,
     pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out,
-    hpa_shard_stats_t *hpa_stats_out, sec_stats_t *sec_stats_out,
-    size_t *resident);
+    hpa_shard_stats_t *hpa_stats_out, size_t *resident);
 
 /*
  * Reads the PA-owned mutex stats into the output stats array, at the
diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h
index 01c4e6af..a19c8b35 100644
--- a/include/jemalloc/internal/pac.h
+++ b/include/jemalloc/internal/pac.h
@@ -1,11 +1,15 @@
 #ifndef JEMALLOC_INTERNAL_PAC_H
 #define JEMALLOC_INTERNAL_PAC_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/decay.h"
+#include "jemalloc/internal/ecache.h"
+#include "jemalloc/internal/edata_cache.h"
 #include "jemalloc/internal/exp_grow.h"
+#include "jemalloc/internal/lockedint.h"
 #include "jemalloc/internal/pai.h"
 #include "san_bump.h"
 
-
 /*
  * Page allocator classic; an implementation of the PAI interface that:
  * - Can be used for arenas with custom extent hooks.
@@ -91,12 +95,12 @@ struct pac_s {
 	ecache_t ecache_muzzy;
 	ecache_t ecache_retained;
 
-	base_t *base;
-	emap_t *emap;
+	base_t        *base;
+	emap_t        *emap;
 	edata_cache_t *edata_cache;
 
 	/* The grow info for the retained ecache. */
-	exp_grow_t exp_grow;
+	exp_grow_t     exp_grow;
 	malloc_mutex_t grow_mtx;
 
 	/* Special allocator for guarded frequently reused extents. */
@@ -115,12 +119,37 @@ struct pac_s {
 	decay_t decay_muzzy; /* muzzy --> retained */
 
 	malloc_mutex_t *stats_mtx;
-	pac_stats_t *stats;
+	pac_stats_t    *stats;
 
 	/* Extent serial number generator state. */
 	atomic_zu_t extent_sn_next;
 };
 
+typedef struct pac_thp_s pac_thp_t;
+struct pac_thp_s {
+	/*
+	 * opt_thp controls THP for user requested allocations. Settings
+	 * "always", "never" and "default" are available if THP is supported
+	 * by the OS and the default extent hooks are used:
+	 * - "always" and "never" are covered by pages_set_thp_state() in
+	 *   ehooks_default_alloc_impl().
+	 * - "default" makes no change for all the other auto arenas except
+	 *   the huge arena. For the huge arena, we might also look at
+	 *   opt_metadata_thp to decide whether to use THP or not.
+	 *   This is a temporary remedy before HPA is fully supported.
+	 */
+	bool thp_madvise;
+	/* Below fields are protected by the lock. */
+	malloc_mutex_t lock;
+	bool           auto_thp_switched;
+	atomic_u_t     n_thp_lazy;
+	/*
+	 * List that tracks HUGEPAGE aligned regions that're lazily hugified
+	 * in auto thp mode.
+	 */
+	edata_list_active_t thp_lazy_list;
+};
+
 bool pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap,
     edata_cache_t *edata_cache, nstime_t *cur_time, size_t oversize_threshold,
     ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms, pac_stats_t *pac_stats,
@@ -166,11 +195,11 @@ bool pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay,
  *
  * Returns true on error (if the new limit is not valid).
  */
-bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit,
-    size_t *new_limit);
+bool pac_retain_grow_limit_get_set(
+    tsdn_t *tsdn, pac_t *pac, size_t *old_limit, size_t *new_limit);
 
-bool pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state,
-    ssize_t decay_ms, pac_purge_eagerness_t eagerness);
+bool    pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state,
+       ssize_t decay_ms, pac_purge_eagerness_t eagerness);
 ssize_t pac_decay_ms_get(pac_t *pac, extent_state_t state);
 
 void pac_reset(tsdn_t *tsdn, pac_t *pac);
diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h
index ad1f606a..a4282c9b 100644
--- a/include/jemalloc/internal/pages.h
+++ b/include/jemalloc/internal/pages.h
@@ -1,28 +1,39 @@
 #ifndef JEMALLOC_INTERNAL_PAGES_EXTERNS_H
 #define JEMALLOC_INTERNAL_PAGES_EXTERNS_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_types.h"
+
+/* Actual operating system page size, detected during bootstrap, <= PAGE. */
+extern size_t os_page;
+
 /* Page size.  LG_PAGE is determined by the configure script. */
 #ifdef PAGE_MASK
-#  undef PAGE_MASK
+#	undef PAGE_MASK
 #endif
-#define PAGE		((size_t)(1U << LG_PAGE))
-#define PAGE_MASK	((size_t)(PAGE - 1))
+#define PAGE ((size_t)(1U << LG_PAGE))
+#define PAGE_MASK ((size_t)(PAGE - 1))
 /* Return the page base address for the page containing address a. */
-#define PAGE_ADDR2BASE(a)						\
-	((void *)((uintptr_t)(a) & ~PAGE_MASK))
+#define PAGE_ADDR2BASE(a) ALIGNMENT_ADDR2BASE(a, PAGE)
 /* Return the smallest pagesize multiple that is >= s. */
-#define PAGE_CEILING(s)							\
-	(((s) + PAGE_MASK) & ~PAGE_MASK)
+#define PAGE_CEILING(s) (((s) + PAGE_MASK) & ~PAGE_MASK)
 /* Return the largest pagesize multiple that is <=s. */
-#define PAGE_FLOOR(s) 							\
-	((s) & ~PAGE_MASK)
+#define PAGE_FLOOR(s) ((s) & ~PAGE_MASK)
 
 /* Huge page size.  LG_HUGEPAGE is determined by the configure script. */
-#define HUGEPAGE	((size_t)(1U << LG_HUGEPAGE))
-#define HUGEPAGE_MASK	((size_t)(HUGEPAGE - 1))
+#define HUGEPAGE ((size_t)(1U << LG_HUGEPAGE))
+#define HUGEPAGE_MASK ((size_t)(HUGEPAGE - 1))
+
+/*
+ * Used to validate that the hugepage size is not unexpectedly high.  The huge
+ * page features (HPA, metadata_thp) are primarily designed with a 2M THP size
+ * in mind.  Much larger sizes are not tested and likely to cause issues such as
+ * bad fragmentation or simply broken.
+ */
+#define HUGEPAGE_MAX_EXPECTED_SIZE ((size_t)(16U << 20))
 
 #if LG_HUGEPAGE != 0
-#  define HUGEPAGE_PAGES (HUGEPAGE / PAGE)
+#	define HUGEPAGE_PAGES (HUGEPAGE / PAGE)
 #else
 /*
  * It's convenient to define arrays (or bitmaps) of HUGEPAGE_PAGES lengths.  If
@@ -31,19 +42,17 @@
  * that this value is at least 1.  (We won't ever run in this degraded state;
  * hpa_supported() returns false in this case.
  */
-#  define HUGEPAGE_PAGES 1
+#	define HUGEPAGE_PAGES 1
 #endif
 
 /* Return the huge page base address for the huge page containing address a. */
-#define HUGEPAGE_ADDR2BASE(a)						\
-	((void *)((uintptr_t)(a) & ~HUGEPAGE_MASK))
+#define HUGEPAGE_ADDR2BASE(a) ALIGNMENT_ADDR2BASE(a, HUGEPAGE)
 /* Return the smallest pagesize multiple that is >= s. */
-#define HUGEPAGE_CEILING(s)						\
-	(((s) + HUGEPAGE_MASK) & ~HUGEPAGE_MASK)
+#define HUGEPAGE_CEILING(s) (((s) + HUGEPAGE_MASK) & ~HUGEPAGE_MASK)
 
 /* PAGES_CAN_PURGE_LAZY is defined if lazy purging is supported. */
 #if defined(_WIN32) || defined(JEMALLOC_PURGE_MADVISE_FREE)
-#  define PAGES_CAN_PURGE_LAZY
+#	define PAGES_CAN_PURGE_LAZY
 #endif
 /*
  * PAGES_CAN_PURGE_FORCED is defined if forced purging is supported.
@@ -54,10 +63,11 @@
  * next step after purging on Windows anyway, there's no point in adding such
  * complexity.
  */
-#if !defined(_WIN32) && ((defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
-    defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)) || \
-    defined(JEMALLOC_MAPS_COALESCE))
-#  define PAGES_CAN_PURGE_FORCED
+#if !defined(_WIN32)                                                           \
+    && ((defined(JEMALLOC_PURGE_MADVISE_DONTNEED)                              \
+            && defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS))                 \
+        || defined(JEMALLOC_MAPS_COALESCE))
+#	define PAGES_CAN_PURGE_FORCED
 #endif
 
 static const bool pages_can_purge_lazy =
@@ -76,7 +86,7 @@ static const bool pages_can_purge_forced =
     ;
 
 #if defined(JEMALLOC_HAVE_MADVISE_HUGE) || defined(JEMALLOC_HAVE_MEMCNTL)
-#  define PAGES_CAN_HUGIFY
+#	define PAGES_CAN_HUGIFY
 #endif
 
 static const bool pages_can_hugify =
@@ -87,32 +97,46 @@ static const bool pages_can_hugify =
 #endif
     ;
 
+/*
+ * thp_mode_t are values for opt.thp, while system_thp_mode_t is for kernel thp
+ * settings, i.e., init_system_thp_mode.
+ */
 typedef enum {
-	thp_mode_default       = 0, /* Do not change hugepage settings. */
-	thp_mode_always        = 1, /* Always set MADV_HUGEPAGE. */
-	thp_mode_never         = 2, /* Always set MADV_NOHUGEPAGE. */
+	thp_mode_do_nothing = 0, /* Respect kernel thp settings. */
+	thp_mode_always = 1,  /* Always set MADV_HUGEPAGE. */
+	thp_mode_never = 2,   /* Always set MADV_NOHUGEPAGE. */
 
-	thp_mode_names_limit   = 3, /* Used for option processing. */
-	thp_mode_not_supported = 3  /* No THP support detected. */
+	thp_mode_names_limit = 3,  /* Used for option processing. */
+	thp_mode_not_supported = 3 /* No THP support detected. */
 } thp_mode_t;
 
-#define THP_MODE_DEFAULT thp_mode_default
-extern thp_mode_t opt_thp;
-extern thp_mode_t init_system_thp_mode; /* Initial system wide state. */
-extern const char *thp_mode_names[];
+typedef enum {
+	system_thp_mode_madvise = 0,     /* Kernel THP mode: madvise */
+	system_thp_mode_always = 1,      /* Kernel THP mode: always */
+	system_thp_mode_never = 2,       /* Kernel THP mode: never */
+	system_thp_mode_not_supported = 3 /* No THP support detected. */
+} system_thp_mode_t;
+
+#define THP_MODE_DEFAULT thp_mode_do_nothing
+extern thp_mode_t        opt_thp;
+extern system_thp_mode_t init_system_thp_mode; /* Initial system wide state. */
+extern const char *const thp_mode_names[];
+extern const char *const system_thp_mode_names[];
 
 void *pages_map(void *addr, size_t size, size_t alignment, bool *commit);
-void pages_unmap(void *addr, size_t size);
-bool pages_commit(void *addr, size_t size);
-bool pages_decommit(void *addr, size_t size);
-bool pages_purge_lazy(void *addr, size_t size);
-bool pages_purge_forced(void *addr, size_t size);
+void  pages_unmap(void *addr, size_t size);
+bool  pages_commit(void *addr, size_t size);
+bool  pages_decommit(void *addr, size_t size);
+bool  pages_purge_lazy(void *addr, size_t size);
+bool  pages_purge_forced(void *addr, size_t size);
+bool pages_purge_process_madvise(void *vec, size_t ven_len, size_t total_bytes);
 bool pages_huge(void *addr, size_t size);
 bool pages_nohuge(void *addr, size_t size);
+bool pages_collapse(void *addr, size_t size);
 bool pages_dontdump(void *addr, size_t size);
 bool pages_dodump(void *addr, size_t size);
 bool pages_boot(void);
-void pages_set_thp_state (void *ptr, size_t size);
+void pages_set_thp_state(void *ptr, size_t size);
 void pages_mark_guards(void *head, void *tail);
 void pages_unmark_guards(void *head, void *tail);
 
diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h
index d978cd7d..9b4c257b 100644
--- a/include/jemalloc/internal/pai.h
+++ b/include/jemalloc/internal/pai.h
@@ -1,6 +1,10 @@
 #ifndef JEMALLOC_INTERNAL_PAI_H
 #define JEMALLOC_INTERNAL_PAI_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/edata.h"
+#include "jemalloc/internal/tsd_types.h"
+
 /* An interface for page allocation. */
 
 typedef struct pai_s pai_t;
@@ -9,15 +13,6 @@ struct pai_s {
 	edata_t *(*alloc)(tsdn_t *tsdn, pai_t *self, size_t size,
 	    size_t alignment, bool zero, bool guarded, bool frequent_reuse,
 	    bool *deferred_work_generated);
-	/*
-	 * Returns the number of extents added to the list (which may be fewer
-	 * than requested, in case of OOM).  The list should already be
-	 * initialized.  The only alignment guarantee is page-alignment, and
-	 * the results are not necessarily zeroed.
-	 */
-	size_t (*alloc_batch)(tsdn_t *tsdn, pai_t *self, size_t size,
-	    size_t nallocs, edata_list_active_t *results,
-	    bool *deferred_work_generated);
 	bool (*expand)(tsdn_t *tsdn, pai_t *self, edata_t *edata,
 	    size_t old_size, size_t new_size, bool zero,
 	    bool *deferred_work_generated);
@@ -25,9 +20,6 @@ struct pai_s {
 	    size_t old_size, size_t new_size, bool *deferred_work_generated);
 	void (*dalloc)(tsdn_t *tsdn, pai_t *self, edata_t *edata,
 	    bool *deferred_work_generated);
-	/* This function empties out list as a side-effect of being called. */
-	void (*dalloc_batch)(tsdn_t *tsdn, pai_t *self,
-	    edata_list_active_t *list, bool *deferred_work_generated);
 	uint64_t (*time_until_deferred_work)(tsdn_t *tsdn, pai_t *self);
 };
 
@@ -37,20 +29,12 @@ struct pai_s {
  */
 
 static inline edata_t *
-pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment,
-    bool zero, bool guarded, bool frequent_reuse,
-    bool *deferred_work_generated) {
+pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero,
+    bool guarded, bool frequent_reuse, bool *deferred_work_generated) {
 	return self->alloc(tsdn, self, size, alignment, zero, guarded,
 	    frequent_reuse, deferred_work_generated);
 }
 
-static inline size_t
-pai_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs,
-    edata_list_active_t *results, bool *deferred_work_generated) {
-	return self->alloc_batch(tsdn, self, size, nallocs, results,
-	    deferred_work_generated);
-}
-
 static inline bool
 pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
     size_t new_size, bool zero, bool *deferred_work_generated) {
@@ -61,35 +45,19 @@ pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
 static inline bool
 pai_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
     size_t new_size, bool *deferred_work_generated) {
-	return self->shrink(tsdn, self, edata, old_size, new_size,
-	    deferred_work_generated);
+	return self->shrink(
+	    tsdn, self, edata, old_size, new_size, deferred_work_generated);
 }
 
 static inline void
-pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    bool *deferred_work_generated) {
+pai_dalloc(
+    tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) {
 	self->dalloc(tsdn, self, edata, deferred_work_generated);
 }
 
-static inline void
-pai_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list,
-    bool *deferred_work_generated) {
-	self->dalloc_batch(tsdn, self, list, deferred_work_generated);
-}
-
 static inline uint64_t
 pai_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) {
 	return self->time_until_deferred_work(tsdn, self);
 }
 
-/*
- * An implementation of batch allocation that simply calls alloc once for
- * each item in the list.
- */
-size_t pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size,
-    size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated);
-/* Ditto, for dalloc. */
-void pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self,
-    edata_list_active_t *list, bool *deferred_work_generated);
-
 #endif /* JEMALLOC_INTERNAL_PAI_H */
diff --git a/include/jemalloc/internal/peak.h b/include/jemalloc/internal/peak.h
index 59da3e41..599f1a02 100644
--- a/include/jemalloc/internal/peak.h
+++ b/include/jemalloc/internal/peak.h
@@ -1,6 +1,8 @@
 #ifndef JEMALLOC_INTERNAL_PEAK_H
 #define JEMALLOC_INTERNAL_PEAK_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+
 typedef struct peak_s peak_t;
 struct peak_s {
 	/* The highest recorded peak value, after adjustment (see below). */
@@ -12,7 +14,8 @@ struct peak_s {
 	uint64_t adjustment;
 };
 
-#define PEAK_INITIALIZER {0, 0}
+#define PEAK_INITIALIZER                                                       \
+	{ 0, 0 }
 
 static inline uint64_t
 peak_max(peak_t *peak) {
diff --git a/include/jemalloc/internal/peak_event.h b/include/jemalloc/internal/peak_event.h
index b808ce04..0d1f1627 100644
--- a/include/jemalloc/internal/peak_event.h
+++ b/include/jemalloc/internal/peak_event.h
@@ -1,6 +1,17 @@
 #ifndef JEMALLOC_INTERNAL_PEAK_EVENT_H
 #define JEMALLOC_INTERNAL_PEAK_EVENT_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/tsd_types.h"
+
+/*
+ * Update every 64K by default.  We're not exposing this as a configuration
+ * option for now; we don't want to bind ourselves too tightly to any particular
+ * performance requirements for small values, or guarantee that we'll even be
+ * able to provide fine-grained accuracy.
+ */
+#define PEAK_EVENT_WAIT (64 * 1024)
+
 /*
  * While peak.h contains the simple helper struct that tracks state, this
  * contains the allocator tie-ins (and knows about tsd, the event module, etc.).
@@ -9,16 +20,9 @@
 /* Update the peak with current tsd state. */
 void peak_event_update(tsd_t *tsd);
 /* Set current state to zero. */
-void peak_event_zero(tsd_t *tsd);
+void     peak_event_zero(tsd_t *tsd);
 uint64_t peak_event_max(tsd_t *tsd);
 
-/* Manual hooks. */
-/* The activity-triggered hooks. */
-uint64_t peak_alloc_new_event_wait(tsd_t *tsd);
-uint64_t peak_alloc_postponed_event_wait(tsd_t *tsd);
-void peak_alloc_event_handler(tsd_t *tsd, uint64_t elapsed);
-uint64_t peak_dalloc_new_event_wait(tsd_t *tsd);
-uint64_t peak_dalloc_postponed_event_wait(tsd_t *tsd);
-void peak_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed);
+extern te_base_cb_t peak_te_handler;
 
 #endif /* JEMALLOC_INTERNAL_PEAK_EVENT_H */
diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h
index 5f091c5f..803d2cbd 100644
--- a/include/jemalloc/internal/ph.h
+++ b/include/jemalloc/internal/ph.h
@@ -1,6 +1,10 @@
 #ifndef JEMALLOC_INTERNAL_PH_H
 #define JEMALLOC_INTERNAL_PH_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/bit_util.h"
+
 /*
  * A Pairing Heap implementation.
  *
@@ -71,9 +75,19 @@ struct ph_s {
 	size_t auxcount;
 };
 
+typedef struct ph_enumerate_vars_s ph_enumerate_vars_t;
+struct ph_enumerate_vars_s {
+	uint16_t front;
+	uint16_t rear;
+	uint16_t queue_size;
+	uint16_t visited_num;
+	uint16_t max_visit_num;
+	uint16_t max_queue_size;
+};
+
 JEMALLOC_ALWAYS_INLINE phn_link_t *
 phn_link_get(void *phn, size_t offset) {
-	return (phn_link_t *)(((uintptr_t)phn) + offset);
+	return (phn_link_t *)(((char *)phn) + offset);
 }
 
 JEMALLOC_ALWAYS_INLINE void
@@ -115,8 +129,7 @@ phn_prev_set(void *phn, void *prev, size_t offset) {
 }
 
 JEMALLOC_ALWAYS_INLINE void
-phn_merge_ordered(void *phn0, void *phn1, size_t offset,
-    ph_cmp_t cmp) {
+phn_merge_ordered(void *phn0, void *phn1, size_t offset, ph_cmp_t cmp) {
 	void *phn0child;
 
 	assert(phn0 != NULL);
@@ -127,6 +140,7 @@ phn_merge_ordered(void *phn0, void *phn1, size_t offset,
 	phn0child = phn_lchild_get(phn0, offset);
 	phn_next_set(phn1, phn0child, offset);
 	if (phn0child != NULL) {
+		/* NOLINTNEXTLINE(readability-suspicious-call-argument) */
 		phn_prev_set(phn0child, phn1, offset);
 	}
 	phn_lchild_set(phn0, phn1, offset);
@@ -143,6 +157,7 @@ phn_merge(void *phn0, void *phn1, size_t offset, ph_cmp_t cmp) {
 		phn_merge_ordered(phn0, phn1, offset, cmp);
 		result = phn0;
 	} else {
+		/* NOLINTNEXTLINE(readability-suspicious-call-argument) */
 		phn_merge_ordered(phn1, phn0, offset, cmp);
 		result = phn1;
 	}
@@ -156,6 +171,10 @@ phn_merge_siblings(void *phn, size_t offset, ph_cmp_t cmp) {
 	void *phn0 = phn;
 	void *phn1 = phn_next_get(phn0, offset);
 
+	if (phn1 == NULL) {
+		return phn0;
+	}
+
 	/*
 	 * Multipass merge, wherein the first two elements of a FIFO
 	 * are repeatedly merged, and each result is appended to the
@@ -164,59 +183,61 @@ phn_merge_siblings(void *phn, size_t offset, ph_cmp_t cmp) {
 	 * its tail, so we do a single pass over the sibling list to
 	 * populate the FIFO.
 	 */
-	if (phn1 != NULL) {
-		void *phnrest = phn_next_get(phn1, offset);
-		if (phnrest != NULL) {
-			phn_prev_set(phnrest, NULL, offset);
-		}
-		phn_prev_set(phn0, NULL, offset);
-		phn_next_set(phn0, NULL, offset);
-		phn_prev_set(phn1, NULL, offset);
-		phn_next_set(phn1, NULL, offset);
-		phn0 = phn_merge(phn0, phn1, offset, cmp);
-		head = tail = phn0;
-		phn0 = phnrest;
-		while (phn0 != NULL) {
-			phn1 = phn_next_get(phn0, offset);
-			if (phn1 != NULL) {
-				phnrest = phn_next_get(phn1, offset);
-				if (phnrest != NULL) {
-					phn_prev_set(phnrest, NULL, offset);
-				}
-				phn_prev_set(phn0, NULL, offset);
-				phn_next_set(phn0, NULL, offset);
-				phn_prev_set(phn1, NULL, offset);
-				phn_next_set(phn1, NULL, offset);
-				phn0 = phn_merge(phn0, phn1, offset, cmp);
-				phn_next_set(tail, phn0, offset);
-				tail = phn0;
-				phn0 = phnrest;
-			} else {
-				phn_next_set(tail, phn0, offset);
-				tail = phn0;
-				phn0 = NULL;
-			}
-		}
-		phn0 = head;
+	void *phnrest = phn_next_get(phn1, offset);
+	if (phnrest != NULL) {
+		phn_prev_set(phnrest, NULL, offset);
+	}
+	phn_prev_set(phn0, NULL, offset);
+	phn_next_set(phn0, NULL, offset);
+	phn_prev_set(phn1, NULL, offset);
+	phn_next_set(phn1, NULL, offset);
+	phn0 = phn_merge(phn0, phn1, offset, cmp);
+	head = tail = phn0;
+	phn0 = phnrest;
+	while (phn0 != NULL) {
 		phn1 = phn_next_get(phn0, offset);
 		if (phn1 != NULL) {
-			while (true) {
-				head = phn_next_get(phn1, offset);
-				assert(phn_prev_get(phn0, offset) == NULL);
-				phn_next_set(phn0, NULL, offset);
-				assert(phn_prev_get(phn1, offset) == NULL);
-				phn_next_set(phn1, NULL, offset);
-				phn0 = phn_merge(phn0, phn1, offset, cmp);
-				if (head == NULL) {
-					break;
-				}
-				phn_next_set(tail, phn0, offset);
-				tail = phn0;
-				phn0 = head;
-				phn1 = phn_next_get(phn0, offset);
+			phnrest = phn_next_get(phn1, offset);
+			if (phnrest != NULL) {
+				phn_prev_set(phnrest, NULL, offset);
 			}
+			phn_prev_set(phn0, NULL, offset);
+			phn_next_set(phn0, NULL, offset);
+			phn_prev_set(phn1, NULL, offset);
+			phn_next_set(phn1, NULL, offset);
+			phn0 = phn_merge(phn0, phn1, offset, cmp);
+			/* NOLINTNEXTLINE(readability-suspicious-call-argument) */
+			phn_next_set(tail, phn0, offset);
+			tail = phn0;
+			phn0 = phnrest;
+		} else {
+			/* NOLINTNEXTLINE(readability-suspicious-call-argument) */
+			phn_next_set(tail, phn0, offset);
+			tail = phn0;
+			phn0 = NULL;
 		}
 	}
+	phn0 = head;
+	phn1 = phn_next_get(phn0, offset);
+	if (phn1 != NULL) {
+		while (true) {
+			head = phn_next_get(phn1, offset);
+			assert(phn_prev_get(phn0, offset) == NULL);
+			phn_next_set(phn0, NULL, offset);
+			assert(phn_prev_get(phn1, offset) == NULL);
+			phn_next_set(phn1, NULL, offset);
+			phn0 = phn_merge(phn0, phn1, offset, cmp);
+			if (head == NULL) {
+				break;
+			}
+			/* NOLINTNEXTLINE(readability-suspicious-call-argument) */
+			phn_next_set(tail, phn0, offset);
+			tail = phn0;
+			phn0 = head;
+			phn1 = phn_next_get(phn0, offset);
+		}
+	}
+
 	return phn0;
 }
 
@@ -230,7 +251,7 @@ ph_merge_aux(ph_t *ph, size_t offset, ph_cmp_t cmp) {
 		phn_prev_set(phn, NULL, offset);
 		phn = phn_merge_siblings(phn, offset, cmp);
 		assert(phn_next_get(phn, offset) == NULL);
-		ph->root = phn_merge(ph->root, phn, offset, cmp);
+		phn_merge_ordered(ph->root, phn, offset, cmp);
 	}
 }
 
@@ -298,6 +319,7 @@ ph_try_aux_merge_pair(ph_t *ph, size_t offset, ph_cmp_t cmp) {
 	phn0 = phn_merge(phn0, phn1, offset, cmp);
 	phn_next_set(phn0, next_phn1, offset);
 	if (next_phn1 != NULL) {
+		/* NOLINTNEXTLINE(readability-suspicious-call-argument) */
 		phn_prev_set(next_phn1, phn0, offset);
 	}
 	phn_next_set(ph->root, phn0, offset);
@@ -318,36 +340,36 @@ ph_insert(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) {
 	 */
 	if (ph->root == NULL) {
 		ph->root = phn;
-	} else {
-		/*
-		 * As a special case, check to see if we can replace the root.
-		 * This is practically common in some important cases, and lets
-		 * us defer some insertions (hopefully, until the point where
-		 * some of the items in the aux list have been removed, savings
-		 * us from linking them at all).
-		 */
-		if (cmp(phn, ph->root) < 0) {
-			phn_lchild_set(phn, ph->root, offset);
-			phn_prev_set(ph->root, phn, offset);
-			ph->root = phn;
-			ph->auxcount = 0;
-			return;
-		}
-		ph->auxcount++;
-		phn_next_set(phn, phn_next_get(ph->root, offset), offset);
-		if (phn_next_get(ph->root, offset) != NULL) {
-			phn_prev_set(phn_next_get(ph->root, offset), phn,
-			    offset);
-		}
-		phn_prev_set(phn, ph->root, offset);
-		phn_next_set(ph->root, phn, offset);
+		return;
 	}
-	if (ph->auxcount > 1) {
-		unsigned nmerges = ffs_zu(ph->auxcount - 1);
-		bool done = false;
-		for (unsigned i = 0; i < nmerges && !done; i++) {
-			done = ph_try_aux_merge_pair(ph, offset, cmp);
-		}
+
+	/*
+	 * As a special case, check to see if we can replace the root.
+	 * This is practically common in some important cases, and lets
+	 * us defer some insertions (hopefully, until the point where
+	 * some of the items in the aux list have been removed, savings
+	 * us from linking them at all).
+	 */
+	if (cmp(phn, ph->root) < 0) {
+		phn_lchild_set(phn, ph->root, offset);
+		phn_prev_set(ph->root, phn, offset);
+		ph->root = phn;
+		ph->auxcount = 0;
+		return;
+	}
+
+	phn_next_set(phn, phn_next_get(ph->root, offset), offset);
+	if (phn_next_get(ph->root, offset) != NULL) {
+		phn_prev_set(phn_next_get(ph->root, offset), phn, offset);
+	}
+	phn_prev_set(phn, ph->root, offset);
+	phn_next_set(ph->root, phn, offset);
+
+	ph->auxcount++;
+	unsigned nmerges = ffs_zu(ph->auxcount);
+	bool     done = false;
+	for (unsigned i = 0; i < nmerges && !done; i++) {
+		done = ph_try_aux_merge_pair(ph, offset, cmp);
 	}
 }
 
@@ -363,158 +385,210 @@ ph_remove_first(ph_t *ph, size_t offset, ph_cmp_t cmp) {
 	ph->root = ph_merge_children(ph->root, offset, cmp);
 
 	return ret;
-
 }
 
 JEMALLOC_ALWAYS_INLINE void
 ph_remove(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) {
-	void *replace;
-	void *parent;
-
 	if (ph->root == phn) {
-		/*
-		 * We can delete from aux list without merging it, but we need
-		 * to merge if we are dealing with the root node and it has
-		 * children.
-		 */
-		if (phn_lchild_get(phn, offset) == NULL) {
-			ph->root = phn_next_get(phn, offset);
-			if (ph->root != NULL) {
-				phn_prev_set(ph->root, NULL, offset);
-			}
-			return;
-		}
 		ph_merge_aux(ph, offset, cmp);
-		if (ph->root == phn) {
-			ph->root = ph_merge_children(ph->root, offset, cmp);
-			return;
-		}
+		ph->root = ph_merge_children(phn, offset, cmp);
+		return;
 	}
 
-	/* Get parent (if phn is leftmost child) before mutating. */
-	if ((parent = phn_prev_get(phn, offset)) != NULL) {
-		if (phn_lchild_get(parent, offset) != phn) {
-			parent = NULL;
-		}
-	}
-	/* Find a possible replacement node, and link to parent. */
-	replace = ph_merge_children(phn, offset, cmp);
-	/* Set next/prev for sibling linked list. */
+	void *prev = phn_prev_get(phn, offset);
+	void *next = phn_next_get(phn, offset);
+
+	/* If we have children, then we integrate them back in the heap. */
+	void *replace = ph_merge_children(phn, offset, cmp);
 	if (replace != NULL) {
-		if (parent != NULL) {
-			phn_prev_set(replace, parent, offset);
-			phn_lchild_set(parent, replace, offset);
-		} else {
-			phn_prev_set(replace, phn_prev_get(phn, offset),
-			    offset);
-			if (phn_prev_get(phn, offset) != NULL) {
-				phn_next_set(phn_prev_get(phn, offset), replace,
-				    offset);
-			}
-		}
-		phn_next_set(replace, phn_next_get(phn, offset), offset);
-		if (phn_next_get(phn, offset) != NULL) {
-			phn_prev_set(phn_next_get(phn, offset), replace,
-			    offset);
+		phn_next_set(replace, next, offset);
+		if (next != NULL) {
+			phn_prev_set(next, replace, offset);
 		}
+
+		next = replace;
+	}
+
+	if (next != NULL) {
+		phn_prev_set(next, prev, offset);
+	}
+
+	assert(prev != NULL);
+	if (phn_lchild_get(prev, offset) == phn) {
+		phn_lchild_set(prev, next, offset);
 	} else {
-		if (parent != NULL) {
-			void *next = phn_next_get(phn, offset);
-			phn_lchild_set(parent, next, offset);
-			if (next != NULL) {
-				phn_prev_set(next, parent, offset);
-			}
-		} else {
-			assert(phn_prev_get(phn, offset) != NULL);
-			phn_next_set(
-			    phn_prev_get(phn, offset),
-			    phn_next_get(phn, offset), offset);
-		}
-		if (phn_next_get(phn, offset) != NULL) {
-			phn_prev_set(
-			    phn_next_get(phn, offset),
-			    phn_prev_get(phn, offset), offset);
-		}
+		phn_next_set(prev, next, offset);
 	}
 }
 
-#define ph_structs(a_prefix, a_type)					\
-typedef struct {							\
-	phn_link_t link;						\
-} a_prefix##_link_t;							\
-									\
-typedef struct {							\
-	ph_t ph;							\
-} a_prefix##_t;
+JEMALLOC_ALWAYS_INLINE void
+ph_enumerate_vars_init(ph_enumerate_vars_t *vars, uint16_t max_visit_num,
+    uint16_t max_queue_size) {
+	vars->queue_size = 0;
+	vars->visited_num = 0;
+	vars->front = 0;
+	vars->rear = 0;
+	vars->max_visit_num = max_visit_num;
+	vars->max_queue_size = max_queue_size;
+	assert(vars->max_visit_num > 0);
+	/*
+	 * max_queue_size must be able to support max_visit_num, which means
+	 * the queue will not overflow before reaching max_visit_num.
+	 */
+	assert(vars->max_queue_size >= (vars->max_visit_num + 1) / 2);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+ph_enumerate_queue_push(
+    void *phn, void **bfs_queue, ph_enumerate_vars_t *vars) {
+	assert(vars->queue_size < vars->max_queue_size);
+	bfs_queue[vars->rear] = phn;
+	vars->rear = (vars->rear + 1) % vars->max_queue_size;
+	(vars->queue_size)++;
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+ph_enumerate_queue_pop(void **bfs_queue, ph_enumerate_vars_t *vars) {
+	assert(vars->queue_size > 0);
+	assert(vars->queue_size <= vars->max_queue_size);
+	void *ret = bfs_queue[vars->front];
+	vars->front = (vars->front + 1) % vars->max_queue_size;
+	(vars->queue_size)--;
+	return ret;
+}
+
+/*
+ * The two functions below offer a solution to enumerate the pairing heap.
+ * Whe enumerating, always call ph_enumerate_prepare first to prepare the queue
+ * needed for BFS.  Next, call ph_enumerate_next to get the next element in
+ * the enumeration.  When enumeration ends, ph_enumerate_next returns NULL and
+ * should not be called again.  Enumeration ends when all elements in the heap
+ * has been enumerated or the number of visited elements exceed
+ * max_visit_num.
+ */
+JEMALLOC_ALWAYS_INLINE void
+ph_enumerate_prepare(ph_t *ph, void **bfs_queue, ph_enumerate_vars_t *vars,
+    uint16_t max_visit_num, uint16_t max_queue_size) {
+	ph_enumerate_vars_init(vars, max_visit_num, max_queue_size);
+	ph_enumerate_queue_push(ph->root, bfs_queue, vars);
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+ph_enumerate_next(
+    ph_t *ph, size_t offset, void **bfs_queue, ph_enumerate_vars_t *vars) {
+	if (vars->queue_size == 0) {
+		return NULL;
+	}
+
+	(vars->visited_num)++;
+	if (vars->visited_num > vars->max_visit_num) {
+		return NULL;
+	}
+
+	void *ret = ph_enumerate_queue_pop(bfs_queue, vars);
+	assert(ret != NULL);
+	void *left = phn_lchild_get(ret, offset);
+	void *right = phn_next_get(ret, offset);
+	if (left) {
+		ph_enumerate_queue_push(left, bfs_queue, vars);
+	}
+	if (right) {
+		ph_enumerate_queue_push(right, bfs_queue, vars);
+	}
+	return ret;
+}
+
+#define ph_structs(a_prefix, a_type, a_max_queue_size)                         \
+	typedef struct {                                                       \
+		phn_link_t link;                                               \
+	} a_prefix##_link_t;                                                   \
+                                                                               \
+	typedef struct {                                                       \
+		ph_t ph;                                                       \
+	} a_prefix##_t;                                                        \
+                                                                               \
+	typedef struct {                                                       \
+		void               *bfs_queue[a_max_queue_size];               \
+		ph_enumerate_vars_t vars;                                      \
+	} a_prefix##_enumerate_helper_t;
 
 /*
  * The ph_proto() macro generates function prototypes that correspond to the
  * functions generated by an equivalently parameterized call to ph_gen().
  */
-#define ph_proto(a_attr, a_prefix, a_type)				\
-									\
-a_attr void a_prefix##_new(a_prefix##_t *ph);				\
-a_attr bool a_prefix##_empty(a_prefix##_t *ph);				\
-a_attr a_type *a_prefix##_first(a_prefix##_t *ph);			\
-a_attr a_type *a_prefix##_any(a_prefix##_t *ph);			\
-a_attr void a_prefix##_insert(a_prefix##_t *ph, a_type *phn);		\
-a_attr a_type *a_prefix##_remove_first(a_prefix##_t *ph);		\
-a_attr void a_prefix##_remove(a_prefix##_t *ph, a_type *phn);		\
-a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph);
+#define ph_proto(a_attr, a_prefix, a_type)                                     \
+                                                                               \
+	a_attr void    a_prefix##_new(a_prefix##_t *ph);                       \
+	a_attr bool    a_prefix##_empty(a_prefix##_t *ph);                     \
+	a_attr a_type *a_prefix##_first(a_prefix##_t *ph);                     \
+	a_attr a_type *a_prefix##_any(a_prefix##_t *ph);                       \
+	a_attr void    a_prefix##_insert(a_prefix##_t *ph, a_type *phn);       \
+	a_attr a_type *a_prefix##_remove_first(a_prefix##_t *ph);              \
+	a_attr void    a_prefix##_remove(a_prefix##_t *ph, a_type *phn);       \
+	a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph);                \
+	a_attr void    a_prefix##_enumerate_prepare(a_prefix##_t *ph,          \
+	       a_prefix##_enumerate_helper_t *helper, uint16_t max_visit_num,  \
+	       uint16_t max_queue_size);                                       \
+	a_attr a_type *a_prefix##_enumerate_next(                              \
+	    a_prefix##_t *ph, a_prefix##_enumerate_helper_t *helper);
 
 /* The ph_gen() macro generates a type-specific pairing heap implementation. */
-#define ph_gen(a_attr, a_prefix, a_type, a_field, a_cmp)		\
-JEMALLOC_ALWAYS_INLINE int						\
-a_prefix##_ph_cmp(void *a, void *b) {					\
-	return a_cmp((a_type *)a, (a_type *)b);				\
-}									\
-									\
-a_attr void								\
-a_prefix##_new(a_prefix##_t *ph) {					\
-	ph_new(&ph->ph);						\
-}									\
-									\
-a_attr bool								\
-a_prefix##_empty(a_prefix##_t *ph) {					\
-	return ph_empty(&ph->ph);					\
-}									\
-									\
-a_attr a_type *								\
-a_prefix##_first(a_prefix##_t *ph) {					\
-	return ph_first(&ph->ph, offsetof(a_type, a_field),		\
-	    &a_prefix##_ph_cmp);					\
-}									\
-									\
-a_attr a_type *								\
-a_prefix##_any(a_prefix##_t *ph) {					\
-	return ph_any(&ph->ph, offsetof(a_type, a_field));		\
-}									\
-									\
-a_attr void								\
-a_prefix##_insert(a_prefix##_t *ph, a_type *phn) {			\
-	ph_insert(&ph->ph, phn, offsetof(a_type, a_field),		\
-	    a_prefix##_ph_cmp);						\
-}									\
-									\
-a_attr a_type *								\
-a_prefix##_remove_first(a_prefix##_t *ph) {				\
-	return ph_remove_first(&ph->ph, offsetof(a_type, a_field),	\
-	    a_prefix##_ph_cmp);						\
-}									\
-									\
-a_attr void								\
-a_prefix##_remove(a_prefix##_t *ph, a_type *phn) {			\
-	ph_remove(&ph->ph, phn, offsetof(a_type, a_field),		\
-	    a_prefix##_ph_cmp);						\
-}									\
-									\
-a_attr a_type *								\
-a_prefix##_remove_any(a_prefix##_t *ph) {				\
-	a_type *ret = a_prefix##_any(ph);				\
-	if (ret != NULL) {						\
-		a_prefix##_remove(ph, ret);				\
-	}								\
-	return ret;							\
-}
+#define ph_gen(a_attr, a_prefix, a_type, a_field, a_cmp)                       \
+	JEMALLOC_ALWAYS_INLINE int a_prefix##_ph_cmp(void *a, void *b) {       \
+		return a_cmp((a_type *)a, (a_type *)b);                        \
+	}                                                                      \
+                                                                               \
+	a_attr void a_prefix##_new(a_prefix##_t *ph) {                         \
+		ph_new(&ph->ph);                                               \
+	}                                                                      \
+                                                                               \
+	a_attr bool a_prefix##_empty(a_prefix##_t *ph) {                       \
+		return ph_empty(&ph->ph);                                      \
+	}                                                                      \
+                                                                               \
+	a_attr a_type *a_prefix##_first(a_prefix##_t *ph) {                    \
+		return ph_first(                                               \
+		    &ph->ph, offsetof(a_type, a_field), &a_prefix##_ph_cmp);   \
+	}                                                                      \
+                                                                               \
+	a_attr a_type *a_prefix##_any(a_prefix##_t *ph) {                      \
+		return ph_any(&ph->ph, offsetof(a_type, a_field));             \
+	}                                                                      \
+                                                                               \
+	a_attr void a_prefix##_insert(a_prefix##_t *ph, a_type *phn) {         \
+		ph_insert(&ph->ph, phn, offsetof(a_type, a_field),             \
+		    a_prefix##_ph_cmp);                                        \
+	}                                                                      \
+                                                                               \
+	a_attr a_type *a_prefix##_remove_first(a_prefix##_t *ph) {             \
+		return ph_remove_first(                                        \
+		    &ph->ph, offsetof(a_type, a_field), a_prefix##_ph_cmp);    \
+	}                                                                      \
+                                                                               \
+	a_attr void a_prefix##_remove(a_prefix##_t *ph, a_type *phn) {         \
+		ph_remove(&ph->ph, phn, offsetof(a_type, a_field),             \
+		    a_prefix##_ph_cmp);                                        \
+	}                                                                      \
+                                                                               \
+	a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph) {               \
+		a_type *ret = a_prefix##_any(ph);                              \
+		if (ret != NULL) {                                             \
+			a_prefix##_remove(ph, ret);                            \
+		}                                                              \
+		return ret;                                                    \
+	}                                                                      \
+                                                                               \
+	a_attr void a_prefix##_enumerate_prepare(a_prefix##_t *ph,             \
+	    a_prefix##_enumerate_helper_t *helper, uint16_t max_visit_num,     \
+	    uint16_t max_queue_size) {                                         \
+		ph_enumerate_prepare(&ph->ph, helper->bfs_queue,               \
+		    &helper->vars, max_visit_num, max_queue_size);             \
+	}                                                                      \
+                                                                               \
+	a_attr a_type *a_prefix##_enumerate_next(                              \
+	    a_prefix##_t *ph, a_prefix##_enumerate_helper_t *helper) {         \
+		return ph_enumerate_next(&ph->ph, offsetof(a_type, a_field),   \
+		    helper->bfs_queue, &helper->vars);                         \
+	}
 
 #endif /* JEMALLOC_INTERNAL_PH_H */
diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h
index 14542aa1..04049519 100644
--- a/include/jemalloc/internal/prng.h
+++ b/include/jemalloc/internal/prng.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_PRNG_H
 #define JEMALLOC_INTERNAL_PRNG_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/bit_util.h"
 
 /*
@@ -25,11 +26,11 @@
 /******************************************************************************/
 /* INTERNAL DEFINITIONS -- IGNORE */
 /******************************************************************************/
-#define PRNG_A_32	UINT32_C(1103515241)
-#define PRNG_C_32	UINT32_C(12347)
+#define PRNG_A_32 UINT32_C(1103515241)
+#define PRNG_C_32 UINT32_C(12347)
 
-#define PRNG_A_64	UINT64_C(6364136223846793005)
-#define PRNG_C_64	UINT64_C(1442695040888963407)
+#define PRNG_A_64 UINT64_C(6364136223846793005)
+#define PRNG_C_64 UINT64_C(1442695040888963407)
 
 JEMALLOC_ALWAYS_INLINE uint32_t
 prng_state_next_u32(uint32_t state) {
@@ -48,7 +49,7 @@ prng_state_next_zu(size_t state) {
 #elif LG_SIZEOF_PTR == 3
 	return (state * PRNG_A_64) + PRNG_C_64;
 #else
-#error Unsupported pointer size
+#	error Unsupported pointer size
 #endif
 }
 
diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h
index 4c8e22c7..0af5835c 100644
--- a/include/jemalloc/internal/prof_data.h
+++ b/include/jemalloc/internal/prof_data.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_PROF_DATA_H
 #define JEMALLOC_INTERNAL_PROF_DATA_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/mutex.h"
 
 extern malloc_mutex_t bt2gctx_mtx;
@@ -16,22 +17,21 @@ extern size_t prof_shifted_unbiased_cnt[PROF_SC_NSIZES];
 void prof_bt_hash(const void *key, size_t r_hash[2]);
 bool prof_bt_keycomp(const void *k1, const void *k2);
 
-bool prof_data_init(tsd_t *tsd);
+bool         prof_data_init(tsd_t *tsd);
 prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt);
-char *prof_thread_name_alloc(tsd_t *tsd, const char *thread_name);
-int prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name);
-void prof_unbias_map_init();
+int          prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name);
+void         prof_unbias_map_init(void);
 void prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque,
     prof_tdata_t *tdata, bool leakcheck);
-prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid,
+prof_tdata_t *prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid,
     uint64_t thr_discrim, char *thread_name, bool active);
-void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata);
-void prof_reset(tsd_t *tsd, size_t lg_sample);
-void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx);
+void          prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata);
+void          prof_reset(tsd_t *tsd, size_t lg_sample);
+void          prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx);
 
 /* Used in unit tests. */
 size_t prof_tdata_count(void);
 size_t prof_bt_count(void);
-void prof_cnt_all(prof_cnt_t *cnt_all);
+void   prof_cnt_all(prof_cnt_t *cnt_all);
 
 #endif /* JEMALLOC_INTERNAL_PROF_DATA_H */
diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h
index bdff1349..e07e69f5 100644
--- a/include/jemalloc/internal/prof_externs.h
+++ b/include/jemalloc/internal/prof_externs.h
@@ -1,28 +1,35 @@
 #ifndef JEMALLOC_INTERNAL_PROF_EXTERNS_H
 #define JEMALLOC_INTERNAL_PROF_EXTERNS_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/base.h"
 #include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/prof_hook.h"
+#include "jemalloc/internal/thread_event_registry.h"
 
-extern bool opt_prof;
-extern bool opt_prof_active;
-extern bool opt_prof_thread_active_init;
-extern size_t opt_lg_prof_sample;    /* Mean bytes between samples. */
-extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */
-extern bool opt_prof_gdump;          /* High-water memory dumping. */
-extern bool opt_prof_final;          /* Final profile dumping. */
-extern bool opt_prof_leak;           /* Dump leak summary at exit. */
-extern bool opt_prof_leak_error;     /* Exit with error code if memory leaked */
-extern bool opt_prof_accum;          /* Report cumulative bytes. */
-extern bool opt_prof_log;            /* Turn logging on at boot. */
-extern char opt_prof_prefix[
-    /* Minimize memory bloat for non-prof builds. */
+extern bool     opt_prof;
+extern bool     opt_prof_active;
+extern bool     opt_prof_thread_active_init;
+extern unsigned opt_prof_bt_max;
+extern size_t   opt_lg_prof_sample; /* Mean bytes between samples. */
+extern ssize_t opt_lg_prof_interval;    /* lg(prof_interval). */
+extern bool    opt_prof_gdump;          /* High-water memory dumping. */
+extern bool    opt_prof_final;          /* Final profile dumping. */
+extern bool    opt_prof_leak;           /* Dump leak summary at exit. */
+extern bool    opt_prof_leak_error; /* Exit with error code if memory leaked */
+extern bool    opt_prof_accum;      /* Report cumulative bytes. */
+extern bool    opt_prof_log;        /* Turn logging on at boot. */
+extern char    opt_prof_prefix[
+/* Minimize memory bloat for non-prof builds. */
 #ifdef JEMALLOC_PROF
     PATH_MAX +
 #endif
     1];
 extern bool opt_prof_unbias;
 
+/* Include pid namespace in profile file names. */
+extern bool opt_prof_pid_namespace;
+
 /* For recording recent allocations */
 extern ssize_t opt_prof_recent_alloc_max;
 
@@ -49,47 +56,110 @@ extern size_t lg_prof_sample;
 
 extern bool prof_booted;
 
-void prof_backtrace_hook_set(prof_backtrace_hook_t hook);
-prof_backtrace_hook_t prof_backtrace_hook_get();
+void                  prof_backtrace_hook_set(prof_backtrace_hook_t hook);
+prof_backtrace_hook_t prof_backtrace_hook_get(void);
 
-void prof_dump_hook_set(prof_dump_hook_t hook);
-prof_dump_hook_t prof_dump_hook_get();
+void             prof_dump_hook_set(prof_dump_hook_t hook);
+prof_dump_hook_t prof_dump_hook_get(void);
+
+void               prof_sample_hook_set(prof_sample_hook_t hook);
+prof_sample_hook_t prof_sample_hook_get(void);
+
+void                    prof_sample_free_hook_set(prof_sample_free_hook_t hook);
+prof_sample_free_hook_t prof_sample_free_hook_get(void);
 
 /* Functions only accessed in prof_inlines.h */
 prof_tdata_t *prof_tdata_init(tsd_t *tsd);
 prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
 
 void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx);
-void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size,
-    size_t usize, prof_tctx_t *tctx);
-void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info);
+void prof_malloc_sample_object(
+    tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx);
+void prof_free_sampled_object(
+    tsd_t *tsd, const void *ptr, size_t usize, prof_info_t *prof_info);
 prof_tctx_t *prof_tctx_create(tsd_t *tsd);
-void prof_idump(tsdn_t *tsdn);
-bool prof_mdump(tsd_t *tsd, const char *filename);
-void prof_gdump(tsdn_t *tsdn);
+void         prof_idump(tsdn_t *tsdn);
+bool         prof_mdump(tsd_t *tsd, const char *filename);
+void         prof_gdump(tsdn_t *tsdn);
 
-void prof_tdata_cleanup(tsd_t *tsd);
-bool prof_active_get(tsdn_t *tsdn);
-bool prof_active_set(tsdn_t *tsdn, bool active);
+void        prof_tdata_cleanup(tsd_t *tsd);
+bool        prof_active_get(tsdn_t *tsdn);
+bool        prof_active_set(tsdn_t *tsdn, bool active);
 const char *prof_thread_name_get(tsd_t *tsd);
-int prof_thread_name_set(tsd_t *tsd, const char *thread_name);
-bool prof_thread_active_get(tsd_t *tsd);
-bool prof_thread_active_set(tsd_t *tsd, bool active);
-bool prof_thread_active_init_get(tsdn_t *tsdn);
-bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init);
-bool prof_gdump_get(tsdn_t *tsdn);
-bool prof_gdump_set(tsdn_t *tsdn, bool active);
-void prof_boot0(void);
-void prof_boot1(void);
-bool prof_boot2(tsd_t *tsd, base_t *base);
-void prof_prefork0(tsdn_t *tsdn);
-void prof_prefork1(tsdn_t *tsdn);
-void prof_postfork_parent(tsdn_t *tsdn);
-void prof_postfork_child(tsdn_t *tsdn);
+int         prof_thread_name_set(tsd_t *tsd, const char *thread_name);
+bool        prof_thread_active_get(tsd_t *tsd);
+bool        prof_thread_active_set(tsd_t *tsd, bool active);
+bool        prof_thread_active_init_get(tsdn_t *tsdn);
+bool        prof_thread_active_init_set(tsdn_t *tsdn, bool active_init);
+bool        prof_gdump_get(tsdn_t *tsdn);
+bool        prof_gdump_set(tsdn_t *tsdn, bool active);
+void        prof_boot0(void);
+void        prof_boot1(void);
+bool        prof_boot2(tsd_t *tsd, base_t *base);
+void        prof_prefork0(tsdn_t *tsdn);
+void        prof_prefork1(tsdn_t *tsdn);
+void        prof_postfork_parent(tsdn_t *tsdn);
+void        prof_postfork_child(tsdn_t *tsdn);
 
-/* Only accessed by thread event. */
 uint64_t prof_sample_new_event_wait(tsd_t *tsd);
-uint64_t prof_sample_postponed_event_wait(tsd_t *tsd);
-void prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed);
+uint64_t tsd_prof_sample_event_wait_get(tsd_t *tsd);
+
+/*
+ * The lookahead functionality facilitates events to be able to lookahead, i.e.
+ * without touching the event counters, to determine whether an event would be
+ * triggered.  The event counters are not advanced until the end of the
+ * allocation / deallocation calls, so the lookahead can be useful if some
+ * preparation work for some event must be done early in the allocation /
+ * deallocation calls.
+ *
+ * Currently only the profiling sampling event needs the lookahead
+ * functionality, so we don't yet define general purpose lookahead functions.
+ *
+ * Surplus is a terminology referring to the amount of bytes beyond what's
+ * needed for triggering an event, which can be a useful quantity to have in
+ * general when lookahead is being called.
+ *
+ * This function returns true if allocation of usize would go above the next
+ * trigger for prof event, and false otherwise.
+ * If function returns true surplus will contain number of bytes beyond that
+ * trigger.
+ */
+
+JEMALLOC_ALWAYS_INLINE bool
+te_prof_sample_event_lookahead_surplus(
+    tsd_t *tsd, size_t usize, size_t *surplus) {
+	if (surplus != NULL) {
+		/*
+		 * This is a dead store: the surplus will be overwritten before
+		 * any read.  The initialization suppresses compiler warnings.
+		 * Meanwhile, using SIZE_MAX to initialize is good for
+		 * debugging purpose, because a valid surplus value is strictly
+		 * less than usize, which is at most SIZE_MAX.
+		 */
+		*surplus = SIZE_MAX;
+	}
+	if (unlikely(!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0)) {
+		return false;
+	}
+	/* The subtraction is intentionally susceptible to underflow. */
+	uint64_t accumbytes = tsd_thread_allocated_get(tsd) + usize
+	    - tsd_thread_allocated_last_event_get(tsd);
+	uint64_t sample_wait = tsd_prof_sample_event_wait_get(tsd);
+	if (accumbytes < sample_wait) {
+		return false;
+	}
+	assert(accumbytes - sample_wait < (uint64_t)usize);
+	if (surplus != NULL) {
+		*surplus = (size_t)(accumbytes - sample_wait);
+	}
+	return true;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) {
+	return te_prof_sample_event_lookahead_surplus(tsd, usize, NULL);
+}
+
+extern te_base_cb_t prof_sample_te_handler;
 
 #endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */
diff --git a/include/jemalloc/internal/prof_hook.h b/include/jemalloc/internal/prof_hook.h
index 150d19d3..d5a9b0ff 100644
--- a/include/jemalloc/internal/prof_hook.h
+++ b/include/jemalloc/internal/prof_hook.h
@@ -1,6 +1,8 @@
 #ifndef JEMALLOC_INTERNAL_PROF_HOOK_H
 #define JEMALLOC_INTERNAL_PROF_HOOK_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+
 /*
  * The hooks types of which are declared in this file are experimental and
  * undocumented, thus the typedefs are located in an 'internal' header.
@@ -18,4 +20,11 @@ typedef void (*prof_backtrace_hook_t)(void **, unsigned *, unsigned);
  */
 typedef void (*prof_dump_hook_t)(const char *filename);
 
+/* ptr, size, backtrace vector, backtrace vector length, usize */
+typedef void (*prof_sample_hook_t)(const void *ptr, size_t size,
+    void **backtrace, unsigned backtrace_length, size_t usize);
+
+/* ptr, size */
+typedef void (*prof_sample_free_hook_t)(const void *, size_t);
+
 #endif /* JEMALLOC_INTERNAL_PROF_HOOK_H */
diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h
index a8e7e7fb..4a36bd7a 100644
--- a/include/jemalloc/internal/prof_inlines.h
+++ b/include/jemalloc/internal/prof_inlines.h
@@ -1,12 +1,17 @@
 #ifndef JEMALLOC_INTERNAL_PROF_INLINES_H
 #define JEMALLOC_INTERNAL_PROF_INLINES_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/arena_inlines_b.h"
+#include "jemalloc/internal/jemalloc_internal_inlines_c.h"
+#include "jemalloc/internal/prof_externs.h"
+#include "jemalloc/internal/prof_structs.h"
 #include "jemalloc/internal/safety_check.h"
 #include "jemalloc/internal/sz.h"
 #include "jemalloc/internal/thread_event.h"
 
 JEMALLOC_ALWAYS_INLINE void
-prof_active_assert() {
+prof_active_assert(void) {
 	cassert(config_prof);
 	/*
 	 * If opt_prof is off, then prof_active must always be off, regardless
@@ -37,6 +42,22 @@ prof_gdump_get_unlocked(void) {
 	return prof_gdump_val;
 }
 
+JEMALLOC_ALWAYS_INLINE void
+prof_thread_name_assert(prof_tdata_t *tdata) {
+	if (!config_debug) {
+		return;
+	}
+	prof_active_assert();
+
+	bool terminated = false;
+	for (unsigned i = 0; i < PROF_THREAD_NAME_MAX_LEN; i++) {
+		if (tdata->thread_name[i] == '\0') {
+			terminated = true;
+		}
+	}
+	assert(terminated);
+}
+
 JEMALLOC_ALWAYS_INLINE prof_tdata_t *
 prof_tdata_get(tsd_t *tsd, bool create) {
 	prof_tdata_t *tdata;
@@ -58,6 +79,10 @@ prof_tdata_get(tsd_t *tsd, bool create) {
 		assert(tdata == NULL || tdata->attached);
 	}
 
+	if (tdata != NULL) {
+		prof_thread_name_assert(tdata);
+	}
+
 	return tdata;
 }
 
@@ -81,6 +106,11 @@ prof_info_get_and_reset_recent(tsd_t *tsd, const void *ptr,
 	arena_prof_info_get(tsd, ptr, alloc_ctx, prof_info, true);
 }
 
+JEMALLOC_ALWAYS_INLINE bool
+prof_tctx_is_valid(const prof_tctx_t *tctx) {
+	return tctx != NULL && tctx != PROF_TCTX_SENTINEL;
+}
+
 JEMALLOC_ALWAYS_INLINE void
 prof_tctx_reset(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx) {
 	cassert(config_prof);
@@ -101,7 +131,7 @@ JEMALLOC_ALWAYS_INLINE void
 prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx, size_t size) {
 	cassert(config_prof);
 	assert(edata != NULL);
-	assert((uintptr_t)tctx > (uintptr_t)1U);
+	assert(prof_tctx_is_valid(tctx));
 
 	arena_prof_info_set(tsd, edata, tctx, size);
 }
@@ -134,9 +164,9 @@ JEMALLOC_ALWAYS_INLINE prof_tctx_t *
 prof_alloc_prep(tsd_t *tsd, bool prof_active, bool sample_event) {
 	prof_tctx_t *ret;
 
-	if (!prof_active ||
-	    likely(prof_sample_should_skip(tsd, sample_event))) {
-		ret = (prof_tctx_t *)(uintptr_t)1U;
+	if (!prof_active
+	    || likely(prof_sample_should_skip(tsd, sample_event))) {
+		ret = PROF_TCTX_SENTINEL;
 	} else {
 		ret = prof_tctx_create(tsd);
 	}
@@ -151,7 +181,7 @@ prof_malloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize,
 	assert(ptr != NULL);
 	assert(usize == isalloc(tsd_tsdn(tsd), ptr));
 
-	if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) {
+	if (unlikely(prof_tctx_is_valid(tctx))) {
 		prof_malloc_sample_object(tsd, ptr, size, usize, tctx);
 	} else {
 		prof_tctx_reset(tsd, ptr, alloc_ctx);
@@ -165,7 +195,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize,
 	bool sampled, old_sampled, moved;
 
 	cassert(config_prof);
-	assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);
+	assert(ptr != NULL || !prof_tctx_is_valid(tctx));
 
 	if (prof_active && ptr != NULL) {
 		assert(usize == isalloc(tsd_tsdn(tsd), ptr));
@@ -178,12 +208,12 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize,
 			 * sample threshold.
 			 */
 			prof_alloc_rollback(tsd, tctx);
-			tctx = (prof_tctx_t *)(uintptr_t)1U;
+			tctx = PROF_TCTX_SENTINEL;
 		}
 	}
 
-	sampled = ((uintptr_t)tctx > (uintptr_t)1U);
-	old_sampled = ((uintptr_t)old_prof_info->alloc_tctx > (uintptr_t)1U);
+	sampled = prof_tctx_is_valid(tctx);
+	old_sampled = prof_tctx_is_valid(old_prof_info->alloc_tctx);
 	moved = (ptr != old_ptr);
 
 	if (unlikely(sampled)) {
@@ -201,7 +231,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize,
 	} else {
 		prof_info_t prof_info;
 		prof_info_get(tsd, ptr, NULL, &prof_info);
-		assert((uintptr_t)prof_info.alloc_tctx == (uintptr_t)1U);
+		assert(prof_info.alloc_tctx == PROF_TCTX_SENTINEL);
 	}
 
 	/*
@@ -212,31 +242,29 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize,
 	 * counters.
 	 */
 	if (unlikely(old_sampled)) {
-		prof_free_sampled_object(tsd, old_usize, old_prof_info);
+		prof_free_sampled_object(
+		    tsd, old_ptr, old_usize, old_prof_info);
 	}
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-prof_sample_align(size_t orig_align) {
+prof_sample_align(size_t usize, size_t orig_align) {
 	/*
-	 * Enforce page alignment, so that sampled allocations can be identified
+	 * Enforce alignment, so that sampled allocations can be identified
 	 * w/o metadata lookup.
 	 */
 	assert(opt_prof);
-	return (opt_cache_oblivious && orig_align < PAGE) ? PAGE :
-	    orig_align;
-}
-
-JEMALLOC_ALWAYS_INLINE bool
-prof_sample_aligned(const void *ptr) {
-	return ((uintptr_t)ptr & PAGE_MASK) == 0;
+	return (orig_align < PROF_SAMPLE_ALIGNMENT
+	           && (sz_can_use_slab(usize) || opt_cache_oblivious))
+	    ? PROF_SAMPLE_ALIGNMENT
+	    : orig_align;
 }
 
 JEMALLOC_ALWAYS_INLINE bool
 prof_sampled(tsd_t *tsd, const void *ptr) {
 	prof_info_t prof_info;
 	prof_info_get(tsd, ptr, NULL, &prof_info);
-	bool sampled = (uintptr_t)prof_info.alloc_tctx > (uintptr_t)1U;
+	bool sampled = prof_tctx_is_valid(prof_info.alloc_tctx);
 	if (sampled) {
 		assert(prof_sample_aligned(ptr));
 	}
@@ -244,18 +272,32 @@ prof_sampled(tsd_t *tsd, const void *ptr) {
 }
 
 JEMALLOC_ALWAYS_INLINE void
-prof_free(tsd_t *tsd, const void *ptr, size_t usize,
-    emap_alloc_ctx_t *alloc_ctx) {
+prof_free(
+    tsd_t *tsd, const void *ptr, size_t usize, emap_alloc_ctx_t *alloc_ctx) {
 	prof_info_t prof_info;
 	prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info);
 
 	cassert(config_prof);
 	assert(usize == isalloc(tsd_tsdn(tsd), ptr));
 
-	if (unlikely((uintptr_t)prof_info.alloc_tctx > (uintptr_t)1U)) {
+	if (unlikely(prof_tctx_is_valid(prof_info.alloc_tctx))) {
 		assert(prof_sample_aligned(ptr));
-		prof_free_sampled_object(tsd, usize, &prof_info);
+		prof_free_sampled_object(tsd, ptr, usize, &prof_info);
 	}
 }
 
+JEMALLOC_ALWAYS_INLINE bool
+prof_thread_name_empty(prof_tdata_t *tdata) {
+	prof_active_assert();
+
+	return (tdata->thread_name[0] == '\0');
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_thread_name_clear(prof_tdata_t *tdata) {
+	prof_active_assert();
+
+	tdata->thread_name[0] = '\0';
+}
+
 #endif /* JEMALLOC_INTERNAL_PROF_INLINES_H */
diff --git a/include/jemalloc/internal/prof_log.h b/include/jemalloc/internal/prof_log.h
index ccb557dd..d9b97dc8 100644
--- a/include/jemalloc/internal/prof_log.h
+++ b/include/jemalloc/internal/prof_log.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_PROF_LOG_H
 #define JEMALLOC_INTERNAL_PROF_LOG_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/mutex.h"
 
 extern malloc_mutex_t log_mtx;
@@ -12,9 +13,9 @@ bool prof_log_init(tsd_t *tsdn);
 size_t prof_log_bt_count(void);
 size_t prof_log_alloc_count(void);
 size_t prof_log_thr_count(void);
-bool prof_log_is_logging(void);
-bool prof_log_rep_check(void);
-void prof_log_dummy_set(bool new_value);
+bool   prof_log_is_logging(void);
+bool   prof_log_rep_check(void);
+void   prof_log_dummy_set(bool new_value);
 
 bool prof_log_start(tsdn_t *tsdn, const char *filename);
 bool prof_log_stop(tsdn_t *tsdn);
diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h
index df410236..33649e6d 100644
--- a/include/jemalloc/internal/prof_recent.h
+++ b/include/jemalloc/internal/prof_recent.h
@@ -1,13 +1,17 @@
 #ifndef JEMALLOC_INTERNAL_PROF_RECENT_H
 #define JEMALLOC_INTERNAL_PROF_RECENT_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/edata.h"
+#include "jemalloc/internal/mutex.h"
+
 extern malloc_mutex_t prof_recent_alloc_mtx;
 extern malloc_mutex_t prof_recent_dump_mtx;
 
 bool prof_recent_alloc_prepare(tsd_t *tsd, prof_tctx_t *tctx);
 void prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size, size_t usize);
 void prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata);
-bool prof_recent_init();
+bool prof_recent_init(void);
 void edata_prof_recent_alloc_init(edata_t *edata);
 
 /* Used in unit tests. */
@@ -16,7 +20,7 @@ extern prof_recent_list_t prof_recent_alloc_list;
 edata_t *prof_recent_alloc_edata_get_no_lock_test(const prof_recent_t *node);
 prof_recent_t *edata_prof_recent_alloc_get_no_lock_test(const edata_t *edata);
 
-ssize_t prof_recent_alloc_max_ctl_read();
+ssize_t prof_recent_alloc_max_ctl_read(void);
 ssize_t prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max);
 void prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque);
 
diff --git a/include/jemalloc/internal/prof_stats.h b/include/jemalloc/internal/prof_stats.h
index 7954e82d..c4d269e5 100644
--- a/include/jemalloc/internal/prof_stats.h
+++ b/include/jemalloc/internal/prof_stats.h
@@ -1,6 +1,9 @@
 #ifndef JEMALLOC_INTERNAL_PROF_STATS_H
 #define JEMALLOC_INTERNAL_PROF_STATS_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/mutex.h"
+
 typedef struct prof_stats_s prof_stats_t;
 struct prof_stats_s {
 	uint64_t req_sum;
diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h
index dd22115f..d38b15ea 100644
--- a/include/jemalloc/internal/prof_structs.h
+++ b/include/jemalloc/internal/prof_structs.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_PROF_STRUCTS_H
 #define JEMALLOC_INTERNAL_PROF_STRUCTS_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/ckh.h"
 #include "jemalloc/internal/edata.h"
 #include "jemalloc/internal/mutex.h"
@@ -9,29 +10,29 @@
 
 struct prof_bt_s {
 	/* Backtrace, stored as len program counters. */
-	void		**vec;
-	unsigned	len;
+	void   **vec;
+	unsigned len;
 };
 
 #ifdef JEMALLOC_PROF_LIBGCC
 /* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
 typedef struct {
-	void 		**vec;
-	unsigned	*len;
-	unsigned	max;
+	void    **vec;
+	unsigned *len;
+	unsigned  max;
 } prof_unwind_data_t;
 #endif
 
 struct prof_cnt_s {
 	/* Profiling counters. */
-	uint64_t	curobjs;
-	uint64_t	curobjs_shifted_unbiased;
-	uint64_t	curbytes;
-	uint64_t	curbytes_unbiased;
-	uint64_t	accumobjs;
-	uint64_t	accumobjs_shifted_unbiased;
-	uint64_t	accumbytes;
-	uint64_t	accumbytes_unbiased;
+	uint64_t curobjs;
+	uint64_t curobjs_shifted_unbiased;
+	uint64_t curbytes;
+	uint64_t curbytes_unbiased;
+	uint64_t accumobjs;
+	uint64_t accumobjs_shifted_unbiased;
+	uint64_t accumbytes;
+	uint64_t accumbytes_unbiased;
 };
 
 typedef enum {
@@ -43,26 +44,26 @@ typedef enum {
 
 struct prof_tctx_s {
 	/* Thread data for thread that performed the allocation. */
-	prof_tdata_t		*tdata;
+	prof_tdata_t *tdata;
 
 	/*
 	 * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be
 	 * defunct during teardown.
 	 */
-	uint64_t		thr_uid;
-	uint64_t		thr_discrim;
+	uint64_t thr_uid;
+	uint64_t thr_discrim;
 
 	/*
 	 * Reference count of how many times this tctx object is referenced in
 	 * recent allocation / deallocation records, protected by tdata->lock.
 	 */
-	uint64_t		recent_count;
+	uint64_t recent_count;
 
 	/* Profiling counters, protected by tdata->lock. */
-	prof_cnt_t		cnts;
+	prof_cnt_t cnts;
 
 	/* Associated global context. */
-	prof_gctx_t		*gctx;
+	prof_gctx_t *gctx;
 
 	/*
 	 * UID that distinguishes multiple tctx's created by the same thread,
@@ -77,40 +78,40 @@ struct prof_tctx_s {
 	 *   threshold can be hit again before the first consumer finishes
 	 *   executing prof_tctx_destroy().
 	 */
-	uint64_t		tctx_uid;
+	uint64_t tctx_uid;
 
 	/* Linkage into gctx's tctxs. */
-	rb_node(prof_tctx_t)	tctx_link;
+	rb_node(prof_tctx_t) tctx_link;
 
 	/*
 	 * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents
 	 * sample vs destroy race.
 	 */
-	bool			prepared;
+	bool prepared;
 
 	/* Current dump-related state, protected by gctx->lock. */
-	prof_tctx_state_t	state;
+	prof_tctx_state_t state;
 
 	/*
 	 * Copy of cnts snapshotted during early dump phase, protected by
 	 * dump_mtx.
 	 */
-	prof_cnt_t		dump_cnts;
+	prof_cnt_t dump_cnts;
 };
 typedef rb_tree(prof_tctx_t) prof_tctx_tree_t;
 
 struct prof_info_s {
 	/* Time when the allocation was made. */
-	nstime_t		alloc_time;
+	nstime_t alloc_time;
 	/* Points to the prof_tctx_t corresponding to the allocation. */
-	prof_tctx_t		*alloc_tctx;
+	prof_tctx_t *alloc_tctx;
 	/* Allocation request size. */
-	size_t			alloc_size;
+	size_t alloc_size;
 };
 
 struct prof_gctx_s {
 	/* Protects nlimbo, cnt_summed, and tctxs. */
-	malloc_mutex_t		*lock;
+	malloc_mutex_t *lock;
 
 	/*
 	 * Number of threads that currently cause this gctx to be in a state of
@@ -122,54 +123,48 @@ struct prof_gctx_s {
 	 * nlimbo must be 1 (single destroyer) in order to safely destroy the
 	 * gctx.
 	 */
-	unsigned		nlimbo;
+	unsigned nlimbo;
 
 	/*
 	 * Tree of profile counters, one for each thread that has allocated in
 	 * this context.
 	 */
-	prof_tctx_tree_t	tctxs;
+	prof_tctx_tree_t tctxs;
 
 	/* Linkage for tree of contexts to be dumped. */
-	rb_node(prof_gctx_t)	dump_link;
+	rb_node(prof_gctx_t) dump_link;
 
 	/* Temporary storage for summation during dump. */
-	prof_cnt_t		cnt_summed;
+	prof_cnt_t cnt_summed;
 
 	/* Associated backtrace. */
-	prof_bt_t		bt;
+	prof_bt_t bt;
 
 	/* Backtrace vector, variable size, referred to by bt. */
-	void			*vec[1];
+	void *vec[1];
 };
 typedef rb_tree(prof_gctx_t) prof_gctx_tree_t;
 
 struct prof_tdata_s {
-	malloc_mutex_t		*lock;
+	malloc_mutex_t *lock;
 
 	/* Monotonically increasing unique thread identifier. */
-	uint64_t		thr_uid;
+	uint64_t thr_uid;
 
 	/*
 	 * Monotonically increasing discriminator among tdata structures
 	 * associated with the same thr_uid.
 	 */
-	uint64_t		thr_discrim;
+	uint64_t thr_discrim;
 
-	/* Included in heap profile dumps if non-NULL. */
-	char			*thread_name;
-
-	bool			attached;
-	bool			expired;
-
-	rb_node(prof_tdata_t)	tdata_link;
+	rb_node(prof_tdata_t) tdata_link;
 
 	/*
 	 * Counter used to initialize prof_tctx_t's tctx_uid.  No locking is
 	 * necessary when incrementing this field, because only one thread ever
 	 * does so.
 	 */
-	uint64_t		tctx_uid_next;
+	uint64_t tctx_uid_next;
 
 	/*
 	 * Hash of (prof_bt_t *)-->(prof_tctx_t *).  Each thread tracks
@@ -177,12 +172,15 @@ struct prof_tdata_s {
 	 * associated with thread-specific prof_tctx_t objects.  Other threads
 	 * may write to prof_tctx_t contents when freeing associated objects.
 	 */
-	ckh_t			bt2tctx;
+	ckh_t bt2tctx;
+
+	/* Included in heap profile dumps if has content. */
+	char thread_name[PROF_THREAD_NAME_MAX_LEN];
 
 	/* State used to avoid dumping while operating on prof internals. */
-	bool			enq;
-	bool			enq_idump;
-	bool			enq_gdump;
+	bool enq;
+	bool enq_idump;
+	bool enq_gdump;
 
 	/*
 	 * Set to true during an early dump phase for tdata's which are
@@ -190,19 +188,22 @@ struct prof_tdata_s {
 	 * to false so that they aren't accidentally included in later dump
 	 * phases.
 	 */
-	bool			dumping;
+	bool dumping;
 
 	/*
 	 * True if profiling is active for this tdata's thread
 	 * (thread.prof.active mallctl).
 	 */
-	bool			active;
+	bool active;
+
+	bool attached;
+	bool expired;
 
 	/* Temporary storage for summation during dump. */
-	prof_cnt_t		cnt_summed;
+	prof_cnt_t cnt_summed;
 
 	/* Backtrace vector, used for calls to prof_backtrace(). */
-	void			*vec[PROF_BT_MAX];
+	void **vec;
 };
 typedef rb_tree(prof_tdata_t) prof_tdata_tree_t;
 
@@ -211,9 +212,9 @@ struct prof_recent_s {
 	nstime_t dalloc_time;
 
 	ql_elm(prof_recent_t) link;
-	size_t size;
-	size_t usize;
-	atomic_p_t alloc_edata; /* NULL means allocation has been freed. */
+	size_t       size;
+	size_t       usize;
+	atomic_p_t   alloc_edata; /* NULL means allocation has been freed. */
 	prof_tctx_t *alloc_tctx;
 	prof_tctx_t *dalloc_tctx;
 };
diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h
index 3d25a429..0745b991 100644
--- a/include/jemalloc/internal/prof_sys.h
+++ b/include/jemalloc/internal/prof_sys.h
@@ -1,30 +1,35 @@
 #ifndef JEMALLOC_INTERNAL_PROF_SYS_H
 #define JEMALLOC_INTERNAL_PROF_SYS_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/mutex.h"
+
 extern malloc_mutex_t prof_dump_filename_mtx;
-extern base_t *prof_base;
+extern base_t        *prof_base;
 
 void bt_init(prof_bt_t *bt, void **vec);
 void prof_backtrace(tsd_t *tsd, prof_bt_t *bt);
-void prof_hooks_init();
-void prof_unwind_init();
+void prof_hooks_init(void);
+void prof_unwind_init(void);
 void prof_sys_thread_name_fetch(tsd_t *tsd);
-int prof_getpid(void);
+int  prof_getpid(void);
 void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind);
 bool prof_prefix_set(tsdn_t *tsdn, const char *prefix);
 void prof_fdump_impl(tsd_t *tsd);
 void prof_idump_impl(tsd_t *tsd);
 bool prof_mdump_impl(tsd_t *tsd, const char *filename);
 void prof_gdump_impl(tsd_t *tsd);
+int  prof_thread_stack_range(uintptr_t fp, uintptr_t *low, uintptr_t *high);
 
 /* Used in unit tests. */
-typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit);
+typedef int(prof_sys_thread_name_read_t)(char *buf, size_t limit);
 extern prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read;
-typedef int (prof_dump_open_file_t)(const char *, int);
+typedef int(prof_dump_open_file_t)(const char *, int);
 extern prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file;
-typedef ssize_t (prof_dump_write_file_t)(int, const void *, size_t);
+typedef ssize_t(prof_dump_write_file_t)(int, const void *, size_t);
 extern prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file;
-typedef int (prof_dump_open_maps_t)();
+typedef int(prof_dump_open_maps_t)(void);
 extern prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps;
 
 #endif /* JEMALLOC_INTERNAL_PROF_SYS_H */
diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h
index ba628654..7468885e 100644
--- a/include/jemalloc/internal/prof_types.h
+++ b/include/jemalloc/internal/prof_types.h
@@ -1,75 +1,94 @@
 #ifndef JEMALLOC_INTERNAL_PROF_TYPES_H
 #define JEMALLOC_INTERNAL_PROF_TYPES_H
 
-typedef struct prof_bt_s prof_bt_t;
-typedef struct prof_cnt_s prof_cnt_t;
-typedef struct prof_tctx_s prof_tctx_t;
-typedef struct prof_info_s prof_info_t;
-typedef struct prof_gctx_s prof_gctx_t;
-typedef struct prof_tdata_s prof_tdata_t;
+typedef struct prof_bt_s     prof_bt_t;
+typedef struct prof_cnt_s    prof_cnt_t;
+typedef struct prof_tctx_s   prof_tctx_t;
+typedef struct prof_info_s   prof_info_t;
+typedef struct prof_gctx_s   prof_gctx_t;
+typedef struct prof_tdata_s  prof_tdata_t;
 typedef struct prof_recent_s prof_recent_t;
 
 /* Option defaults. */
 #ifdef JEMALLOC_PROF
-#  define PROF_PREFIX_DEFAULT		"jeprof"
+#	define PROF_PREFIX_DEFAULT "jeprof"
 #else
-#  define PROF_PREFIX_DEFAULT		""
+#	define PROF_PREFIX_DEFAULT ""
 #endif
-#define LG_PROF_SAMPLE_DEFAULT		19
-#define LG_PROF_INTERVAL_DEFAULT	-1
+#define LG_PROF_SAMPLE_DEFAULT 19
+#define LG_PROF_INTERVAL_DEFAULT -1
 
 /*
  * Hard limit on stack backtrace depth.  The version of prof_backtrace() that
  * is based on __builtin_return_address() necessarily has a hard-coded number
  * of backtrace frame handlers, and should be kept in sync with this setting.
  */
-#define PROF_BT_MAX			128
+#ifdef JEMALLOC_PROF_GCC
+#	define PROF_BT_MAX_LIMIT 256
+#else
+#	define PROF_BT_MAX_LIMIT UINT_MAX
+#endif
+#define PROF_BT_MAX_DEFAULT 128
 
 /* Initial hash table size. */
-#define PROF_CKH_MINITEMS		64
+#define PROF_CKH_MINITEMS 64
 
 /* Size of memory buffer to use when writing dump files. */
 #ifndef JEMALLOC_PROF
 /* Minimize memory bloat for non-prof builds. */
-#  define PROF_DUMP_BUFSIZE		1
+#	define PROF_DUMP_BUFSIZE 1
 #elif defined(JEMALLOC_DEBUG)
 /* Use a small buffer size in debug build, mainly to facilitate testing. */
-#  define PROF_DUMP_BUFSIZE		16
+#	define PROF_DUMP_BUFSIZE 16
 #else
-#  define PROF_DUMP_BUFSIZE		65536
+#	define PROF_DUMP_BUFSIZE 65536
 #endif
 
 /* Size of size class related tables */
 #ifdef JEMALLOC_PROF
-#  define PROF_SC_NSIZES		SC_NSIZES
+#	define PROF_SC_NSIZES SC_NSIZES
 #else
 /* Minimize memory bloat for non-prof builds. */
-#  define PROF_SC_NSIZES		1
+#	define PROF_SC_NSIZES 1
 #endif
 
 /* Size of stack-allocated buffer used by prof_printf(). */
-#define PROF_PRINTF_BUFSIZE		128
+#define PROF_PRINTF_BUFSIZE 128
 
 /*
  * Number of mutexes shared among all gctx's.  No space is allocated for these
  * unless profiling is enabled, so it's okay to over-provision.
  */
-#define PROF_NCTX_LOCKS			1024
+#define PROF_NCTX_LOCKS 1024
 
 /*
  * Number of mutexes shared among all tdata's.  No space is allocated for these
  * unless profiling is enabled, so it's okay to over-provision.
  */
-#define PROF_NTDATA_LOCKS		256
+#define PROF_NTDATA_LOCKS 256
 
 /* Minimize memory bloat for non-prof builds. */
 #ifdef JEMALLOC_PROF
-#define PROF_DUMP_FILENAME_LEN (PATH_MAX + 1)
+#	define PROF_DUMP_FILENAME_LEN (PATH_MAX + 1)
 #else
-#define PROF_DUMP_FILENAME_LEN 1
+#	define PROF_DUMP_FILENAME_LEN 1
 #endif
 
 /* Default number of recent allocations to record. */
 #define PROF_RECENT_ALLOC_MAX_DEFAULT 0
 
+/* Thread name storage size limit. */
+#define PROF_THREAD_NAME_MAX_LEN 16
+
+/*
+ * Minimum required alignment for sampled allocations. Over-aligning sampled
+ * allocations allows us to quickly identify them on the dalloc path without
+ * resorting to metadata lookup.
+ */
+#define PROF_SAMPLE_ALIGNMENT PAGE
+#define PROF_SAMPLE_ALIGNMENT_MASK PAGE_MASK
+
+/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
+#define PROF_TCTX_SENTINEL ((prof_tctx_t *)((uintptr_t)1U))
+
 #endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */
diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h
index e1d64970..f096e414 100644
--- a/include/jemalloc/internal/psset.h
+++ b/include/jemalloc/internal/psset.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_PSSET_H
 #define JEMALLOC_INTERNAL_PSSET_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/hpdata.h"
 
 /*
@@ -20,6 +21,12 @@
  */
 #define PSSET_NPSIZES 64
 
+/*
+ * We store non-hugefied and hugified pageslabs metadata separately.
+ * [0] corresponds to non-hugified and [1] to hugified pageslabs.
+ */
+#define PSSET_NHUGE 2
+
 /*
  * We keep two purge lists per page size class; one for hugified hpdatas (at
  * index 2*pszind), and one for the non-hugified hpdatas (at index 2*pszind +
@@ -43,21 +50,36 @@ struct psset_bin_stats_s {
 
 typedef struct psset_stats_s psset_stats_t;
 struct psset_stats_s {
+	/*
+	 * Merged stats for all pageslabs in psset.  This lets us quickly
+	 * answer queries for the number of dirty and active pages in the
+	 * entire set.
+	 */
+	psset_bin_stats_t merged;
+
+	/*
+	 * Below are the same stats, but aggregated by different
+	 * properties of pageslabs: huginess or fullness.
+	 */
+
+	/* Non-huge and huge slabs. */
+	psset_bin_stats_t slabs[PSSET_NHUGE];
+
 	/*
 	 * The second index is huge stats; nonfull_slabs[pszind][0] contains
 	 * stats for the non-huge slabs in bucket pszind, while
 	 * nonfull_slabs[pszind][1] contains stats for the huge slabs.
 	 */
-	psset_bin_stats_t nonfull_slabs[PSSET_NPSIZES][2];
+	psset_bin_stats_t nonfull_slabs[PSSET_NPSIZES][PSSET_NHUGE];
 
 	/*
 	 * Full slabs don't live in any edata heap, but we still track their
 	 * stats.
 	 */
-	psset_bin_stats_t full_slabs[2];
+	psset_bin_stats_t full_slabs[PSSET_NHUGE];
 
 	/* Empty slabs are similar. */
-	psset_bin_stats_t empty_slabs[2];
+	psset_bin_stats_t empty_slabs[PSSET_NHUGE];
 };
 
 typedef struct psset_s psset_t;
@@ -68,13 +90,7 @@ struct psset_s {
 	 */
 	hpdata_age_heap_t pageslabs[PSSET_NPSIZES];
 	/* Bitmap for which set bits correspond to non-empty heaps. */
-	fb_group_t pageslab_bitmap[FB_NGROUPS(PSSET_NPSIZES)];
-	/*
-	 * The sum of all bin stats in stats.  This lets us quickly answer
-	 * queries for the number of dirty, active, and retained pages in the
-	 * entire set.
-	 */
-	psset_bin_stats_t merged_stats;
+	fb_group_t    pageslab_bitmap[FB_NGROUPS(PSSET_NPSIZES)];
 	psset_stats_t stats;
 	/*
 	 * Slabs with no active allocations, but which are allowed to serve new
@@ -105,8 +121,12 @@ void psset_update_end(psset_t *psset, hpdata_t *ps);
 
 /* Analogous to the eset_fit; pick a hpdata to serve the request. */
 hpdata_t *psset_pick_alloc(psset_t *psset, size_t size);
-/* Pick one to purge. */
-hpdata_t *psset_pick_purge(psset_t *psset);
+/*
+ * Pick one to purge that is purgable before given time (inclusive).  If now
+ * is NULL then time is not considered.
+ */
+hpdata_t *psset_pick_purge(psset_t *psset, const nstime_t *now);
+
 /* Pick one to hugify. */
 hpdata_t *psset_pick_hugify(psset_t *psset);
 
@@ -115,17 +135,17 @@ void psset_remove(psset_t *psset, hpdata_t *ps);
 
 static inline size_t
 psset_npageslabs(psset_t *psset) {
-	return psset->merged_stats.npageslabs;
+	return psset->stats.merged.npageslabs;
 }
 
 static inline size_t
 psset_nactive(psset_t *psset) {
-	return psset->merged_stats.nactive;
+	return psset->stats.merged.nactive;
 }
 
 static inline size_t
 psset_ndirty(psset_t *psset) {
-	return psset->merged_stats.ndirty;
+	return psset->stats.merged.ndirty;
 }
 
 #endif /* JEMALLOC_INTERNAL_PSSET_H */
diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h
index c7f52f86..9c1776a4 100644
--- a/include/jemalloc/internal/ql.h
+++ b/include/jemalloc/internal/ql.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_QL_H
 #define JEMALLOC_INTERNAL_QL_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/qr.h"
 
 /*
@@ -27,33 +28,36 @@
  */
 
 /* List definitions. */
-#define ql_head(a_type)							\
-struct {								\
-	a_type *qlh_first;						\
-}
+#define ql_head(a_type)                                                        \
+	struct {                                                               \
+		a_type *qlh_first;                                             \
+	}
 
 /* Static initializer for an empty list. */
-#define ql_head_initializer(a_head) {NULL}
+#define ql_head_initializer(a_head)                                            \
+	{ NULL }
 
 /* The field definition. */
-#define ql_elm(a_type)	qr(a_type)
+#define ql_elm(a_type) qr(a_type)
 
 /* A pointer to the first element in the list, or NULL if the list is empty. */
 #define ql_first(a_head) ((a_head)->qlh_first)
 
 /* Dynamically initializes a list. */
-#define ql_new(a_head) do {						\
-	ql_first(a_head) = NULL;					\
-} while (0)
+#define ql_new(a_head)                                                         \
+	do {                                                                   \
+		ql_first(a_head) = NULL;                                       \
+	} while (0)
 
 /*
  * Sets dest to be the contents of src (overwriting any elements there), leaving
  * src empty.
  */
-#define ql_move(a_head_dest, a_head_src) do {				\
-	ql_first(a_head_dest) = ql_first(a_head_src);			\
-	ql_new(a_head_src);						\
-} while (0)
+#define ql_move(a_head_dest, a_head_src)                                       \
+	do {                                                                   \
+		ql_first(a_head_dest) = ql_first(a_head_src);                  \
+		ql_new(a_head_src);                                            \
+	} while (0)
 
 /* True if the list is empty, otherwise false. */
 #define ql_empty(a_head) (ql_first(a_head) == NULL)
@@ -67,85 +71,91 @@ struct {								\
 /*
  * Obtains the last item in the list.
  */
-#define ql_last(a_head, a_field)					\
+#define ql_last(a_head, a_field)                                               \
 	(ql_empty(a_head) ? NULL : qr_prev(ql_first(a_head), a_field))
 
 /*
  * Gets a pointer to the next/prev element in the list.  Trying to advance past
  * the end or retreat before the beginning of the list returns NULL.
  */
-#define ql_next(a_head, a_elm, a_field)					\
-	((ql_last(a_head, a_field) != (a_elm))				\
-	    ? qr_next((a_elm), a_field)	: NULL)
-#define ql_prev(a_head, a_elm, a_field)					\
-	((ql_first(a_head) != (a_elm)) ? qr_prev((a_elm), a_field)	\
-				       : NULL)
+#define ql_next(a_head, a_elm, a_field)                                        \
+	((ql_last(a_head, a_field) != (a_elm)) ? qr_next((a_elm), a_field)     \
+	                                       : NULL)
+#define ql_prev(a_head, a_elm, a_field)                                        \
+	((ql_first(a_head) != (a_elm)) ? qr_prev((a_elm), a_field) : NULL)
 
 /* Inserts a_elm before a_qlelm in the list. */
-#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do {		\
-	qr_before_insert((a_qlelm), (a_elm), a_field);			\
-	if (ql_first(a_head) == (a_qlelm)) {				\
-		ql_first(a_head) = (a_elm);				\
-	}								\
-} while (0)
+#define ql_before_insert(a_head, a_qlelm, a_elm, a_field)                      \
+	do {                                                                   \
+		qr_before_insert((a_qlelm), (a_elm), a_field);                 \
+		if (ql_first(a_head) == (a_qlelm)) {                           \
+			ql_first(a_head) = (a_elm);                            \
+		}                                                              \
+	} while (0)
 
 /* Inserts a_elm after a_qlelm in the list. */
-#define ql_after_insert(a_qlelm, a_elm, a_field)			\
+#define ql_after_insert(a_qlelm, a_elm, a_field)                               \
 	qr_after_insert((a_qlelm), (a_elm), a_field)
 
 /* Inserts a_elm as the first item in the list. */
-#define ql_head_insert(a_head, a_elm, a_field) do {			\
-	if (!ql_empty(a_head)) {					\
-		qr_before_insert(ql_first(a_head), (a_elm), a_field);	\
-	}								\
-	ql_first(a_head) = (a_elm);					\
-} while (0)
+#define ql_head_insert(a_head, a_elm, a_field)                                 \
+	do {                                                                   \
+		if (!ql_empty(a_head)) {                                       \
+			qr_before_insert(ql_first(a_head), (a_elm), a_field);  \
+		}                                                              \
+		ql_first(a_head) = (a_elm);                                    \
+	} while (0)
 
 /* Inserts a_elm as the last item in the list. */
-#define ql_tail_insert(a_head, a_elm, a_field) do {			\
-	if (!ql_empty(a_head)) {					\
-		qr_before_insert(ql_first(a_head), (a_elm), a_field);	\
-	}								\
-	ql_first(a_head) = qr_next((a_elm), a_field);			\
-} while (0)
+#define ql_tail_insert(a_head, a_elm, a_field)                                 \
+	do {                                                                   \
+		if (!ql_empty(a_head)) {                                       \
+			qr_before_insert(ql_first(a_head), (a_elm), a_field);  \
+		}                                                              \
+		ql_first(a_head) = qr_next((a_elm), a_field);                  \
+	} while (0)
 
 /*
  * Given lists a = [a_1, ..., a_n] and [b_1, ..., b_n], results in:
  * a = [a1, ..., a_n, b_1, ..., b_n] and b = [].
  */
-#define ql_concat(a_head_a, a_head_b, a_field) do {			\
-	if (ql_empty(a_head_a)) {					\
-		ql_move(a_head_a, a_head_b);				\
-	} else if (!ql_empty(a_head_b)) {				\
-		qr_meld(ql_first(a_head_a), ql_first(a_head_b),		\
-		    a_field);						\
-		ql_new(a_head_b);					\
-	}								\
-} while (0)
+#define ql_concat(a_head_a, a_head_b, a_field)                                 \
+	do {                                                                   \
+		if (ql_empty(a_head_a)) {                                      \
+			ql_move(a_head_a, a_head_b);                           \
+		} else if (!ql_empty(a_head_b)) {                              \
+			qr_meld(                                               \
+			    ql_first(a_head_a), ql_first(a_head_b), a_field);  \
+			ql_new(a_head_b);                                      \
+		}                                                              \
+	} while (0)
 
 /* Removes a_elm from the list. */
-#define ql_remove(a_head, a_elm, a_field) do {				\
-	if (ql_first(a_head) == (a_elm)) {				\
-		ql_first(a_head) = qr_next(ql_first(a_head), a_field);	\
-	}								\
-	if (ql_first(a_head) != (a_elm)) {				\
-		qr_remove((a_elm), a_field);				\
-	} else {							\
-		ql_new(a_head);						\
-	}								\
-} while (0)
+#define ql_remove(a_head, a_elm, a_field)                                      \
+	do {                                                                   \
+		if (ql_first(a_head) == (a_elm)) {                             \
+			ql_first(a_head) = qr_next(ql_first(a_head), a_field); \
+		}                                                              \
+		if (ql_first(a_head) != (a_elm)) {                             \
+			qr_remove((a_elm), a_field);                           \
+		} else {                                                       \
+			ql_new(a_head);                                        \
+		}                                                              \
+	} while (0)
 
 /* Removes the first item in the list. */
-#define ql_head_remove(a_head, a_type, a_field) do {			\
-	a_type *t = ql_first(a_head);					\
-	ql_remove((a_head), t, a_field);				\
-} while (0)
+#define ql_head_remove(a_head, a_type, a_field)                                \
+	do {                                                                   \
+		a_type *t = ql_first(a_head);                                  \
+		ql_remove((a_head), t, a_field);                               \
+	} while (0)
 
 /* Removes the last item in the list. */
-#define ql_tail_remove(a_head, a_type, a_field) do {			\
-	a_type *t = ql_last(a_head, a_field);				\
-	ql_remove((a_head), t, a_field);				\
-} while (0)
+#define ql_tail_remove(a_head, a_type, a_field)                                \
+	do {                                                                   \
+		a_type *t = ql_last(a_head, a_field);                          \
+		ql_remove((a_head), t, a_field);                               \
+	} while (0)
 
 /*
  * Given a = [a_1, a_2, ..., a_n-1, a_n, a_n+1, ...],
@@ -154,14 +164,15 @@ struct {								\
  * and replaces b's contents with:
  *   b = [a_n, a_n+1, ...]
  */
-#define ql_split(a_head_a, a_elm, a_head_b, a_field) do {		\
-	if (ql_first(a_head_a) == (a_elm)) {				\
-		ql_move(a_head_b, a_head_a);				\
-	} else {							\
-		qr_split(ql_first(a_head_a), (a_elm), a_field);		\
-		ql_first(a_head_b) = (a_elm);				\
-	}								\
-} while (0)
+#define ql_split(a_head_a, a_elm, a_head_b, a_field)                           \
+	do {                                                                   \
+		if (ql_first(a_head_a) == (a_elm)) {                           \
+			ql_move(a_head_b, a_head_a);                           \
+		} else {                                                       \
+			qr_split(ql_first(a_head_a), (a_elm), a_field);        \
+			ql_first(a_head_b) = (a_elm);                          \
+		}                                                              \
+	} while (0)
 
 /*
  * An optimized version of:
@@ -169,9 +180,10 @@ struct {								\
  *	ql_remove((a_head), t, a_field);
  *	ql_tail_insert((a_head), t, a_field);
  */
-#define ql_rotate(a_head, a_field) do {					\
-	ql_first(a_head) = qr_next(ql_first(a_head), a_field);		\
-} while (0)
+#define ql_rotate(a_head, a_field)                                             \
+	do {                                                                   \
+		ql_first(a_head) = qr_next(ql_first(a_head), a_field);         \
+	} while (0)
 
 /*
  * Helper macro to iterate over each element in a list in order, starting from
@@ -188,10 +200,10 @@ struct {								\
  * }
  */
 
-#define ql_foreach(a_var, a_head, a_field)				\
-	qr_foreach((a_var), ql_first(a_head), a_field)
+#define ql_foreach(a_var, a_head, a_field)                                     \
+	qr_foreach ((a_var), ql_first(a_head), a_field)
 
-#define ql_reverse_foreach(a_var, a_head, a_field)			\
+#define ql_reverse_foreach(a_var, a_head, a_field)                             \
 	qr_reverse_foreach((a_var), ql_first(a_head), a_field)
 
 #endif /* JEMALLOC_INTERNAL_QL_H */
diff --git a/include/jemalloc/internal/qr.h b/include/jemalloc/internal/qr.h
index ece4f556..1bd61f38 100644
--- a/include/jemalloc/internal/qr.h
+++ b/include/jemalloc/internal/qr.h
@@ -17,21 +17,22 @@
  */
 
 /* Ring definitions. */
-#define qr(a_type)							\
-struct {								\
-	a_type	*qre_next;						\
-	a_type	*qre_prev;						\
-}
+#define qr(a_type)                                                             \
+	struct {                                                               \
+		a_type *qre_next;                                              \
+		a_type *qre_prev;                                              \
+	}
 
 /*
  * Initialize a qr link.  Every link must be initialized before being used, even
  * if that initialization is going to be immediately overwritten (say, by being
  * passed into an insertion macro).
  */
-#define qr_new(a_qr, a_field) do {					\
-	(a_qr)->a_field.qre_next = (a_qr);				\
-	(a_qr)->a_field.qre_prev = (a_qr);				\
-} while (0)
+#define qr_new(a_qr, a_field)                                                  \
+	do {                                                                   \
+		(a_qr)->a_field.qre_next = (a_qr);                             \
+		(a_qr)->a_field.qre_prev = (a_qr);                             \
+	} while (0)
 
 /*
  * Go forwards or backwards in the ring.  Note that (the ring being circular), this
@@ -58,26 +59,27 @@ struct {								\
  *
  * a_qr_a can directly be a qr_next() macro, but a_qr_b cannot.
  */
-#define qr_meld(a_qr_a, a_qr_b, a_field) do {				\
-	(a_qr_b)->a_field.qre_prev->a_field.qre_next =			\
-	    (a_qr_a)->a_field.qre_prev;					\
-	(a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev;	\
-	(a_qr_b)->a_field.qre_prev =					\
-	    (a_qr_b)->a_field.qre_prev->a_field.qre_next;		\
-	(a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_a);	\
-	(a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_b);	\
-} while (0)
+#define qr_meld(a_qr_a, a_qr_b, a_field)                                       \
+	do {                                                                   \
+		(a_qr_b)->a_field.qre_prev->a_field.qre_next =                 \
+		    (a_qr_a)->a_field.qre_prev;                                \
+		(a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev;       \
+		(a_qr_b)->a_field.qre_prev =                                   \
+		    (a_qr_b)->a_field.qre_prev->a_field.qre_next;              \
+		(a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_a);       \
+		(a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_b);       \
+	} while (0)
 
 /*
  * Logically, this is just a meld.  The intent, though, is that a_qrelm is a
  * single-element ring, so that "before" has a more obvious interpretation than
  * meld.
  */
-#define qr_before_insert(a_qrelm, a_qr, a_field)			\
+#define qr_before_insert(a_qrelm, a_qr, a_field)                               \
 	qr_meld((a_qrelm), (a_qr), a_field)
 
 /* Ditto, but inserting after rather than before. */
-#define qr_after_insert(a_qrelm, a_qr, a_field)				\
+#define qr_after_insert(a_qrelm, a_qr, a_field)                                \
 	qr_before_insert(qr_next(a_qrelm, a_field), (a_qr), a_field)
 
 /*
@@ -98,14 +100,13 @@ struct {								\
  * qr_meld() and qr_split() are functionally equivalent, so there's no need to
  * have two copies of the code.
  */
-#define qr_split(a_qr_a, a_qr_b, a_field)				\
-	qr_meld((a_qr_a), (a_qr_b), a_field)
+#define qr_split(a_qr_a, a_qr_b, a_field) qr_meld((a_qr_a), (a_qr_b), a_field)
 
 /*
  * Splits off a_qr from the rest of its ring, so that it becomes a
  * single-element ring.
  */
-#define qr_remove(a_qr, a_field)					\
+#define qr_remove(a_qr, a_field)                                               \
 	qr_split(qr_next(a_qr, a_field), (a_qr), a_field)
 
 /*
@@ -121,20 +122,19 @@ struct {								\
  *   return sum;
  * }
  */
-#define qr_foreach(var, a_qr, a_field)					\
-	for ((var) = (a_qr);						\
-	    (var) != NULL;						\
-	    (var) = (((var)->a_field.qre_next != (a_qr))		\
-	    ? (var)->a_field.qre_next : NULL))
+#define qr_foreach(var, a_qr, a_field)                                         \
+	for ((var) = (a_qr); (var) != NULL;                                    \
+	     (var) = (((var)->a_field.qre_next != (a_qr))                      \
+	             ? (var)->a_field.qre_next                                 \
+	             : NULL))
 
 /*
  * The same (and with the same usage) as qr_foreach, but in the opposite order,
  * ending with a_qr.
  */
-#define qr_reverse_foreach(var, a_qr, a_field)				\
-	for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL;	\
-	    (var) != NULL;						\
-	    (var) = (((var) != (a_qr))					\
-	    ? (var)->a_field.qre_prev : NULL))
+#define qr_reverse_foreach(var, a_qr, a_field)                                 \
+	for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL;         \
+	     (var) != NULL;                                                    \
+	     (var) = (((var) != (a_qr)) ? (var)->a_field.qre_prev : NULL))
 
 #endif /* JEMALLOC_INTERNAL_QR_H */
diff --git a/include/jemalloc/internal/quantum.h b/include/jemalloc/internal/quantum.h
index c22d753a..2f7c0466 100644
--- a/include/jemalloc/internal/quantum.h
+++ b/include/jemalloc/internal/quantum.h
@@ -6,82 +6,84 @@
  * classes).
  */
 #ifndef LG_QUANTUM
-#  if (defined(__i386__) || defined(_M_IX86))
-#    define LG_QUANTUM		4
-#  endif
-#  ifdef __ia64__
-#    define LG_QUANTUM		4
-#  endif
-#  ifdef __alpha__
-#    define LG_QUANTUM		4
-#  endif
-#  if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__))
-#    define LG_QUANTUM		4
-#  endif
-#  if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64))
-#    define LG_QUANTUM		4
-#  endif
-#  ifdef __arm__
-#    define LG_QUANTUM		3
-#  endif
-#  ifdef __aarch64__
-#    define LG_QUANTUM		4
-#  endif
-#  ifdef __hppa__
-#    define LG_QUANTUM		4
-#  endif
-#  ifdef __loongarch__
-#    define LG_QUANTUM		4
-#  endif
-#  ifdef __m68k__
-#    define LG_QUANTUM		3
-#  endif
-#  ifdef __mips__
-#    if defined(__mips_n32) || defined(__mips_n64)
-#      define LG_QUANTUM		4
-#    else
-#      define LG_QUANTUM		3
-#    endif
-#  endif
-#  ifdef __nios2__
-#    define LG_QUANTUM		3
-#  endif
-#  ifdef __or1k__
-#    define LG_QUANTUM		3
-#  endif
-#  ifdef __powerpc__
-#    define LG_QUANTUM		4
-#  endif
-#  if defined(__riscv) || defined(__riscv__)
-#    define LG_QUANTUM		4
-#  endif
-#  ifdef __s390__
-#    define LG_QUANTUM		4
-#  endif
-#  if (defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || \
-	defined(__SH4_SINGLE_ONLY__))
-#    define LG_QUANTUM		4
-#  endif
-#  ifdef __tile__
-#    define LG_QUANTUM		4
-#  endif
-#  ifdef __le32__
-#    define LG_QUANTUM		4
-#  endif
-#  ifdef __arc__
-#    define LG_QUANTUM		3
-#  endif
-#  ifndef LG_QUANTUM
-#    error "Unknown minimum alignment for architecture; specify via "
-	 "--with-lg-quantum"
-#  endif
+#	if (defined(__i386__) || defined(_M_IX86))
+#		define LG_QUANTUM 4
+#	endif
+#	ifdef __ia64__
+#		define LG_QUANTUM 4
+#	endif
+#	ifdef __alpha__
+#		define LG_QUANTUM 4
+#	endif
+#	if (defined(__sparc64__) || defined(__sparcv9)                        \
+	    || defined(__sparc_v9__))
+#		define LG_QUANTUM 4
+#	endif
+#	if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64))
+#		define LG_QUANTUM 4
+#	endif
+#	ifdef __arm__
+#		define LG_QUANTUM 3
+#	endif
+#	if defined(__aarch64__) || defined(_M_ARM64)
+#		define LG_QUANTUM 4
+#	endif
+#	ifdef __hppa__
+#		define LG_QUANTUM 4
+#	endif
+#	ifdef __loongarch__
+#		define LG_QUANTUM 4
+#	endif
+#	ifdef __m68k__
+#		define LG_QUANTUM 3
+#	endif
+#	ifdef __mips__
+#		if defined(__mips_n32) || defined(__mips_n64)
+#			define LG_QUANTUM 4
+#		else
+#			define LG_QUANTUM 3
+#		endif
+#	endif
+#	ifdef __nios2__
+#		define LG_QUANTUM 3
+#	endif
+#	ifdef __or1k__
+#		define LG_QUANTUM 3
+#	endif
+#	if defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) \
+	    || defined(__ppc64__)
+#		define LG_QUANTUM 4
+#	endif
+#	if defined(__riscv) || defined(__riscv__)
+#		define LG_QUANTUM 4
+#	endif
+#	ifdef __s390__
+#		define LG_QUANTUM 4
+#	endif
+#	if (defined(__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__)  \
+	    || defined(__SH4_SINGLE_ONLY__))
+#		define LG_QUANTUM 4
+#	endif
+#	ifdef __tile__
+#		define LG_QUANTUM 4
+#	endif
+#	ifdef __le32__
+#		define LG_QUANTUM 4
+#	endif
+#	ifdef __arc__
+#		define LG_QUANTUM 3
+#	endif
+#	ifndef LG_QUANTUM
+#		error                                                          \
+		    "Unknown minimum alignment for architecture; specify via "
+"--with-lg-quantum"
+#	endif
 #endif
 
-#define QUANTUM			((size_t)(1U << LG_QUANTUM))
-#define QUANTUM_MASK		(QUANTUM - 1)
+#define QUANTUM ((size_t)(1U << LG_QUANTUM))
+#define QUANTUM_MASK (QUANTUM - 1)
 
 /* Return the smallest quantum multiple that is >= a. */
-#define QUANTUM_CEILING(a)						\
-	(((a) + QUANTUM_MASK) & ~QUANTUM_MASK)
+#define QUANTUM_CEILING(a) (((a) + QUANTUM_MASK) & ~QUANTUM_MASK)
 
 #endif /* JEMALLOC_INTERNAL_QUANTUM_H */
diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h
index a9a51cb6..58510e4d 100644
--- a/include/jemalloc/internal/rb.h
+++ b/include/jemalloc/internal/rb.h
@@ -1,6 +1,9 @@
 #ifndef JEMALLOC_INTERNAL_RB_H
 #define JEMALLOC_INTERNAL_RB_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/safety_check.h"
+
 /*-
  *******************************************************************************
  *
@@ -23,7 +26,7 @@
  */
 
 #ifndef __PGI
-#define RB_COMPACT
+#	define RB_COMPACT
 #endif
 
 /*
@@ -35,6 +38,7 @@
  */
 #define RB_MAX_DEPTH (sizeof(void *) << 4)
 
+/* clang-format off */
 #ifdef RB_COMPACT
 /* Node structure. */
 #define rb_node(a_type)							\
@@ -560,6 +564,20 @@ a_prefix##reverse_iter_filtered(a_rbt_type *rbtree, a_type *start,	\
  * the same as with the unfiltered version, with the added constraint that the
  * returned node must pass the filter.
  */
+JEMALLOC_ALWAYS_INLINE void
+rb_remove_safety_checks(const void *nodep, const char *function_name) {
+	if (!config_opt_safety_checks) {
+		return;
+	}
+	if (unlikely(nodep == NULL)) {
+		safety_check_fail(
+		    "<jemalloc>: Invalid deallocation detected in %s: "
+		    "attempting to remove node from tree but node was "
+		    "not found. Possibly caused by double free bugs.",
+		    function_name);
+        }
+}
+
 #define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp)	\
     rb_gen_impl(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp,	\
 	rb_empty_summarize, false)
@@ -852,6 +870,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) {			\
 	    }								\
 	}								\
     }									\
+    rb_remove_safety_checks(nodep, __func__);				\
+    assert(nodep != NULL);                                              \
     assert(nodep->node == node);					\
     pathp--;								\
     if (pathp->node != node) {						\
@@ -1852,5 +1872,6 @@ a_prefix##reverse_iter_filtered(a_rbt_type *rbtree, a_type *start,	\
     return ret;								\
 }									\
 ) /* end rb_summarized_only */
+/* clang-format on */
 
 #endif /* JEMALLOC_INTERNAL_RB_H */
diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h
index a00adb29..07205958 100644
--- a/include/jemalloc/internal/rtree.h
+++ b/include/jemalloc/internal/rtree.h
@@ -1,7 +1,10 @@
 #ifndef JEMALLOC_INTERNAL_RTREE_H
 #define JEMALLOC_INTERNAL_RTREE_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/base.h"
 #include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/edata.h"
 #include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/rtree_tsd.h"
 #include "jemalloc/internal/sc.h"
@@ -15,48 +18,49 @@
  */
 
 /* Number of high insignificant bits. */
-#define RTREE_NHIB ((1U << (LG_SIZEOF_PTR+3)) - LG_VADDR)
+#define RTREE_NHIB ((1U << (LG_SIZEOF_PTR + 3)) - LG_VADDR)
 /* Number of low insigificant bits. */
 #define RTREE_NLIB LG_PAGE
 /* Number of significant bits. */
 #define RTREE_NSB (LG_VADDR - RTREE_NLIB)
 /* Number of levels in radix tree. */
 #if RTREE_NSB <= 10
-#  define RTREE_HEIGHT 1
+#	define RTREE_HEIGHT 1
 #elif RTREE_NSB <= 36
-#  define RTREE_HEIGHT 2
+#	define RTREE_HEIGHT 2
 #elif RTREE_NSB <= 52
-#  define RTREE_HEIGHT 3
+#	define RTREE_HEIGHT 3
 #else
-#  error Unsupported number of significant virtual address bits
+#	error Unsupported number of significant virtual address bits
 #endif
 /* Use compact leaf representation if virtual address encoding allows. */
 #if RTREE_NHIB >= LG_CEIL(SC_NSIZES)
-#  define RTREE_LEAF_COMPACT
+#	define RTREE_LEAF_COMPACT
 #endif
 
 typedef struct rtree_node_elm_s rtree_node_elm_t;
 struct rtree_node_elm_s {
-	atomic_p_t	child; /* (rtree_{node,leaf}_elm_t *) */
+	atomic_p_t child; /* (rtree_{node,leaf}_elm_t *) */
 };
 
 typedef struct rtree_metadata_s rtree_metadata_t;
 struct rtree_metadata_s {
-	szind_t szind;
-	extent_state_t state; /* Mirrors edata->state. */
-	bool is_head; /* Mirrors edata->is_head. */
-	bool slab;
+	szind_t        szind;
+	extent_state_t state;   /* Mirrors edata->state. */
+	bool           is_head; /* Mirrors edata->is_head. */
+	bool           slab;
 };
 
 typedef struct rtree_contents_s rtree_contents_t;
 struct rtree_contents_s {
-	edata_t *edata;
+	edata_t         *edata;
 	rtree_metadata_t metadata;
 };
 
 #define RTREE_LEAF_STATE_WIDTH EDATA_BITS_STATE_WIDTH
 #define RTREE_LEAF_STATE_SHIFT 2
-#define RTREE_LEAF_STATE_MASK MASK(RTREE_LEAF_STATE_WIDTH, RTREE_LEAF_STATE_SHIFT)
+#define RTREE_LEAF_STATE_MASK                                                  \
+	MASK(RTREE_LEAF_STATE_WIDTH, RTREE_LEAF_STATE_SHIFT)
 
 struct rtree_leaf_elm_s {
 #ifdef RTREE_LEAF_COMPACT
@@ -74,36 +78,36 @@ struct rtree_leaf_elm_s {
 	 *
 	 *   00000000 xxxxxxxx eeeeeeee [...] eeeeeeee e00ssshb
 	 */
-	atomic_p_t	le_bits;
+	atomic_p_t le_bits;
 #else
-	atomic_p_t	le_edata; /* (edata_t *) */
+	atomic_p_t le_edata; /* (edata_t *) */
 	/*
 	 * From high to low bits: szind (8 bits), state (4 bits), is_head, slab
 	 */
-	atomic_u_t	le_metadata;
+	atomic_u_t le_metadata;
 #endif
 };
 
 typedef struct rtree_level_s rtree_level_t;
 struct rtree_level_s {
 	/* Number of key bits distinguished by this level. */
-	unsigned		bits;
+	unsigned bits;
 	/*
 	 * Cumulative number of key bits distinguished by traversing to
 	 * corresponding tree level.
 	 */
-	unsigned		cumbits;
+	unsigned cumbits;
 };
 
 typedef struct rtree_s rtree_t;
 struct rtree_s {
-	base_t			*base;
-	malloc_mutex_t		init_lock;
+	base_t        *base;
+	malloc_mutex_t init_lock;
 	/* Number of elements based on rtree_levels[0].bits. */
 #if RTREE_HEIGHT > 1
-	rtree_node_elm_t	root[1U << (RTREE_NSB/RTREE_HEIGHT)];
+	rtree_node_elm_t root[1U << (RTREE_NSB / RTREE_HEIGHT)];
 #else
-	rtree_leaf_elm_t	root[1U << (RTREE_NSB/RTREE_HEIGHT)];
+	rtree_leaf_elm_t root[1U << (RTREE_NSB / RTREE_HEIGHT)];
 #endif
 };
 
@@ -115,17 +119,17 @@ struct rtree_s {
  */
 static const rtree_level_t rtree_levels[] = {
 #if RTREE_HEIGHT == 1
-	{RTREE_NSB, RTREE_NHIB + RTREE_NSB}
+    {RTREE_NSB, RTREE_NHIB + RTREE_NSB}
 #elif RTREE_HEIGHT == 2
-	{RTREE_NSB/2, RTREE_NHIB + RTREE_NSB/2},
-	{RTREE_NSB/2 + RTREE_NSB%2, RTREE_NHIB + RTREE_NSB}
+    {RTREE_NSB / 2, RTREE_NHIB + RTREE_NSB / 2},
+    {RTREE_NSB / 2 + RTREE_NSB % 2, RTREE_NHIB + RTREE_NSB}
 #elif RTREE_HEIGHT == 3
-	{RTREE_NSB/3, RTREE_NHIB + RTREE_NSB/3},
-	{RTREE_NSB/3 + RTREE_NSB%3/2,
-	    RTREE_NHIB + RTREE_NSB/3*2 + RTREE_NSB%3/2},
-	{RTREE_NSB/3 + RTREE_NSB%3 - RTREE_NSB%3/2, RTREE_NHIB + RTREE_NSB}
+    {RTREE_NSB / 3, RTREE_NHIB + RTREE_NSB / 3},
+    {RTREE_NSB / 3 + RTREE_NSB % 3 / 2,
+        RTREE_NHIB + RTREE_NSB / 3 * 2 + RTREE_NSB % 3 / 2},
+    {RTREE_NSB / 3 + RTREE_NSB % 3 - RTREE_NSB % 3 / 2, RTREE_NHIB + RTREE_NSB}
 #else
-#  error Unsupported rtree height
+#	error Unsupported rtree height
 #endif
 };
 
@@ -136,9 +140,9 @@ rtree_leaf_elm_t *rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree,
 
 JEMALLOC_ALWAYS_INLINE unsigned
 rtree_leaf_maskbits(void) {
-	unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3);
-	unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits -
-	    rtree_levels[RTREE_HEIGHT-1].bits);
+	unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR + 3);
+	unsigned cumbits = (rtree_levels[RTREE_HEIGHT - 1].cumbits
+	    - rtree_levels[RTREE_HEIGHT - 1].bits);
 	return ptrbits - cumbits;
 }
 
@@ -150,16 +154,16 @@ rtree_leafkey(uintptr_t key) {
 
 JEMALLOC_ALWAYS_INLINE size_t
 rtree_cache_direct_map(uintptr_t key) {
-	return (size_t)((key >> rtree_leaf_maskbits()) &
-	    (RTREE_CTX_NCACHE - 1));
+	return (
+	    size_t)((key >> rtree_leaf_maskbits()) & (RTREE_CTX_NCACHE - 1));
 }
 
 JEMALLOC_ALWAYS_INLINE uintptr_t
 rtree_subkey(uintptr_t key, unsigned level) {
-	unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3);
-	unsigned cumbits = rtree_levels[level].cumbits;
-	unsigned shiftbits = ptrbits - cumbits;
-	unsigned maskbits = rtree_levels[level].bits;
+	unsigned  ptrbits = ZU(1) << (LG_SIZEOF_PTR + 3);
+	unsigned  cumbits = rtree_levels[level].cumbits;
+	unsigned  shiftbits = ptrbits - cumbits;
+	unsigned  maskbits = rtree_levels[level].bits;
 	uintptr_t mask = (ZU(1) << maskbits) - 1;
 	return ((key >> shiftbits) & mask);
 }
@@ -175,12 +179,12 @@ rtree_subkey(uintptr_t key, unsigned level) {
  *             dependent on a previous rtree write, which means a stale read
  *             could result if synchronization were omitted here.
  */
-#  ifdef RTREE_LEAF_COMPACT
+#ifdef RTREE_LEAF_COMPACT
 JEMALLOC_ALWAYS_INLINE uintptr_t
-rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree,
-    rtree_leaf_elm_t *elm, bool dependent) {
-	return (uintptr_t)atomic_load_p(&elm->le_bits, dependent
-	    ? ATOMIC_RELAXED : ATOMIC_ACQUIRE);
+rtree_leaf_elm_bits_read(
+    tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool dependent) {
+	return (uintptr_t)atomic_load_p(
+	    &elm->le_bits, dependent ? ATOMIC_RELAXED : ATOMIC_ACQUIRE);
 }
 
 JEMALLOC_ALWAYS_INLINE uintptr_t
@@ -192,10 +196,10 @@ rtree_leaf_elm_bits_encode(rtree_contents_t contents) {
 	uintptr_t szind_bits = (uintptr_t)contents.metadata.szind << LG_VADDR;
 	uintptr_t slab_bits = (uintptr_t)contents.metadata.slab;
 	uintptr_t is_head_bits = (uintptr_t)contents.metadata.is_head << 1;
-	uintptr_t state_bits = (uintptr_t)contents.metadata.state <<
-	    RTREE_LEAF_STATE_SHIFT;
-	uintptr_t metadata_bits = szind_bits | state_bits | is_head_bits |
-	    slab_bits;
+	uintptr_t state_bits = (uintptr_t)contents.metadata.state
+	    << RTREE_LEAF_STATE_SHIFT;
+	uintptr_t metadata_bits = szind_bits | state_bits | is_head_bits
+	    | slab_bits;
 	assert((edata_bits & metadata_bits) == 0);
 
 	return edata_bits | metadata_bits;
@@ -209,13 +213,13 @@ rtree_leaf_elm_bits_decode(uintptr_t bits) {
 	contents.metadata.slab = (bool)(bits & 1);
 	contents.metadata.is_head = (bool)(bits & (1 << 1));
 
-	uintptr_t state_bits = (bits & RTREE_LEAF_STATE_MASK) >>
-	    RTREE_LEAF_STATE_SHIFT;
+	uintptr_t state_bits = (bits & RTREE_LEAF_STATE_MASK)
+	    >> RTREE_LEAF_STATE_SHIFT;
 	assert(state_bits <= extent_state_max);
 	contents.metadata.state = (extent_state_t)state_bits;
 
 	uintptr_t low_bit_mask = ~((uintptr_t)EDATA_ALIGNMENT - 1);
-#    ifdef __aarch64__
+#	ifdef __aarch64__
 	/*
 	 * aarch64 doesn't sign extend the highest virtual address bit to set
 	 * the higher ones.  Instead, the high bits get zeroed.
@@ -223,64 +227,72 @@ rtree_leaf_elm_bits_decode(uintptr_t bits) {
 	uintptr_t high_bit_mask = ((uintptr_t)1 << LG_VADDR) - 1;
 	/* Mask off metadata. */
 	uintptr_t mask = high_bit_mask & low_bit_mask;
+	/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
 	contents.edata = (edata_t *)(bits & mask);
-#    else
+#	else
 	/* Restore sign-extended high bits, mask metadata bits. */
+	/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
 	contents.edata = (edata_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB)
-	    >> RTREE_NHIB) & low_bit_mask);
-#    endif
+	                                 >> RTREE_NHIB)
+	    & low_bit_mask);
+#	endif
 	assert((uintptr_t)contents.edata % (uintptr_t)EDATA_ALIGNMENT == 0);
 	return contents;
 }
 
-#  endif /* RTREE_LEAF_COMPACT */
+#endif /* RTREE_LEAF_COMPACT */
 
 JEMALLOC_ALWAYS_INLINE rtree_contents_t
-rtree_leaf_elm_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
-    bool dependent) {
+rtree_leaf_elm_read(
+    tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool dependent) {
 #ifdef RTREE_LEAF_COMPACT
 	uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent);
 	rtree_contents_t contents = rtree_leaf_elm_bits_decode(bits);
 	return contents;
 #else
 	rtree_contents_t contents;
-	unsigned metadata_bits = atomic_load_u(&elm->le_metadata, dependent
-	    ? ATOMIC_RELAXED : ATOMIC_ACQUIRE);
+	unsigned         metadata_bits = atomic_load_u(
+            &elm->le_metadata, dependent ? ATOMIC_RELAXED : ATOMIC_ACQUIRE);
 	contents.metadata.slab = (bool)(metadata_bits & 1);
 	contents.metadata.is_head = (bool)(metadata_bits & (1 << 1));
 
-	uintptr_t state_bits = (metadata_bits & RTREE_LEAF_STATE_MASK) >>
-	    RTREE_LEAF_STATE_SHIFT;
+	uintptr_t state_bits = (metadata_bits & RTREE_LEAF_STATE_MASK)
+	    >> RTREE_LEAF_STATE_SHIFT;
 	assert(state_bits <= extent_state_max);
 	contents.metadata.state = (extent_state_t)state_bits;
-	contents.metadata.szind = metadata_bits >> (RTREE_LEAF_STATE_SHIFT +
-	    RTREE_LEAF_STATE_WIDTH);
+	contents.metadata.szind = metadata_bits
+	    >> (RTREE_LEAF_STATE_SHIFT + RTREE_LEAF_STATE_WIDTH);
 
-	contents.edata = (edata_t *)atomic_load_p(&elm->le_edata, dependent
-	    ? ATOMIC_RELAXED : ATOMIC_ACQUIRE);
+	contents.edata = (edata_t *)atomic_load_p(
+	    &elm->le_edata, dependent ? ATOMIC_RELAXED : ATOMIC_ACQUIRE);
 
 	return contents;
 #endif
 }
 
 JEMALLOC_ALWAYS_INLINE void
-rtree_contents_encode(rtree_contents_t contents, void **bits,
-    unsigned *additional) {
+rtree_contents_encode(
+    rtree_contents_t contents, void **bits, unsigned *additional) {
 #ifdef RTREE_LEAF_COMPACT
+	/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
 	*bits = (void *)rtree_leaf_elm_bits_encode(contents);
+	/* Suppress spurious warning from static analysis */
+	if (config_debug) {
+		*additional = 0;
+	}
 #else
 	*additional = (unsigned)contents.metadata.slab
 	    | ((unsigned)contents.metadata.is_head << 1)
 	    | ((unsigned)contents.metadata.state << RTREE_LEAF_STATE_SHIFT)
-	    | ((unsigned)contents.metadata.szind << (RTREE_LEAF_STATE_SHIFT +
-	    RTREE_LEAF_STATE_WIDTH));
+	    | ((unsigned)contents.metadata.szind
+	        << (RTREE_LEAF_STATE_SHIFT + RTREE_LEAF_STATE_WIDTH));
 	*bits = contents.edata;
 #endif
 }
 
 JEMALLOC_ALWAYS_INLINE void
-rtree_leaf_elm_write_commit(tsdn_t *tsdn, rtree_t *rtree,
-    rtree_leaf_elm_t *elm, void *bits, unsigned additional) {
+rtree_leaf_elm_write_commit(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
+    void *bits, unsigned additional) {
 #ifdef RTREE_LEAF_COMPACT
 	atomic_store_p(&elm->le_bits, bits, ATOMIC_RELEASE);
 #else
@@ -294,12 +306,11 @@ rtree_leaf_elm_write_commit(tsdn_t *tsdn, rtree_t *rtree,
 }
 
 JEMALLOC_ALWAYS_INLINE void
-rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree,
-    rtree_leaf_elm_t *elm, rtree_contents_t contents) {
+rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
+    rtree_contents_t contents) {
 	assert((uintptr_t)contents.edata % EDATA_ALIGNMENT == 0);
-	void *bits;
+	void    *bits;
 	unsigned additional;
-
 	rtree_contents_encode(contents, &bits, &additional);
 	rtree_leaf_elm_write_commit(tsdn, rtree, elm, bits, additional);
 }
@@ -314,8 +325,10 @@ rtree_leaf_elm_state_update(tsdn_t *tsdn, rtree_t *rtree,
 	    /* dependent */ true);
 	bits &= ~RTREE_LEAF_STATE_MASK;
 	bits |= state << RTREE_LEAF_STATE_SHIFT;
+	/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
 	atomic_store_p(&elm1->le_bits, (void *)bits, ATOMIC_RELEASE);
 	if (elm2 != NULL) {
+		/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
 		atomic_store_p(&elm2->le_bits, (void *)bits, ATOMIC_RELEASE);
 	}
 #else
@@ -337,7 +350,7 @@ rtree_leaf_elm_state_update(tsdn_t *tsdn, rtree_t *rtree,
 JEMALLOC_ALWAYS_INLINE bool
 rtree_leaf_elm_lookup_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
     uintptr_t key, rtree_leaf_elm_t **elm) {
-	size_t slot = rtree_cache_direct_map(key);
+	size_t    slot = rtree_cache_direct_map(key);
 	uintptr_t leafkey = rtree_leafkey(key);
 	assert(leafkey != RTREE_LEAFKEY_INVALID);
 
@@ -347,7 +360,7 @@ rtree_leaf_elm_lookup_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
 
 	rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf;
 	assert(leaf != NULL);
-	uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1);
+	uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT - 1);
 	*elm = &leaf[subkey];
 
 	return false;
@@ -359,7 +372,7 @@ rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
 	assert(key != 0);
 	assert(!dependent || !init_missing);
 
-	size_t slot = rtree_cache_direct_map(key);
+	size_t    slot = rtree_cache_direct_map(key);
 	uintptr_t leafkey = rtree_leafkey(key);
 	assert(leafkey != RTREE_LEAFKEY_INVALID);
 
@@ -367,39 +380,41 @@ rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
 	if (likely(rtree_ctx->cache[slot].leafkey == leafkey)) {
 		rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf;
 		assert(leaf != NULL);
-		uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1);
+		uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT - 1);
 		return &leaf[subkey];
 	}
 	/*
 	 * Search the L2 LRU cache.  On hit, swap the matching element into the
 	 * slot in L1 cache, and move the position in L2 up by 1.
 	 */
-#define RTREE_CACHE_CHECK_L2(i) do {					\
-	if (likely(rtree_ctx->l2_cache[i].leafkey == leafkey)) {	\
-		rtree_leaf_elm_t *leaf = rtree_ctx->l2_cache[i].leaf;	\
-		assert(leaf != NULL);					\
-		if (i > 0) {						\
-			/* Bubble up by one. */				\
-			rtree_ctx->l2_cache[i].leafkey =		\
-				rtree_ctx->l2_cache[i - 1].leafkey;	\
-			rtree_ctx->l2_cache[i].leaf =			\
-				rtree_ctx->l2_cache[i - 1].leaf;	\
-			rtree_ctx->l2_cache[i - 1].leafkey =		\
-			    rtree_ctx->cache[slot].leafkey;		\
-			rtree_ctx->l2_cache[i - 1].leaf =		\
-			    rtree_ctx->cache[slot].leaf;		\
-		} else {						\
-			rtree_ctx->l2_cache[0].leafkey =		\
-			    rtree_ctx->cache[slot].leafkey;		\
-			rtree_ctx->l2_cache[0].leaf =			\
-			    rtree_ctx->cache[slot].leaf;		\
-		}							\
-		rtree_ctx->cache[slot].leafkey = leafkey;		\
-		rtree_ctx->cache[slot].leaf = leaf;			\
-		uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1);	\
-		return &leaf[subkey];					\
-	}								\
-} while (0)
+#define RTREE_CACHE_CHECK_L2(i)                                                \
+	do {                                                                   \
+		if (likely(rtree_ctx->l2_cache[i].leafkey == leafkey)) {       \
+			rtree_leaf_elm_t *leaf = rtree_ctx->l2_cache[i].leaf;  \
+			assert(leaf != NULL);                                  \
+			if (i > 0) {                                           \
+				/* Bubble up by one. */                        \
+				rtree_ctx->l2_cache[i].leafkey =               \
+				    rtree_ctx->l2_cache[i - 1].leafkey;        \
+				rtree_ctx->l2_cache[i].leaf =                  \
+				    rtree_ctx->l2_cache[i - 1].leaf;           \
+				rtree_ctx->l2_cache[i - 1].leafkey =           \
+				    rtree_ctx->cache[slot].leafkey;            \
+				rtree_ctx->l2_cache[i - 1].leaf =              \
+				    rtree_ctx->cache[slot].leaf;               \
+			} else {                                               \
+				rtree_ctx->l2_cache[0].leafkey =               \
+				    rtree_ctx->cache[slot].leafkey;            \
+				rtree_ctx->l2_cache[0].leaf =                  \
+				    rtree_ctx->cache[slot].leaf;               \
+			}                                                      \
+			rtree_ctx->cache[slot].leafkey = leafkey;              \
+			rtree_ctx->cache[slot].leaf = leaf;                    \
+			uintptr_t subkey = rtree_subkey(                       \
+			    key, RTREE_HEIGHT - 1);                            \
+			return &leaf[subkey];                                  \
+		}                                                              \
+	} while (0)
 	/* Check the first cache entry. */
 	RTREE_CACHE_CHECK_L2(0);
 	/* Search the remaining cache elements. */
@@ -408,8 +423,8 @@ rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
 	}
 #undef RTREE_CACHE_CHECK_L2
 
-	return rtree_leaf_elm_lookup_hard(tsdn, rtree, rtree_ctx, key,
-	    dependent, init_missing);
+	return rtree_leaf_elm_lookup_hard(
+	    tsdn, rtree, rtree_ctx, key, dependent, init_missing);
 }
 
 /*
@@ -429,8 +444,8 @@ rtree_read_independent(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
 }
 
 static inline rtree_contents_t
-rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
-    uintptr_t key) {
+rtree_read(
+    tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) {
 	rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx,
 	    key, /* dependent */ true, /* init_missing */ false);
 	assert(elm != NULL);
@@ -438,21 +453,22 @@ rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
 }
 
 static inline rtree_metadata_t
-rtree_metadata_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
-    uintptr_t key) {
+rtree_metadata_read(
+    tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) {
 	rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx,
 	    key, /* dependent */ true, /* init_missing */ false);
 	assert(elm != NULL);
 	return rtree_leaf_elm_read(tsdn, rtree, elm,
-	    /* dependent */ true).metadata;
+	    /* dependent */ true)
+	    .metadata;
 }
 
 /*
  * Returns true when the request cannot be fulfilled by fastpath.
  */
 static inline bool
-rtree_metadata_try_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
-    uintptr_t key, rtree_metadata_t *r_rtree_metadata) {
+rtree_metadata_try_read_fast(tsdn_t *tsdn, rtree_t *rtree,
+    rtree_ctx_t *rtree_ctx, uintptr_t key, rtree_metadata_t *r_rtree_metadata) {
 	rtree_leaf_elm_t *elm;
 	/*
 	 * Should check the bool return value (lookup success or not) instead of
@@ -465,7 +481,8 @@ rtree_metadata_try_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ct
 	}
 	assert(elm != NULL);
 	*r_rtree_metadata = rtree_leaf_elm_read(tsdn, rtree, elm,
-	    /* dependent */ true).metadata;
+	    /* dependent */ true)
+	                        .metadata;
 	return false;
 }
 
@@ -479,22 +496,27 @@ rtree_write_range_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
 	 * are dependent w/o init_missing, assuming the range spans across at
 	 * most 2 rtree leaf nodes (each covers 1 GiB of vaddr).
 	 */
-	void *bits;
+	void    *bits;
 	unsigned additional;
 	rtree_contents_encode(contents, &bits, &additional);
 
 	rtree_leaf_elm_t *elm = NULL; /* Dead store. */
 	for (uintptr_t addr = base; addr <= end; addr += PAGE) {
-		if (addr == base ||
-		    (addr & ((ZU(1) << rtree_leaf_maskbits()) - 1)) == 0) {
-			elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, addr,
+		if (addr == base
+		    || (addr & ((ZU(1) << rtree_leaf_maskbits()) - 1)) == 0) {
+			elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx,
+			    addr,
 			    /* dependent */ true, /* init_missing */ false);
 			assert(elm != NULL);
 		}
-		assert(elm == rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, addr,
-		    /* dependent */ true, /* init_missing */ false));
-		assert(!clearing || rtree_leaf_elm_read(tsdn, rtree, elm,
-		    /* dependent */ true).edata != NULL);
+		assert(elm
+		    == rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, addr,
+		        /* dependent */ true, /* init_missing */ false));
+		assert(!clearing
+		    || rtree_leaf_elm_read(tsdn, rtree, elm,
+		           /* dependent */ true)
+		            .edata
+		        != NULL);
 		rtree_leaf_elm_write_commit(tsdn, rtree, elm, bits, additional);
 		elm++;
 	}
@@ -522,13 +544,15 @@ rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key,
 }
 
 static inline void
-rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
-    uintptr_t key) {
+rtree_clear(
+    tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) {
 	rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx,
 	    key, /* dependent */ true, /* init_missing */ false);
 	assert(elm != NULL);
 	assert(rtree_leaf_elm_read(tsdn, rtree, elm,
-	    /* dependent */ true).edata != NULL);
+	           /* dependent */ true)
+	           .edata
+	    != NULL);
 	rtree_contents_t contents;
 	contents.edata = NULL;
 	contents.metadata.szind = SC_NSIZES;
diff --git a/include/jemalloc/internal/rtree_tsd.h b/include/jemalloc/internal/rtree_tsd.h
index e45525c5..4014dde0 100644
--- a/include/jemalloc/internal/rtree_tsd.h
+++ b/include/jemalloc/internal/rtree_tsd.h
@@ -1,6 +1,8 @@
 #ifndef JEMALLOC_INTERNAL_RTREE_CTX_H
 #define JEMALLOC_INTERNAL_RTREE_CTX_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+
 /*
  * Number of leafkey/leaf pairs to cache in L1 and L2 level respectively.  Each
  * entry supports an entire leaf, so the cache hit rate is typically high even
@@ -23,7 +25,8 @@
 
 /* Needed for initialization only. */
 #define RTREE_LEAFKEY_INVALID ((uintptr_t)1)
-#define RTREE_CTX_CACHE_ELM_INVALID {RTREE_LEAFKEY_INVALID, NULL}
+#define RTREE_CTX_CACHE_ELM_INVALID                                            \
+	{ RTREE_LEAFKEY_INVALID, NULL }
 
 #define RTREE_CTX_INIT_ELM_1 RTREE_CTX_CACHE_ELM_INVALID
 #define RTREE_CTX_INIT_ELM_2 RTREE_CTX_INIT_ELM_1, RTREE_CTX_INIT_ELM_1
@@ -38,23 +41,27 @@
  * Static initializer (to invalidate the cache entries) is required because the
  * free fastpath may access the rtree cache before a full tsd initialization.
  */
-#define RTREE_CTX_INITIALIZER {{RTREE_CTX_INIT_ELM_DATA(RTREE_CTX_NCACHE)}, \
-			       {RTREE_CTX_INIT_ELM_DATA(RTREE_CTX_NCACHE_L2)}}
+#define RTREE_CTX_INITIALIZER                                                  \
+	{                                                                      \
+		{RTREE_CTX_INIT_ELM_DATA(RTREE_CTX_NCACHE)}, {                 \
+			RTREE_CTX_INIT_ELM_DATA(RTREE_CTX_NCACHE_L2)           \
+		}                                                              \
+	}
 
 typedef struct rtree_leaf_elm_s rtree_leaf_elm_t;
 
 typedef struct rtree_ctx_cache_elm_s rtree_ctx_cache_elm_t;
 struct rtree_ctx_cache_elm_s {
-	uintptr_t		leafkey;
-	rtree_leaf_elm_t	*leaf;
+	uintptr_t         leafkey;
+	rtree_leaf_elm_t *leaf;
 };
 
 typedef struct rtree_ctx_s rtree_ctx_t;
 struct rtree_ctx_s {
 	/* Direct mapped cache. */
-	rtree_ctx_cache_elm_t	cache[RTREE_CTX_NCACHE];
+	rtree_ctx_cache_elm_t cache[RTREE_CTX_NCACHE];
 	/* L2 LRU cache. */
-	rtree_ctx_cache_elm_t	l2_cache[RTREE_CTX_NCACHE_L2];
+	rtree_ctx_cache_elm_t l2_cache[RTREE_CTX_NCACHE_L2];
 };
 
 void rtree_ctx_data_init(rtree_ctx_t *ctx);
diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h
index f1a74f17..2b4b2d0e 100644
--- a/include/jemalloc/internal/safety_check.h
+++ b/include/jemalloc/internal/safety_check.h
@@ -1,8 +1,14 @@
 #ifndef JEMALLOC_INTERNAL_SAFETY_CHECK_H
 #define JEMALLOC_INTERNAL_SAFETY_CHECK_H
 
-void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr,
-    size_t true_size, size_t input_size);
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/pages.h"
+
+#define SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT 32
+
+void safety_check_fail_sized_dealloc(
+    bool current_dealloc, const void *ptr, size_t true_size, size_t input_size);
 void safety_check_fail(const char *format, ...);
 
 typedef void (*safety_check_abort_hook_t)(const char *message);
@@ -10,22 +16,51 @@ typedef void (*safety_check_abort_hook_t)(const char *message);
 /* Can set to NULL for a default. */
 void safety_check_set_abort(safety_check_abort_hook_t abort_fn);
 
+#define REDZONE_SIZE ((size_t)32)
+#define REDZONE_FILL_VALUE 0xBC
+
+/*
+ * Normally the redzone extends `REDZONE_SIZE` bytes beyond the end of
+ * the allocation. However, we don't let the redzone extend onto another
+ * OS page because this would impose additional overhead if that page was
+ * not already resident in memory.
+ */
+JEMALLOC_ALWAYS_INLINE const unsigned char *
+compute_redzone_end(const void *_ptr, size_t usize, size_t bumped_usize) {
+	const unsigned char *ptr = (const unsigned char *)_ptr;
+	const unsigned char *redzone_end = usize + REDZONE_SIZE < bumped_usize
+	    ? &ptr[usize + REDZONE_SIZE]
+	    : &ptr[bumped_usize];
+	const unsigned char *page_end = (const unsigned char *)
+	    ALIGNMENT_ADDR2CEILING(&ptr[usize], os_page);
+	return redzone_end < page_end ? redzone_end : page_end;
+}
+
 JEMALLOC_ALWAYS_INLINE void
 safety_check_set_redzone(void *ptr, size_t usize, size_t bumped_usize) {
-	assert(usize < bumped_usize);
-	for (size_t i = usize; i < bumped_usize && i < usize + 32; ++i) {
-		*((unsigned char *)ptr + i) = 0xBC;
+	assert(usize <= bumped_usize);
+	const unsigned char *redzone_end = compute_redzone_end(
+	    ptr, usize, bumped_usize);
+	for (unsigned char *curr = &((unsigned char *)ptr)[usize];
+	     curr < redzone_end; curr++) {
+		*curr = REDZONE_FILL_VALUE;
 	}
 }
 
 JEMALLOC_ALWAYS_INLINE void
-safety_check_verify_redzone(const void *ptr, size_t usize, size_t bumped_usize)
-{
-	for (size_t i = usize; i < bumped_usize && i < usize + 32; ++i) {
-		if (unlikely(*((unsigned char *)ptr + i) != 0xBC)) {
+safety_check_verify_redzone(
+    const void *ptr, size_t usize, size_t bumped_usize) {
+	const unsigned char *redzone_end = compute_redzone_end(
+	    ptr, usize, bumped_usize);
+	for (const unsigned char *curr = &((const unsigned char *)ptr)[usize];
+	     curr < redzone_end; curr++) {
+		if (unlikely(*curr != REDZONE_FILL_VALUE)) {
 			safety_check_fail("Use after free error\n");
 		}
 	}
 }
 
+#undef REDZONE_SIZE
+#undef REDZONE_FILL_VALUE
+
 #endif /*JEMALLOC_INTERNAL_SAFETY_CHECK_H */
diff --git a/include/jemalloc/internal/san.h b/include/jemalloc/internal/san.h
index 8813d6bb..5dcae376 100644
--- a/include/jemalloc/internal/san.h
+++ b/include/jemalloc/internal/san.h
@@ -1,8 +1,11 @@
 #ifndef JEMALLOC_INTERNAL_GUARD_H
 #define JEMALLOC_INTERNAL_GUARD_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/ehooks.h"
 #include "jemalloc/internal/emap.h"
+#include "jemalloc/internal/jemalloc_internal_externs.h"
+#include "jemalloc/internal/tsd.h"
 
 #define SAN_PAGE_GUARD PAGE
 #define SAN_PAGE_GUARDS_SIZE (SAN_PAGE_GUARD * 2)
@@ -29,22 +32,22 @@ void san_unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
  * Unguard the extent, but don't modify emap boundaries. Must be called on an
  * extent that has been erased from emap and shouldn't be placed back.
  */
-void san_unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks,
-    edata_t *edata, emap_t *emap);
+void san_unguard_pages_pre_destroy(
+    tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap);
 void san_check_stashed_ptrs(void **ptrs, size_t nstashed, size_t usize);
 
 void tsd_san_init(tsd_t *tsd);
 void san_init(ssize_t lg_san_uaf_align);
 
 static inline void
-san_guard_pages_two_sided(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
-    emap_t *emap, bool remap) {
+san_guard_pages_two_sided(
+    tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap, bool remap) {
 	san_guard_pages(tsdn, ehooks, edata, emap, true, true, remap);
 }
 
 static inline void
-san_unguard_pages_two_sided(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
-    emap_t *emap) {
+san_unguard_pages_two_sided(
+    tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) {
 	san_unguard_pages(tsdn, ehooks, edata, emap, true, true);
 }
 
@@ -80,14 +83,14 @@ san_guard_enabled(void) {
 }
 
 static inline bool
-san_large_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks, size_t size,
-    size_t alignment) {
-	if (opt_san_guard_large == 0 || ehooks_guard_will_fail(ehooks) ||
-	    tsdn_null(tsdn)) {
+san_large_extent_decide_guard(
+    tsdn_t *tsdn, ehooks_t *ehooks, size_t size, size_t alignment) {
+	if (opt_san_guard_large == 0 || ehooks_guard_will_fail(ehooks)
+	    || tsdn_null(tsdn)) {
 		return false;
 	}
 
-	tsd_t *tsd = tsdn_tsd(tsdn);
+	tsd_t   *tsd = tsdn_tsd(tsdn);
 	uint64_t n = tsd_san_extents_until_guard_large_get(tsd);
 	assert(n >= 1);
 	if (n > 1) {
@@ -98,10 +101,10 @@ san_large_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks, size_t size,
 		*tsd_san_extents_until_guard_largep_get(tsd) = n - 1;
 	}
 
-	if (n == 1 && (alignment <= PAGE) &&
-	    (san_two_side_guarded_sz(size) <= SC_LARGE_MAXCLASS)) {
-		*tsd_san_extents_until_guard_largep_get(tsd) =
-		    opt_san_guard_large;
+	if (n == 1 && (alignment <= PAGE)
+	    && (san_two_side_guarded_sz(size) <= SC_LARGE_MAXCLASS)) {
+		*tsd_san_extents_until_guard_largep_get(
+		    tsd) = opt_san_guard_large;
 		return true;
 	} else {
 		assert(tsd_san_extents_until_guard_large_get(tsd) >= 1);
@@ -111,17 +114,17 @@ san_large_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks, size_t size,
 
 static inline bool
 san_slab_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks) {
-	if (opt_san_guard_small == 0 || ehooks_guard_will_fail(ehooks) ||
-	    tsdn_null(tsdn)) {
+	if (opt_san_guard_small == 0 || ehooks_guard_will_fail(ehooks)
+	    || tsdn_null(tsdn)) {
 		return false;
 	}
 
-	tsd_t *tsd = tsdn_tsd(tsdn);
+	tsd_t   *tsd = tsdn_tsd(tsdn);
 	uint64_t n = tsd_san_extents_until_guard_small_get(tsd);
 	assert(n >= 1);
 	if (n == 1) {
-		*tsd_san_extents_until_guard_smallp_get(tsd) =
-		    opt_san_guard_small;
+		*tsd_san_extents_until_guard_smallp_get(
+		    tsd) = opt_san_guard_small;
 		return true;
 	} else {
 		*tsd_san_extents_until_guard_smallp_get(tsd) = n - 1;
@@ -131,13 +134,13 @@ san_slab_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks) {
 }
 
 static inline void
-san_junk_ptr_locations(void *ptr, size_t usize, void **first, void **mid,
-    void **last) {
+san_junk_ptr_locations(
+    void *ptr, size_t usize, void **first, void **mid, void **last) {
 	size_t ptr_sz = sizeof(void *);
 
 	*first = ptr;
 
-	*mid = (void *)((uintptr_t)ptr + ((usize >> 1) & ~(ptr_sz - 1)));
+	*mid = (void *)((byte_t *)ptr + ((usize >> 1) & ~(ptr_sz - 1)));
 	assert(*first != *mid || usize == ptr_sz);
 	assert((uintptr_t)*first <= (uintptr_t)*mid);
 
@@ -148,7 +151,7 @@ san_junk_ptr_locations(void *ptr, size_t usize, void **first, void **mid,
 	 * default the tcache only goes up to the 32K size class, and is usually
 	 * tuned lower instead of higher, which makes it less of a concern.
 	 */
-	*last = (void *)((uintptr_t)ptr + usize - sizeof(uaf_detect_junk));
+	*last = (void *)((byte_t *)ptr + usize - sizeof(uaf_detect_junk));
 	assert(*first != *last || usize == ptr_sz);
 	assert(*mid != *last || usize <= ptr_sz * 2);
 	assert((uintptr_t)*mid <= (uintptr_t)*last);
@@ -181,8 +184,8 @@ static inline bool
 san_uaf_detection_enabled(void) {
 	bool ret = config_uaf_detection && (opt_lg_san_uaf_align != -1);
 	if (config_uaf_detection && ret) {
-		assert(san_cache_bin_nonfast_mask == ((uintptr_t)1 <<
-		    opt_lg_san_uaf_align) - 1);
+		assert(san_cache_bin_nonfast_mask
+		    == ((uintptr_t)1 << opt_lg_san_uaf_align) - 1);
 	}
 
 	return ret;
diff --git a/include/jemalloc/internal/san_bump.h b/include/jemalloc/internal/san_bump.h
index 8ec4a710..9e42b69b 100644
--- a/include/jemalloc/internal/san_bump.h
+++ b/include/jemalloc/internal/san_bump.h
@@ -1,16 +1,18 @@
 #ifndef JEMALLOC_INTERNAL_SAN_BUMP_H
 #define JEMALLOC_INTERNAL_SAN_BUMP_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/edata.h"
 #include "jemalloc/internal/exp_grow.h"
 #include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/witness.h"
 
 #define SBA_RETAINED_ALLOC_SIZE ((size_t)4 << 20)
 
 extern bool opt_retain;
 
 typedef struct ehooks_s ehooks_t;
-typedef struct pac_s pac_t;
+typedef struct pac_s    pac_t;
 
 typedef struct san_bump_alloc_s san_bump_alloc_t;
 struct san_bump_alloc_s {
@@ -20,7 +22,7 @@ struct san_bump_alloc_s {
 };
 
 static inline bool
-san_bump_enabled() {
+san_bump_enabled(void) {
 	/*
 	 * We enable san_bump allocator only when it's possible to break up a
 	 * mapping and unmap a part of it (maps_coalesce). This is needed to
@@ -34,7 +36,7 @@ san_bump_enabled() {
 }
 
 static inline bool
-san_bump_alloc_init(san_bump_alloc_t* sba) {
+san_bump_alloc_init(san_bump_alloc_t *sba) {
 	bool err = malloc_mutex_init(&sba->mtx, "sanitizer_bump_allocator",
 	    WITNESS_RANK_SAN_BUMP_ALLOC, malloc_mutex_rank_exclusive);
 	if (err) {
@@ -45,8 +47,7 @@ san_bump_alloc_init(san_bump_alloc_t* sba) {
 	return false;
 }
 
-edata_t *
-san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t* sba, pac_t *pac, ehooks_t *ehooks,
-    size_t size, bool zero);
+edata_t *san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t *sba, pac_t *pac,
+    ehooks_t *ehooks, size_t size, bool zero);
 
 #endif /* JEMALLOC_INTERNAL_SAN_BUMP_H */
diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h
index 9bab347b..17a8278a 100644
--- a/include/jemalloc/internal/sc.h
+++ b/include/jemalloc/internal/sc.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_SC_H
 #define JEMALLOC_INTERNAL_SC_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/jemalloc_internal_types.h"
 
 /*
@@ -173,7 +174,7 @@
 
 #if SC_LG_TINY_MIN == 0
 /* The div module doesn't support division by 1, which this would require. */
-#error "Unsupported LG_TINY_MIN"
+#	error "Unsupported LG_TINY_MIN"
 #endif
 
 /*
@@ -193,8 +194,8 @@
  * We could probably save some space in arenas by capping this at LG_VADDR size.
  */
 #define SC_LG_BASE_MAX (SC_PTR_BITS - 2)
-#define SC_NREGULAR (SC_NGROUP * 					\
-    (SC_LG_BASE_MAX - SC_LG_FIRST_REGULAR_BASE + 1) - 1)
+#define SC_NREGULAR                                                            \
+	(SC_NGROUP * (SC_LG_BASE_MAX - SC_LG_FIRST_REGULAR_BASE + 1) - 1)
 #define SC_NSIZES (SC_NTINY + SC_NPSEUDO + SC_NREGULAR)
 
 /*
@@ -221,29 +222,29 @@
  *
  * This gives us the quantity we seek.
  */
-#define SC_NPSIZES (							\
-    SC_NGROUP								\
-    + (SC_LG_BASE_MAX - (LG_PAGE + SC_LG_NGROUP)) * SC_NGROUP		\
-    + SC_NGROUP - 1)
+#define SC_NPSIZES                                                             \
+	(SC_NGROUP + (SC_LG_BASE_MAX - (LG_PAGE + SC_LG_NGROUP)) * SC_NGROUP   \
+	    + SC_NGROUP - 1)
 
 /*
  * We declare a size class is binnable if size < page size * group. Or, in other
  * words, lg(size) < lg(page size) + lg(group size).
  */
-#define SC_NBINS (							\
-    /* Sub-regular size classes. */					\
-    SC_NTINY + SC_NPSEUDO						\
-    /* Groups with lg_regular_min_base <= lg_base <= lg_base_max */	\
-    + SC_NGROUP * (LG_PAGE + SC_LG_NGROUP - SC_LG_FIRST_REGULAR_BASE)	\
-    /* Last SC of the last group hits the bound exactly; exclude it. */	\
-    - 1)
+#define SC_NBINS                                                                                                    \
+	(/* Sub-regular size classes. */                                                                            \
+	    SC_NTINY                                                                                                \
+	    + SC_NPSEUDO /* Groups with lg_regular_min_base <= lg_base <= lg_base_max */                            \
+	    + SC_NGROUP                                                                                             \
+	        * (LG_PAGE + SC_LG_NGROUP                                                                           \
+	            - SC_LG_FIRST_REGULAR_BASE) /* Last SC of the last group hits the bound exactly; exclude it. */ \
+	    - 1)
 
 /*
  * The size2index_tab lookup table uses uint8_t to encode each bin index, so we
  * cannot support more than 256 small size classes.
  */
 #if (SC_NBINS > 256)
-#  error "Too many small size classes"
+#	error "Too many small size classes"
 #endif
 
 /* The largest size class in the lookup table, and its binary log. */
@@ -255,12 +256,12 @@
 #define SC_SMALL_MAX_DELTA (1 << (LG_PAGE - 1))
 
 /* The largest size class allocated out of a slab. */
-#define SC_SMALL_MAXCLASS (SC_SMALL_MAX_BASE				\
-    + (SC_NGROUP - 1) * SC_SMALL_MAX_DELTA)
+#define SC_SMALL_MAXCLASS                                                      \
+	(SC_SMALL_MAX_BASE + (SC_NGROUP - 1) * SC_SMALL_MAX_DELTA)
 
 /* The fastpath assumes all lookup-able sizes are small. */
 #if (SC_SMALL_MAXCLASS < SC_LOOKUP_MAXCLASS)
-#  error "Lookup table sizes must be small"
+#	error "Lookup table sizes must be small"
 #endif
 
 /* The smallest size class not allocated out of a slab. */
@@ -276,15 +277,33 @@
 
 /* Maximum number of regions in one slab. */
 #ifndef CONFIG_LG_SLAB_MAXREGS
-#  define SC_LG_SLAB_MAXREGS (LG_PAGE - SC_LG_TINY_MIN)
+#	define SC_LG_SLAB_MAXREGS (LG_PAGE - SC_LG_TINY_MIN)
 #else
-#  if CONFIG_LG_SLAB_MAXREGS < (LG_PAGE - SC_LG_TINY_MIN)
-#    error "Unsupported SC_LG_SLAB_MAXREGS"
-#  else
-#    define SC_LG_SLAB_MAXREGS CONFIG_LG_SLAB_MAXREGS
-#  endif
+#	if CONFIG_LG_SLAB_MAXREGS < (LG_PAGE - SC_LG_TINY_MIN)
+#		error "Unsupported SC_LG_SLAB_MAXREGS"
+#	else
+#		define SC_LG_SLAB_MAXREGS CONFIG_LG_SLAB_MAXREGS
+#	endif
 #endif
 
+/*
+ * When large size classes are disabled, there is no concept of size classes
+ * for sizes > SC_SMALLMAXCLASS (or >= SC_LARGE_MINCLASS).  This ensures that
+ * the overhead between the usable size and the user request size will not
+ * exceed PAGE.  Between SC_LARGE_MINCLASS (SC_NGROUP * PAGE) and
+ * 2 * SC_NGROUP * PAGE, the size classes also happen to be aligned with PAGE.
+ * Since tcache relies on size classes to work and it greatly increases the
+ * perf of allocs & deallocs, we extend the existence of size class to
+ * 2 * SC_NGROUP * PAGE ONLY for the tcache module.  This means for all other
+ * modules, there is no size class for sizes >= SC_LARGE_MINCLASS.  Yet for
+ * tcache, the threshold is moved up to 2 * SC_NGROUP * PAGE, which is
+ * USIZE_GROW_SLOW_THRESHOLD defined below.  With the default SC_NGROUP being
+ * 2, and PAGE being 4KB, the threshold for tcache (USIZE_GROW_SLOW_THRESHOLD)
+ * is 32KB.
+ */
+#define LG_USIZE_GROW_SLOW_THRESHOLD (SC_LG_NGROUP + LG_PAGE + 1)
+#define USIZE_GROW_SLOW_THRESHOLD (1U << LG_USIZE_GROW_SLOW_THRESHOLD)
+
 #define SC_SLAB_MAXREGS (1U << SC_LG_SLAB_MAXREGS)
 
 typedef struct sc_s sc_t;
@@ -345,13 +364,13 @@ struct sc_data_s {
 };
 
 size_t reg_size_compute(int lg_base, int lg_delta, int ndelta);
-void sc_data_init(sc_data_t *data);
+void   sc_data_init(sc_data_t *data);
 /*
  * Updates slab sizes in [begin, end] to be pgs pages in length, if possible.
  * Otherwise, does its best to accommodate the request.
  */
-void sc_data_update_slab_size(sc_data_t *data, size_t begin, size_t end,
-    int pgs);
+void sc_data_update_slab_size(
+    sc_data_t *data, size_t begin, size_t end, int pgs);
 void sc_boot(sc_data_t *data);
 
 #endif /* JEMALLOC_INTERNAL_SC_H */
diff --git a/include/jemalloc/internal/sec.h b/include/jemalloc/internal/sec.h
index fa863382..cc458b9d 100644
--- a/include/jemalloc/internal/sec.h
+++ b/include/jemalloc/internal/sec.h
@@ -1,8 +1,12 @@
 #ifndef JEMALLOC_INTERNAL_SEC_H
 #define JEMALLOC_INTERNAL_SEC_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/base.h"
 #include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/pai.h"
+#include "jemalloc/internal/sec_opts.h"
 
 /*
  * Small extent cache.
@@ -13,91 +17,104 @@
  * knowledge of the underlying PAI implementation).
  */
 
-/*
- * For now, this is just one field; eventually, we'll probably want to get more
- * fine-grained data out (like per-size class statistics).
- */
+typedef struct sec_bin_stats_s sec_bin_stats_t;
+struct sec_bin_stats_s {
+	/* Number of alloc requests that did not find extent in this bin */
+	size_t nmisses;
+	/* Number of successful alloc requests. */
+	size_t nhits;
+	/* Number of dallocs causing the flush */
+	size_t ndalloc_flush;
+	/* Number of dallocs not causing the flush */
+	size_t ndalloc_noflush;
+	/* Number of fills that hit max_bytes */
+	size_t noverfills;
+};
 typedef struct sec_stats_s sec_stats_t;
 struct sec_stats_s {
 	/* Sum of bytes_cur across all shards. */
 	size_t bytes;
+
+	/* Totals of bin_stats. */
+	sec_bin_stats_t total;
 };
 
+static inline void
+sec_bin_stats_init(sec_bin_stats_t *stats) {
+	stats->ndalloc_flush = 0;
+	stats->nmisses = 0;
+	stats->nhits = 0;
+	stats->ndalloc_noflush = 0;
+	stats->noverfills = 0;
+}
+
+static inline void
+sec_bin_stats_accum(sec_bin_stats_t *dst, sec_bin_stats_t *src) {
+	dst->nmisses += src->nmisses;
+	dst->nhits += src->nhits;
+	dst->ndalloc_flush += src->ndalloc_flush;
+	dst->ndalloc_noflush += src->ndalloc_noflush;
+	dst->noverfills += src->noverfills;
+}
+
 static inline void
 sec_stats_accum(sec_stats_t *dst, sec_stats_t *src) {
 	dst->bytes += src->bytes;
+	sec_bin_stats_accum(&dst->total, &src->total);
 }
 
 /* A collections of free extents, all of the same size. */
 typedef struct sec_bin_s sec_bin_t;
 struct sec_bin_s {
 	/*
-	 * When we fail to fulfill an allocation, we do a batch-alloc on the
-	 * underlying allocator to fill extra items, as well.  We drop the SEC
-	 * lock while doing so, to allow operations on other bins to succeed.
-	 * That introduces the possibility of other threads also trying to
-	 * allocate out of this bin, failing, and also going to the backing
-	 * allocator.  To avoid a thundering herd problem in which lots of
-	 * threads do batch allocs and overfill this bin as a result, we only
-	 * allow one batch allocation at a time for a bin.  This bool tracks
-	 * whether or not some thread is already batch allocating.
-	 *
-	 * Eventually, the right answer may be a smarter sharding policy for the
-	 * bins (e.g. a mutex per bin, which would also be more scalable
-	 * generally; the batch-allocating thread could hold it while
-	 * batch-allocating).
-	 */
-	bool being_batch_filled;
-
-	/*
-	 * Number of bytes in this particular bin (as opposed to the
-	 * sec_shard_t's bytes_cur.  This isn't user visible or reported in
-	 * stats; rather, it allows us to quickly determine the change in the
-	 * centralized counter when flushing.
-	 */
-	size_t bytes_cur;
-	edata_list_active_t freelist;
-};
-
-typedef struct sec_shard_s sec_shard_t;
-struct sec_shard_s {
-	/*
-	 * We don't keep per-bin mutexes, even though that would allow more
-	 * sharding; this allows global cache-eviction, which in turn allows for
-	 * better balancing across free lists.
+	 * Protects the data members of the bin.
 	 */
 	malloc_mutex_t mtx;
+
 	/*
-	 * A SEC may need to be shut down (i.e. flushed of its contents and
-	 * prevented from further caching).  To avoid tricky synchronization
-	 * issues, we just track enabled-status in each shard, guarded by a
-	 * mutex.  In practice, this is only ever checked during brief races,
-	 * since the arena-level atomic boolean tracking HPA enabled-ness means
-	 * that we won't go down these pathways very often after custom extent
-	 * hooks are installed.
+	 * Number of bytes in this particular bin.
 	 */
-	bool enabled;
-	sec_bin_t *bins;
-	/* Number of bytes in all bins in the shard. */
-	size_t bytes_cur;
-	/* The next pszind to flush in the flush-some pathways. */
-	pszind_t to_flush_next;
+	size_t              bytes_cur;
+	edata_list_active_t freelist;
+	sec_bin_stats_t     stats;
 };
 
 typedef struct sec_s sec_t;
 struct sec_s {
-	pai_t pai;
-	pai_t *fallback;
-
 	sec_opts_t opts;
-	sec_shard_t *shards;
-	pszind_t npsizes;
+	sec_bin_t *bins;
+	pszind_t   npsizes;
 };
 
-bool sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback,
-    const sec_opts_t *opts);
-void sec_flush(tsdn_t *tsdn, sec_t *sec);
-void sec_disable(tsdn_t *tsdn, sec_t *sec);
+static inline bool
+sec_is_used(sec_t *sec) {
+	return sec->opts.nshards != 0;
+}
+
+static inline bool
+sec_size_supported(sec_t *sec, size_t size) {
+	return sec_is_used(sec) && size <= sec->opts.max_alloc;
+}
+
+/* If sec does not have extent available, it will return NULL. */
+edata_t *sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size);
+void     sec_fill(tsdn_t *tsdn, sec_t *sec, size_t size,
+        edata_list_active_t *result, size_t nallocs);
+
+/*
+ * Upon return dalloc_list may be empty if edata is consumed by sec or non-empty
+ * if there are extents that need to be flushed from cache.  Please note, that
+ * if we need to flush, extent(s) returned in the list to be deallocated
+ * will almost certainly not contain the one being dalloc-ed (that one will be
+ * considered "hot" and preserved in the cache, while "colder" ones are
+ * returned).
+ */
+void sec_dalloc(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *dalloc_list);
+
+bool sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, const sec_opts_t *opts);
+
+/* Fills to_flush with extents that need to be deallocated */
+void sec_flush(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *to_flush);
 
 /*
  * Morally, these two stats methods probably ought to be a single one (and the
@@ -106,8 +123,8 @@ void sec_disable(tsdn_t *tsdn, sec_t *sec);
  * split), which simplifies the stats management.
  */
 void sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats);
-void sec_mutex_stats_read(tsdn_t *tsdn, sec_t *sec,
-    mutex_prof_data_t *mutex_prof_data);
+void sec_mutex_stats_read(
+    tsdn_t *tsdn, sec_t *sec, mutex_prof_data_t *mutex_prof_data);
 
 /*
  * We use the arena lock ordering; these are acquired in phase 2 of forking, but
diff --git a/include/jemalloc/internal/sec_opts.h b/include/jemalloc/internal/sec_opts.h
index a3ad72fb..039d423c 100644
--- a/include/jemalloc/internal/sec_opts.h
+++ b/include/jemalloc/internal/sec_opts.h
@@ -1,6 +1,8 @@
 #ifndef JEMALLOC_INTERNAL_SEC_OPTS_H
 #define JEMALLOC_INTERNAL_SEC_OPTS_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+
 /*
  * The configuration settings used by an sec_t.  Morally, this is part of the
  * SEC interface, but we put it here for header-ordering reasons.
@@ -10,50 +12,39 @@ typedef struct sec_opts_s sec_opts_t;
 struct sec_opts_s {
 	/*
 	 * We don't necessarily always use all the shards; requests are
-	 * distributed across shards [0, nshards - 1).
+	 * distributed across shards [0, nshards - 1).  Once thread picks a
+	 * shard it will always use that one.  If this value is set to 0 sec is
+	 * not used.
 	 */
 	size_t nshards;
 	/*
 	 * We'll automatically refuse to cache any objects in this sec if
-	 * they're larger than max_alloc bytes, instead forwarding such objects
-	 * directly to the fallback.
+	 * they're larger than max_alloc bytes.
 	 */
 	size_t max_alloc;
 	/*
-	 * Exceeding this amount of cached extents in a shard causes us to start
-	 * flushing bins in that shard until we fall below bytes_after_flush.
+	 * Exceeding this amount of cached extents in a bin causes us to flush
+	 * until we are 1/4 below max_bytes.
 	 */
 	size_t max_bytes;
-	/*
-	 * The number of bytes (in all bins) we flush down to when we exceed
-	 * bytes_cur.  We want this to be less than bytes_cur, because
-	 * otherwise we could get into situations where a shard undergoing
-	 * net-deallocation keeps bytes_cur very near to max_bytes, so that
-	 * most deallocations get immediately forwarded to the underlying PAI
-	 * implementation, defeating the point of the SEC.
-	 */
-	size_t bytes_after_flush;
 	/*
 	 * When we can't satisfy an allocation out of the SEC because there are
-	 * no available ones cached, we allocate multiple of that size out of
-	 * the fallback allocator.  Eventually we might want to do something
-	 * cleverer, but for now we just grab a fixed number.
+	 * no available ones cached, allocator will allocate a batch with extra
+	 * batch_fill_extra extents of the same size.
 	 */
 	size_t batch_fill_extra;
 };
 
-#define SEC_OPTS_DEFAULT {						\
-	/* nshards */							\
-	4,								\
-	/* max_alloc */							\
-	(32 * 1024) < PAGE ? PAGE : (32 * 1024),			\
-	/* max_bytes */							\
-	256 * 1024,							\
-	/* bytes_after_flush */						\
-	128 * 1024,							\
-	/* batch_fill_extra */						\
-	0								\
-}
+#define SEC_OPTS_NSHARDS_DEFAULT 2
+#define SEC_OPTS_BATCH_FILL_EXTRA_DEFAULT 3
+#define SEC_OPTS_MAX_ALLOC_DEFAULT ((32 * 1024) < PAGE ? PAGE : (32 * 1024))
+#define SEC_OPTS_MAX_BYTES_DEFAULT                                             \
+	((256 * 1024) < (4 * SEC_OPTS_MAX_ALLOC_DEFAULT)                       \
+	        ? (4 * SEC_OPTS_MAX_ALLOC_DEFAULT)                             \
+	        : (256 * 1024))
 
+#define SEC_OPTS_DEFAULT                                                       \
+	{SEC_OPTS_NSHARDS_DEFAULT, SEC_OPTS_MAX_ALLOC_DEFAULT,                 \
+	    SEC_OPTS_MAX_BYTES_DEFAULT, SEC_OPTS_BATCH_FILL_EXTRA_DEFAULT}
 
 #endif /* JEMALLOC_INTERNAL_SEC_OPTS_H */
diff --git a/include/jemalloc/internal/seq.h b/include/jemalloc/internal/seq.h
index ef2df4c6..d2c0d1fc 100644
--- a/include/jemalloc/internal/seq.h
+++ b/include/jemalloc/internal/seq.h
@@ -1,12 +1,14 @@
 #ifndef JEMALLOC_INTERNAL_SEQ_H
 #define JEMALLOC_INTERNAL_SEQ_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/atomic.h"
 
 /*
  * A simple seqlock implementation.
  */
 
+/* clang-format off */
 #define seq_define(type, short_type)					\
 typedef struct {							\
 	atomic_zu_t seq;						\
@@ -51,5 +53,6 @@ seq_try_load_##short_type(type *dst, seq_##short_type##_t *src) {	\
 	memcpy(dst, buf, sizeof(type));					\
 	return true;							\
 }
+/* clang-format on */
 
 #endif /* JEMALLOC_INTERNAL_SEQ_H */
diff --git a/include/jemalloc/internal/slab_data.h b/include/jemalloc/internal/slab_data.h
index e821863d..724c71e3 100644
--- a/include/jemalloc/internal/slab_data.h
+++ b/include/jemalloc/internal/slab_data.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_SLAB_DATA_H
 #define JEMALLOC_INTERNAL_SLAB_DATA_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/bitmap.h"
 
 typedef struct slab_data_s slab_data_t;
diff --git a/include/jemalloc/internal/smoothstep.h b/include/jemalloc/internal/smoothstep.h
index 2e14430f..135c4905 100644
--- a/include/jemalloc/internal/smoothstep.h
+++ b/include/jemalloc/internal/smoothstep.h
@@ -23,210 +23,210 @@
  *   smootheststep(x) = -20x  + 70x  - 84x  + 35x
  */
 
-#define SMOOTHSTEP_VARIANT	"smoother"
-#define SMOOTHSTEP_NSTEPS	200
-#define SMOOTHSTEP_BFP		24
-#define SMOOTHSTEP \
- /* STEP(step, h,                            x,     y) */ \
-    STEP(   1, UINT64_C(0x0000000000000014), 0.005, 0.000001240643750) \
-    STEP(   2, UINT64_C(0x00000000000000a5), 0.010, 0.000009850600000) \
-    STEP(   3, UINT64_C(0x0000000000000229), 0.015, 0.000032995181250) \
-    STEP(   4, UINT64_C(0x0000000000000516), 0.020, 0.000077619200000) \
-    STEP(   5, UINT64_C(0x00000000000009dc), 0.025, 0.000150449218750) \
-    STEP(   6, UINT64_C(0x00000000000010e8), 0.030, 0.000257995800000) \
-    STEP(   7, UINT64_C(0x0000000000001aa4), 0.035, 0.000406555756250) \
-    STEP(   8, UINT64_C(0x0000000000002777), 0.040, 0.000602214400000) \
-    STEP(   9, UINT64_C(0x00000000000037c2), 0.045, 0.000850847793750) \
-    STEP(  10, UINT64_C(0x0000000000004be6), 0.050, 0.001158125000000) \
-    STEP(  11, UINT64_C(0x000000000000643c), 0.055, 0.001529510331250) \
-    STEP(  12, UINT64_C(0x000000000000811f), 0.060, 0.001970265600000) \
-    STEP(  13, UINT64_C(0x000000000000a2e2), 0.065, 0.002485452368750) \
-    STEP(  14, UINT64_C(0x000000000000c9d8), 0.070, 0.003079934200000) \
-    STEP(  15, UINT64_C(0x000000000000f64f), 0.075, 0.003758378906250) \
-    STEP(  16, UINT64_C(0x0000000000012891), 0.080, 0.004525260800000) \
-    STEP(  17, UINT64_C(0x00000000000160e7), 0.085, 0.005384862943750) \
-    STEP(  18, UINT64_C(0x0000000000019f95), 0.090, 0.006341279400000) \
-    STEP(  19, UINT64_C(0x000000000001e4dc), 0.095, 0.007398417481250) \
-    STEP(  20, UINT64_C(0x00000000000230fc), 0.100, 0.008560000000000) \
-    STEP(  21, UINT64_C(0x0000000000028430), 0.105, 0.009829567518750) \
-    STEP(  22, UINT64_C(0x000000000002deb0), 0.110, 0.011210480600000) \
-    STEP(  23, UINT64_C(0x00000000000340b1), 0.115, 0.012705922056250) \
-    STEP(  24, UINT64_C(0x000000000003aa67), 0.120, 0.014318899200000) \
-    STEP(  25, UINT64_C(0x0000000000041c00), 0.125, 0.016052246093750) \
-    STEP(  26, UINT64_C(0x00000000000495a8), 0.130, 0.017908625800000) \
-    STEP(  27, UINT64_C(0x000000000005178b), 0.135, 0.019890532631250) \
-    STEP(  28, UINT64_C(0x000000000005a1cf), 0.140, 0.022000294400000) \
-    STEP(  29, UINT64_C(0x0000000000063498), 0.145, 0.024240074668750) \
-    STEP(  30, UINT64_C(0x000000000006d009), 0.150, 0.026611875000000) \
-    STEP(  31, UINT64_C(0x000000000007743f), 0.155, 0.029117537206250) \
-    STEP(  32, UINT64_C(0x0000000000082157), 0.160, 0.031758745600000) \
-    STEP(  33, UINT64_C(0x000000000008d76b), 0.165, 0.034537029243750) \
-    STEP(  34, UINT64_C(0x0000000000099691), 0.170, 0.037453764200000) \
-    STEP(  35, UINT64_C(0x00000000000a5edf), 0.175, 0.040510175781250) \
-    STEP(  36, UINT64_C(0x00000000000b3067), 0.180, 0.043707340800000) \
-    STEP(  37, UINT64_C(0x00000000000c0b38), 0.185, 0.047046189818750) \
-    STEP(  38, UINT64_C(0x00000000000cef5e), 0.190, 0.050527509400000) \
-    STEP(  39, UINT64_C(0x00000000000ddce6), 0.195, 0.054151944356250) \
-    STEP(  40, UINT64_C(0x00000000000ed3d8), 0.200, 0.057920000000000) \
-    STEP(  41, UINT64_C(0x00000000000fd439), 0.205, 0.061832044393750) \
-    STEP(  42, UINT64_C(0x000000000010de0e), 0.210, 0.065888310600000) \
-    STEP(  43, UINT64_C(0x000000000011f158), 0.215, 0.070088898931250) \
-    STEP(  44, UINT64_C(0x0000000000130e17), 0.220, 0.074433779200000) \
-    STEP(  45, UINT64_C(0x0000000000143448), 0.225, 0.078922792968750) \
-    STEP(  46, UINT64_C(0x00000000001563e7), 0.230, 0.083555655800000) \
-    STEP(  47, UINT64_C(0x0000000000169cec), 0.235, 0.088331959506250) \
-    STEP(  48, UINT64_C(0x000000000017df4f), 0.240, 0.093251174400000) \
-    STEP(  49, UINT64_C(0x0000000000192b04), 0.245, 0.098312651543750) \
-    STEP(  50, UINT64_C(0x00000000001a8000), 0.250, 0.103515625000000) \
-    STEP(  51, UINT64_C(0x00000000001bde32), 0.255, 0.108859214081250) \
-    STEP(  52, UINT64_C(0x00000000001d458b), 0.260, 0.114342425600000) \
-    STEP(  53, UINT64_C(0x00000000001eb5f8), 0.265, 0.119964156118750) \
-    STEP(  54, UINT64_C(0x0000000000202f65), 0.270, 0.125723194200000) \
-    STEP(  55, UINT64_C(0x000000000021b1bb), 0.275, 0.131618222656250) \
-    STEP(  56, UINT64_C(0x0000000000233ce3), 0.280, 0.137647820800000) \
-    STEP(  57, UINT64_C(0x000000000024d0c3), 0.285, 0.143810466693750) \
-    STEP(  58, UINT64_C(0x0000000000266d40), 0.290, 0.150104539400000) \
-    STEP(  59, UINT64_C(0x000000000028123d), 0.295, 0.156528321231250) \
-    STEP(  60, UINT64_C(0x000000000029bf9c), 0.300, 0.163080000000000) \
-    STEP(  61, UINT64_C(0x00000000002b753d), 0.305, 0.169757671268750) \
-    STEP(  62, UINT64_C(0x00000000002d32fe), 0.310, 0.176559340600000) \
-    STEP(  63, UINT64_C(0x00000000002ef8bc), 0.315, 0.183482925806250) \
-    STEP(  64, UINT64_C(0x000000000030c654), 0.320, 0.190526259200000) \
-    STEP(  65, UINT64_C(0x0000000000329b9f), 0.325, 0.197687089843750) \
-    STEP(  66, UINT64_C(0x0000000000347875), 0.330, 0.204963085800000) \
-    STEP(  67, UINT64_C(0x0000000000365cb0), 0.335, 0.212351836381250) \
-    STEP(  68, UINT64_C(0x0000000000384825), 0.340, 0.219850854400000) \
-    STEP(  69, UINT64_C(0x00000000003a3aa8), 0.345, 0.227457578418750) \
-    STEP(  70, UINT64_C(0x00000000003c340f), 0.350, 0.235169375000000) \
-    STEP(  71, UINT64_C(0x00000000003e342b), 0.355, 0.242983540956250) \
-    STEP(  72, UINT64_C(0x0000000000403ace), 0.360, 0.250897305600000) \
-    STEP(  73, UINT64_C(0x00000000004247c8), 0.365, 0.258907832993750) \
-    STEP(  74, UINT64_C(0x0000000000445ae9), 0.370, 0.267012224200000) \
-    STEP(  75, UINT64_C(0x0000000000467400), 0.375, 0.275207519531250) \
-    STEP(  76, UINT64_C(0x00000000004892d8), 0.380, 0.283490700800000) \
-    STEP(  77, UINT64_C(0x00000000004ab740), 0.385, 0.291858693568750) \
-    STEP(  78, UINT64_C(0x00000000004ce102), 0.390, 0.300308369400000) \
-    STEP(  79, UINT64_C(0x00000000004f0fe9), 0.395, 0.308836548106250) \
-    STEP(  80, UINT64_C(0x00000000005143bf), 0.400, 0.317440000000000) \
-    STEP(  81, UINT64_C(0x0000000000537c4d), 0.405, 0.326115448143750) \
-    STEP(  82, UINT64_C(0x000000000055b95b), 0.410, 0.334859570600000) \
-    STEP(  83, UINT64_C(0x000000000057fab1), 0.415, 0.343669002681250) \
-    STEP(  84, UINT64_C(0x00000000005a4015), 0.420, 0.352540339200000) \
-    STEP(  85, UINT64_C(0x00000000005c894e), 0.425, 0.361470136718750) \
-    STEP(  86, UINT64_C(0x00000000005ed622), 0.430, 0.370454915800000) \
-    STEP(  87, UINT64_C(0x0000000000612655), 0.435, 0.379491163256250) \
-    STEP(  88, UINT64_C(0x00000000006379ac), 0.440, 0.388575334400000) \
-    STEP(  89, UINT64_C(0x000000000065cfeb), 0.445, 0.397703855293750) \
-    STEP(  90, UINT64_C(0x00000000006828d6), 0.450, 0.406873125000000) \
-    STEP(  91, UINT64_C(0x00000000006a842f), 0.455, 0.416079517831250) \
-    STEP(  92, UINT64_C(0x00000000006ce1bb), 0.460, 0.425319385600000) \
-    STEP(  93, UINT64_C(0x00000000006f413a), 0.465, 0.434589059868750) \
-    STEP(  94, UINT64_C(0x000000000071a270), 0.470, 0.443884854200000) \
-    STEP(  95, UINT64_C(0x000000000074051d), 0.475, 0.453203066406250) \
-    STEP(  96, UINT64_C(0x0000000000766905), 0.480, 0.462539980800000) \
-    STEP(  97, UINT64_C(0x000000000078cde7), 0.485, 0.471891870443750) \
-    STEP(  98, UINT64_C(0x00000000007b3387), 0.490, 0.481254999400000) \
-    STEP(  99, UINT64_C(0x00000000007d99a4), 0.495, 0.490625624981250) \
-    STEP( 100, UINT64_C(0x0000000000800000), 0.500, 0.500000000000000) \
-    STEP( 101, UINT64_C(0x000000000082665b), 0.505, 0.509374375018750) \
-    STEP( 102, UINT64_C(0x000000000084cc78), 0.510, 0.518745000600000) \
-    STEP( 103, UINT64_C(0x0000000000873218), 0.515, 0.528108129556250) \
-    STEP( 104, UINT64_C(0x00000000008996fa), 0.520, 0.537460019200000) \
-    STEP( 105, UINT64_C(0x00000000008bfae2), 0.525, 0.546796933593750) \
-    STEP( 106, UINT64_C(0x00000000008e5d8f), 0.530, 0.556115145800000) \
-    STEP( 107, UINT64_C(0x000000000090bec5), 0.535, 0.565410940131250) \
-    STEP( 108, UINT64_C(0x0000000000931e44), 0.540, 0.574680614400000) \
-    STEP( 109, UINT64_C(0x0000000000957bd0), 0.545, 0.583920482168750) \
-    STEP( 110, UINT64_C(0x000000000097d729), 0.550, 0.593126875000000) \
-    STEP( 111, UINT64_C(0x00000000009a3014), 0.555, 0.602296144706250) \
-    STEP( 112, UINT64_C(0x00000000009c8653), 0.560, 0.611424665600000) \
-    STEP( 113, UINT64_C(0x00000000009ed9aa), 0.565, 0.620508836743750) \
-    STEP( 114, UINT64_C(0x0000000000a129dd), 0.570, 0.629545084200000) \
-    STEP( 115, UINT64_C(0x0000000000a376b1), 0.575, 0.638529863281250) \
-    STEP( 116, UINT64_C(0x0000000000a5bfea), 0.580, 0.647459660800000) \
-    STEP( 117, UINT64_C(0x0000000000a8054e), 0.585, 0.656330997318750) \
-    STEP( 118, UINT64_C(0x0000000000aa46a4), 0.590, 0.665140429400000) \
-    STEP( 119, UINT64_C(0x0000000000ac83b2), 0.595, 0.673884551856250) \
-    STEP( 120, UINT64_C(0x0000000000aebc40), 0.600, 0.682560000000000) \
-    STEP( 121, UINT64_C(0x0000000000b0f016), 0.605, 0.691163451893750) \
-    STEP( 122, UINT64_C(0x0000000000b31efd), 0.610, 0.699691630600000) \
-    STEP( 123, UINT64_C(0x0000000000b548bf), 0.615, 0.708141306431250) \
-    STEP( 124, UINT64_C(0x0000000000b76d27), 0.620, 0.716509299200000) \
-    STEP( 125, UINT64_C(0x0000000000b98c00), 0.625, 0.724792480468750) \
-    STEP( 126, UINT64_C(0x0000000000bba516), 0.630, 0.732987775800000) \
-    STEP( 127, UINT64_C(0x0000000000bdb837), 0.635, 0.741092167006250) \
-    STEP( 128, UINT64_C(0x0000000000bfc531), 0.640, 0.749102694400000) \
-    STEP( 129, UINT64_C(0x0000000000c1cbd4), 0.645, 0.757016459043750) \
-    STEP( 130, UINT64_C(0x0000000000c3cbf0), 0.650, 0.764830625000000) \
-    STEP( 131, UINT64_C(0x0000000000c5c557), 0.655, 0.772542421581250) \
-    STEP( 132, UINT64_C(0x0000000000c7b7da), 0.660, 0.780149145600000) \
-    STEP( 133, UINT64_C(0x0000000000c9a34f), 0.665, 0.787648163618750) \
-    STEP( 134, UINT64_C(0x0000000000cb878a), 0.670, 0.795036914200000) \
-    STEP( 135, UINT64_C(0x0000000000cd6460), 0.675, 0.802312910156250) \
-    STEP( 136, UINT64_C(0x0000000000cf39ab), 0.680, 0.809473740800000) \
-    STEP( 137, UINT64_C(0x0000000000d10743), 0.685, 0.816517074193750) \
-    STEP( 138, UINT64_C(0x0000000000d2cd01), 0.690, 0.823440659400000) \
-    STEP( 139, UINT64_C(0x0000000000d48ac2), 0.695, 0.830242328731250) \
-    STEP( 140, UINT64_C(0x0000000000d64063), 0.700, 0.836920000000000) \
-    STEP( 141, UINT64_C(0x0000000000d7edc2), 0.705, 0.843471678768750) \
-    STEP( 142, UINT64_C(0x0000000000d992bf), 0.710, 0.849895460600000) \
-    STEP( 143, UINT64_C(0x0000000000db2f3c), 0.715, 0.856189533306250) \
-    STEP( 144, UINT64_C(0x0000000000dcc31c), 0.720, 0.862352179200000) \
-    STEP( 145, UINT64_C(0x0000000000de4e44), 0.725, 0.868381777343750) \
-    STEP( 146, UINT64_C(0x0000000000dfd09a), 0.730, 0.874276805800000) \
-    STEP( 147, UINT64_C(0x0000000000e14a07), 0.735, 0.880035843881250) \
-    STEP( 148, UINT64_C(0x0000000000e2ba74), 0.740, 0.885657574400000) \
-    STEP( 149, UINT64_C(0x0000000000e421cd), 0.745, 0.891140785918750) \
-    STEP( 150, UINT64_C(0x0000000000e58000), 0.750, 0.896484375000000) \
-    STEP( 151, UINT64_C(0x0000000000e6d4fb), 0.755, 0.901687348456250) \
-    STEP( 152, UINT64_C(0x0000000000e820b0), 0.760, 0.906748825600000) \
-    STEP( 153, UINT64_C(0x0000000000e96313), 0.765, 0.911668040493750) \
-    STEP( 154, UINT64_C(0x0000000000ea9c18), 0.770, 0.916444344200000) \
-    STEP( 155, UINT64_C(0x0000000000ebcbb7), 0.775, 0.921077207031250) \
-    STEP( 156, UINT64_C(0x0000000000ecf1e8), 0.780, 0.925566220800000) \
-    STEP( 157, UINT64_C(0x0000000000ee0ea7), 0.785, 0.929911101068750) \
-    STEP( 158, UINT64_C(0x0000000000ef21f1), 0.790, 0.934111689400000) \
-    STEP( 159, UINT64_C(0x0000000000f02bc6), 0.795, 0.938167955606250) \
-    STEP( 160, UINT64_C(0x0000000000f12c27), 0.800, 0.942080000000000) \
-    STEP( 161, UINT64_C(0x0000000000f22319), 0.805, 0.945848055643750) \
-    STEP( 162, UINT64_C(0x0000000000f310a1), 0.810, 0.949472490600000) \
-    STEP( 163, UINT64_C(0x0000000000f3f4c7), 0.815, 0.952953810181250) \
-    STEP( 164, UINT64_C(0x0000000000f4cf98), 0.820, 0.956292659200000) \
-    STEP( 165, UINT64_C(0x0000000000f5a120), 0.825, 0.959489824218750) \
-    STEP( 166, UINT64_C(0x0000000000f6696e), 0.830, 0.962546235800000) \
-    STEP( 167, UINT64_C(0x0000000000f72894), 0.835, 0.965462970756250) \
-    STEP( 168, UINT64_C(0x0000000000f7dea8), 0.840, 0.968241254400000) \
-    STEP( 169, UINT64_C(0x0000000000f88bc0), 0.845, 0.970882462793750) \
-    STEP( 170, UINT64_C(0x0000000000f92ff6), 0.850, 0.973388125000000) \
-    STEP( 171, UINT64_C(0x0000000000f9cb67), 0.855, 0.975759925331250) \
-    STEP( 172, UINT64_C(0x0000000000fa5e30), 0.860, 0.977999705600000) \
-    STEP( 173, UINT64_C(0x0000000000fae874), 0.865, 0.980109467368750) \
-    STEP( 174, UINT64_C(0x0000000000fb6a57), 0.870, 0.982091374200000) \
-    STEP( 175, UINT64_C(0x0000000000fbe400), 0.875, 0.983947753906250) \
-    STEP( 176, UINT64_C(0x0000000000fc5598), 0.880, 0.985681100800000) \
-    STEP( 177, UINT64_C(0x0000000000fcbf4e), 0.885, 0.987294077943750) \
-    STEP( 178, UINT64_C(0x0000000000fd214f), 0.890, 0.988789519400000) \
-    STEP( 179, UINT64_C(0x0000000000fd7bcf), 0.895, 0.990170432481250) \
-    STEP( 180, UINT64_C(0x0000000000fdcf03), 0.900, 0.991440000000000) \
-    STEP( 181, UINT64_C(0x0000000000fe1b23), 0.905, 0.992601582518750) \
-    STEP( 182, UINT64_C(0x0000000000fe606a), 0.910, 0.993658720600000) \
-    STEP( 183, UINT64_C(0x0000000000fe9f18), 0.915, 0.994615137056250) \
-    STEP( 184, UINT64_C(0x0000000000fed76e), 0.920, 0.995474739200000) \
-    STEP( 185, UINT64_C(0x0000000000ff09b0), 0.925, 0.996241621093750) \
-    STEP( 186, UINT64_C(0x0000000000ff3627), 0.930, 0.996920065800000) \
-    STEP( 187, UINT64_C(0x0000000000ff5d1d), 0.935, 0.997514547631250) \
-    STEP( 188, UINT64_C(0x0000000000ff7ee0), 0.940, 0.998029734400000) \
-    STEP( 189, UINT64_C(0x0000000000ff9bc3), 0.945, 0.998470489668750) \
-    STEP( 190, UINT64_C(0x0000000000ffb419), 0.950, 0.998841875000000) \
-    STEP( 191, UINT64_C(0x0000000000ffc83d), 0.955, 0.999149152206250) \
-    STEP( 192, UINT64_C(0x0000000000ffd888), 0.960, 0.999397785600000) \
-    STEP( 193, UINT64_C(0x0000000000ffe55b), 0.965, 0.999593444243750) \
-    STEP( 194, UINT64_C(0x0000000000ffef17), 0.970, 0.999742004200000) \
-    STEP( 195, UINT64_C(0x0000000000fff623), 0.975, 0.999849550781250) \
-    STEP( 196, UINT64_C(0x0000000000fffae9), 0.980, 0.999922380800000) \
-    STEP( 197, UINT64_C(0x0000000000fffdd6), 0.985, 0.999967004818750) \
-    STEP( 198, UINT64_C(0x0000000000ffff5a), 0.990, 0.999990149400000) \
-    STEP( 199, UINT64_C(0x0000000000ffffeb), 0.995, 0.999998759356250) \
-    STEP( 200, UINT64_C(0x0000000001000000), 1.000, 1.000000000000000) \
+#define SMOOTHSTEP_VARIANT "smoother"
+#define SMOOTHSTEP_NSTEPS 200
+#define SMOOTHSTEP_BFP 24
+#define SMOOTHSTEP                                                             \
+	/* STEP(step, h,                            x,     y) */               \
+	STEP(1, UINT64_C(0x0000000000000014), 0.005, 0.000001240643750)        \
+	STEP(2, UINT64_C(0x00000000000000a5), 0.010, 0.000009850600000)        \
+	STEP(3, UINT64_C(0x0000000000000229), 0.015, 0.000032995181250)        \
+	STEP(4, UINT64_C(0x0000000000000516), 0.020, 0.000077619200000)        \
+	STEP(5, UINT64_C(0x00000000000009dc), 0.025, 0.000150449218750)        \
+	STEP(6, UINT64_C(0x00000000000010e8), 0.030, 0.000257995800000)        \
+	STEP(7, UINT64_C(0x0000000000001aa4), 0.035, 0.000406555756250)        \
+	STEP(8, UINT64_C(0x0000000000002777), 0.040, 0.000602214400000)        \
+	STEP(9, UINT64_C(0x00000000000037c2), 0.045, 0.000850847793750)        \
+	STEP(10, UINT64_C(0x0000000000004be6), 0.050, 0.001158125000000)       \
+	STEP(11, UINT64_C(0x000000000000643c), 0.055, 0.001529510331250)       \
+	STEP(12, UINT64_C(0x000000000000811f), 0.060, 0.001970265600000)       \
+	STEP(13, UINT64_C(0x000000000000a2e2), 0.065, 0.002485452368750)       \
+	STEP(14, UINT64_C(0x000000000000c9d8), 0.070, 0.003079934200000)       \
+	STEP(15, UINT64_C(0x000000000000f64f), 0.075, 0.003758378906250)       \
+	STEP(16, UINT64_C(0x0000000000012891), 0.080, 0.004525260800000)       \
+	STEP(17, UINT64_C(0x00000000000160e7), 0.085, 0.005384862943750)       \
+	STEP(18, UINT64_C(0x0000000000019f95), 0.090, 0.006341279400000)       \
+	STEP(19, UINT64_C(0x000000000001e4dc), 0.095, 0.007398417481250)       \
+	STEP(20, UINT64_C(0x00000000000230fc), 0.100, 0.008560000000000)       \
+	STEP(21, UINT64_C(0x0000000000028430), 0.105, 0.009829567518750)       \
+	STEP(22, UINT64_C(0x000000000002deb0), 0.110, 0.011210480600000)       \
+	STEP(23, UINT64_C(0x00000000000340b1), 0.115, 0.012705922056250)       \
+	STEP(24, UINT64_C(0x000000000003aa67), 0.120, 0.014318899200000)       \
+	STEP(25, UINT64_C(0x0000000000041c00), 0.125, 0.016052246093750)       \
+	STEP(26, UINT64_C(0x00000000000495a8), 0.130, 0.017908625800000)       \
+	STEP(27, UINT64_C(0x000000000005178b), 0.135, 0.019890532631250)       \
+	STEP(28, UINT64_C(0x000000000005a1cf), 0.140, 0.022000294400000)       \
+	STEP(29, UINT64_C(0x0000000000063498), 0.145, 0.024240074668750)       \
+	STEP(30, UINT64_C(0x000000000006d009), 0.150, 0.026611875000000)       \
+	STEP(31, UINT64_C(0x000000000007743f), 0.155, 0.029117537206250)       \
+	STEP(32, UINT64_C(0x0000000000082157), 0.160, 0.031758745600000)       \
+	STEP(33, UINT64_C(0x000000000008d76b), 0.165, 0.034537029243750)       \
+	STEP(34, UINT64_C(0x0000000000099691), 0.170, 0.037453764200000)       \
+	STEP(35, UINT64_C(0x00000000000a5edf), 0.175, 0.040510175781250)       \
+	STEP(36, UINT64_C(0x00000000000b3067), 0.180, 0.043707340800000)       \
+	STEP(37, UINT64_C(0x00000000000c0b38), 0.185, 0.047046189818750)       \
+	STEP(38, UINT64_C(0x00000000000cef5e), 0.190, 0.050527509400000)       \
+	STEP(39, UINT64_C(0x00000000000ddce6), 0.195, 0.054151944356250)       \
+	STEP(40, UINT64_C(0x00000000000ed3d8), 0.200, 0.057920000000000)       \
+	STEP(41, UINT64_C(0x00000000000fd439), 0.205, 0.061832044393750)       \
+	STEP(42, UINT64_C(0x000000000010de0e), 0.210, 0.065888310600000)       \
+	STEP(43, UINT64_C(0x000000000011f158), 0.215, 0.070088898931250)       \
+	STEP(44, UINT64_C(0x0000000000130e17), 0.220, 0.074433779200000)       \
+	STEP(45, UINT64_C(0x0000000000143448), 0.225, 0.078922792968750)       \
+	STEP(46, UINT64_C(0x00000000001563e7), 0.230, 0.083555655800000)       \
+	STEP(47, UINT64_C(0x0000000000169cec), 0.235, 0.088331959506250)       \
+	STEP(48, UINT64_C(0x000000000017df4f), 0.240, 0.093251174400000)       \
+	STEP(49, UINT64_C(0x0000000000192b04), 0.245, 0.098312651543750)       \
+	STEP(50, UINT64_C(0x00000000001a8000), 0.250, 0.103515625000000)       \
+	STEP(51, UINT64_C(0x00000000001bde32), 0.255, 0.108859214081250)       \
+	STEP(52, UINT64_C(0x00000000001d458b), 0.260, 0.114342425600000)       \
+	STEP(53, UINT64_C(0x00000000001eb5f8), 0.265, 0.119964156118750)       \
+	STEP(54, UINT64_C(0x0000000000202f65), 0.270, 0.125723194200000)       \
+	STEP(55, UINT64_C(0x000000000021b1bb), 0.275, 0.131618222656250)       \
+	STEP(56, UINT64_C(0x0000000000233ce3), 0.280, 0.137647820800000)       \
+	STEP(57, UINT64_C(0x000000000024d0c3), 0.285, 0.143810466693750)       \
+	STEP(58, UINT64_C(0x0000000000266d40), 0.290, 0.150104539400000)       \
+	STEP(59, UINT64_C(0x000000000028123d), 0.295, 0.156528321231250)       \
+	STEP(60, UINT64_C(0x000000000029bf9c), 0.300, 0.163080000000000)       \
+	STEP(61, UINT64_C(0x00000000002b753d), 0.305, 0.169757671268750)       \
+	STEP(62, UINT64_C(0x00000000002d32fe), 0.310, 0.176559340600000)       \
+	STEP(63, UINT64_C(0x00000000002ef8bc), 0.315, 0.183482925806250)       \
+	STEP(64, UINT64_C(0x000000000030c654), 0.320, 0.190526259200000)       \
+	STEP(65, UINT64_C(0x0000000000329b9f), 0.325, 0.197687089843750)       \
+	STEP(66, UINT64_C(0x0000000000347875), 0.330, 0.204963085800000)       \
+	STEP(67, UINT64_C(0x0000000000365cb0), 0.335, 0.212351836381250)       \
+	STEP(68, UINT64_C(0x0000000000384825), 0.340, 0.219850854400000)       \
+	STEP(69, UINT64_C(0x00000000003a3aa8), 0.345, 0.227457578418750)       \
+	STEP(70, UINT64_C(0x00000000003c340f), 0.350, 0.235169375000000)       \
+	STEP(71, UINT64_C(0x00000000003e342b), 0.355, 0.242983540956250)       \
+	STEP(72, UINT64_C(0x0000000000403ace), 0.360, 0.250897305600000)       \
+	STEP(73, UINT64_C(0x00000000004247c8), 0.365, 0.258907832993750)       \
+	STEP(74, UINT64_C(0x0000000000445ae9), 0.370, 0.267012224200000)       \
+	STEP(75, UINT64_C(0x0000000000467400), 0.375, 0.275207519531250)       \
+	STEP(76, UINT64_C(0x00000000004892d8), 0.380, 0.283490700800000)       \
+	STEP(77, UINT64_C(0x00000000004ab740), 0.385, 0.291858693568750)       \
+	STEP(78, UINT64_C(0x00000000004ce102), 0.390, 0.300308369400000)       \
+	STEP(79, UINT64_C(0x00000000004f0fe9), 0.395, 0.308836548106250)       \
+	STEP(80, UINT64_C(0x00000000005143bf), 0.400, 0.317440000000000)       \
+	STEP(81, UINT64_C(0x0000000000537c4d), 0.405, 0.326115448143750)       \
+	STEP(82, UINT64_C(0x000000000055b95b), 0.410, 0.334859570600000)       \
+	STEP(83, UINT64_C(0x000000000057fab1), 0.415, 0.343669002681250)       \
+	STEP(84, UINT64_C(0x00000000005a4015), 0.420, 0.352540339200000)       \
+	STEP(85, UINT64_C(0x00000000005c894e), 0.425, 0.361470136718750)       \
+	STEP(86, UINT64_C(0x00000000005ed622), 0.430, 0.370454915800000)       \
+	STEP(87, UINT64_C(0x0000000000612655), 0.435, 0.379491163256250)       \
+	STEP(88, UINT64_C(0x00000000006379ac), 0.440, 0.388575334400000)       \
+	STEP(89, UINT64_C(0x000000000065cfeb), 0.445, 0.397703855293750)       \
+	STEP(90, UINT64_C(0x00000000006828d6), 0.450, 0.406873125000000)       \
+	STEP(91, UINT64_C(0x00000000006a842f), 0.455, 0.416079517831250)       \
+	STEP(92, UINT64_C(0x00000000006ce1bb), 0.460, 0.425319385600000)       \
+	STEP(93, UINT64_C(0x00000000006f413a), 0.465, 0.434589059868750)       \
+	STEP(94, UINT64_C(0x000000000071a270), 0.470, 0.443884854200000)       \
+	STEP(95, UINT64_C(0x000000000074051d), 0.475, 0.453203066406250)       \
+	STEP(96, UINT64_C(0x0000000000766905), 0.480, 0.462539980800000)       \
+	STEP(97, UINT64_C(0x000000000078cde7), 0.485, 0.471891870443750)       \
+	STEP(98, UINT64_C(0x00000000007b3387), 0.490, 0.481254999400000)       \
+	STEP(99, UINT64_C(0x00000000007d99a4), 0.495, 0.490625624981250)       \
+	STEP(100, UINT64_C(0x0000000000800000), 0.500, 0.500000000000000)      \
+	STEP(101, UINT64_C(0x000000000082665b), 0.505, 0.509374375018750)      \
+	STEP(102, UINT64_C(0x000000000084cc78), 0.510, 0.518745000600000)      \
+	STEP(103, UINT64_C(0x0000000000873218), 0.515, 0.528108129556250)      \
+	STEP(104, UINT64_C(0x00000000008996fa), 0.520, 0.537460019200000)      \
+	STEP(105, UINT64_C(0x00000000008bfae2), 0.525, 0.546796933593750)      \
+	STEP(106, UINT64_C(0x00000000008e5d8f), 0.530, 0.556115145800000)      \
+	STEP(107, UINT64_C(0x000000000090bec5), 0.535, 0.565410940131250)      \
+	STEP(108, UINT64_C(0x0000000000931e44), 0.540, 0.574680614400000)      \
+	STEP(109, UINT64_C(0x0000000000957bd0), 0.545, 0.583920482168750)      \
+	STEP(110, UINT64_C(0x000000000097d729), 0.550, 0.593126875000000)      \
+	STEP(111, UINT64_C(0x00000000009a3014), 0.555, 0.602296144706250)      \
+	STEP(112, UINT64_C(0x00000000009c8653), 0.560, 0.611424665600000)      \
+	STEP(113, UINT64_C(0x00000000009ed9aa), 0.565, 0.620508836743750)      \
+	STEP(114, UINT64_C(0x0000000000a129dd), 0.570, 0.629545084200000)      \
+	STEP(115, UINT64_C(0x0000000000a376b1), 0.575, 0.638529863281250)      \
+	STEP(116, UINT64_C(0x0000000000a5bfea), 0.580, 0.647459660800000)      \
+	STEP(117, UINT64_C(0x0000000000a8054e), 0.585, 0.656330997318750)      \
+	STEP(118, UINT64_C(0x0000000000aa46a4), 0.590, 0.665140429400000)      \
+	STEP(119, UINT64_C(0x0000000000ac83b2), 0.595, 0.673884551856250)      \
+	STEP(120, UINT64_C(0x0000000000aebc40), 0.600, 0.682560000000000)      \
+	STEP(121, UINT64_C(0x0000000000b0f016), 0.605, 0.691163451893750)      \
+	STEP(122, UINT64_C(0x0000000000b31efd), 0.610, 0.699691630600000)      \
+	STEP(123, UINT64_C(0x0000000000b548bf), 0.615, 0.708141306431250)      \
+	STEP(124, UINT64_C(0x0000000000b76d27), 0.620, 0.716509299200000)      \
+	STEP(125, UINT64_C(0x0000000000b98c00), 0.625, 0.724792480468750)      \
+	STEP(126, UINT64_C(0x0000000000bba516), 0.630, 0.732987775800000)      \
+	STEP(127, UINT64_C(0x0000000000bdb837), 0.635, 0.741092167006250)      \
+	STEP(128, UINT64_C(0x0000000000bfc531), 0.640, 0.749102694400000)      \
+	STEP(129, UINT64_C(0x0000000000c1cbd4), 0.645, 0.757016459043750)      \
+	STEP(130, UINT64_C(0x0000000000c3cbf0), 0.650, 0.764830625000000)      \
+	STEP(131, UINT64_C(0x0000000000c5c557), 0.655, 0.772542421581250)      \
+	STEP(132, UINT64_C(0x0000000000c7b7da), 0.660, 0.780149145600000)      \
+	STEP(133, UINT64_C(0x0000000000c9a34f), 0.665, 0.787648163618750)      \
+	STEP(134, UINT64_C(0x0000000000cb878a), 0.670, 0.795036914200000)      \
+	STEP(135, UINT64_C(0x0000000000cd6460), 0.675, 0.802312910156250)      \
+	STEP(136, UINT64_C(0x0000000000cf39ab), 0.680, 0.809473740800000)      \
+	STEP(137, UINT64_C(0x0000000000d10743), 0.685, 0.816517074193750)      \
+	STEP(138, UINT64_C(0x0000000000d2cd01), 0.690, 0.823440659400000)      \
+	STEP(139, UINT64_C(0x0000000000d48ac2), 0.695, 0.830242328731250)      \
+	STEP(140, UINT64_C(0x0000000000d64063), 0.700, 0.836920000000000)      \
+	STEP(141, UINT64_C(0x0000000000d7edc2), 0.705, 0.843471678768750)      \
+	STEP(142, UINT64_C(0x0000000000d992bf), 0.710, 0.849895460600000)      \
+	STEP(143, UINT64_C(0x0000000000db2f3c), 0.715, 0.856189533306250)      \
+	STEP(144, UINT64_C(0x0000000000dcc31c), 0.720, 0.862352179200000)      \
+	STEP(145, UINT64_C(0x0000000000de4e44), 0.725, 0.868381777343750)      \
+	STEP(146, UINT64_C(0x0000000000dfd09a), 0.730, 0.874276805800000)      \
+	STEP(147, UINT64_C(0x0000000000e14a07), 0.735, 0.880035843881250)      \
+	STEP(148, UINT64_C(0x0000000000e2ba74), 0.740, 0.885657574400000)      \
+	STEP(149, UINT64_C(0x0000000000e421cd), 0.745, 0.891140785918750)      \
+	STEP(150, UINT64_C(0x0000000000e58000), 0.750, 0.896484375000000)      \
+	STEP(151, UINT64_C(0x0000000000e6d4fb), 0.755, 0.901687348456250)      \
+	STEP(152, UINT64_C(0x0000000000e820b0), 0.760, 0.906748825600000)      \
+	STEP(153, UINT64_C(0x0000000000e96313), 0.765, 0.911668040493750)      \
+	STEP(154, UINT64_C(0x0000000000ea9c18), 0.770, 0.916444344200000)      \
+	STEP(155, UINT64_C(0x0000000000ebcbb7), 0.775, 0.921077207031250)      \
+	STEP(156, UINT64_C(0x0000000000ecf1e8), 0.780, 0.925566220800000)      \
+	STEP(157, UINT64_C(0x0000000000ee0ea7), 0.785, 0.929911101068750)      \
+	STEP(158, UINT64_C(0x0000000000ef21f1), 0.790, 0.934111689400000)      \
+	STEP(159, UINT64_C(0x0000000000f02bc6), 0.795, 0.938167955606250)      \
+	STEP(160, UINT64_C(0x0000000000f12c27), 0.800, 0.942080000000000)      \
+	STEP(161, UINT64_C(0x0000000000f22319), 0.805, 0.945848055643750)      \
+	STEP(162, UINT64_C(0x0000000000f310a1), 0.810, 0.949472490600000)      \
+	STEP(163, UINT64_C(0x0000000000f3f4c7), 0.815, 0.952953810181250)      \
+	STEP(164, UINT64_C(0x0000000000f4cf98), 0.820, 0.956292659200000)      \
+	STEP(165, UINT64_C(0x0000000000f5a120), 0.825, 0.959489824218750)      \
+	STEP(166, UINT64_C(0x0000000000f6696e), 0.830, 0.962546235800000)      \
+	STEP(167, UINT64_C(0x0000000000f72894), 0.835, 0.965462970756250)      \
+	STEP(168, UINT64_C(0x0000000000f7dea8), 0.840, 0.968241254400000)      \
+	STEP(169, UINT64_C(0x0000000000f88bc0), 0.845, 0.970882462793750)      \
+	STEP(170, UINT64_C(0x0000000000f92ff6), 0.850, 0.973388125000000)      \
+	STEP(171, UINT64_C(0x0000000000f9cb67), 0.855, 0.975759925331250)      \
+	STEP(172, UINT64_C(0x0000000000fa5e30), 0.860, 0.977999705600000)      \
+	STEP(173, UINT64_C(0x0000000000fae874), 0.865, 0.980109467368750)      \
+	STEP(174, UINT64_C(0x0000000000fb6a57), 0.870, 0.982091374200000)      \
+	STEP(175, UINT64_C(0x0000000000fbe400), 0.875, 0.983947753906250)      \
+	STEP(176, UINT64_C(0x0000000000fc5598), 0.880, 0.985681100800000)      \
+	STEP(177, UINT64_C(0x0000000000fcbf4e), 0.885, 0.987294077943750)      \
+	STEP(178, UINT64_C(0x0000000000fd214f), 0.890, 0.988789519400000)      \
+	STEP(179, UINT64_C(0x0000000000fd7bcf), 0.895, 0.990170432481250)      \
+	STEP(180, UINT64_C(0x0000000000fdcf03), 0.900, 0.991440000000000)      \
+	STEP(181, UINT64_C(0x0000000000fe1b23), 0.905, 0.992601582518750)      \
+	STEP(182, UINT64_C(0x0000000000fe606a), 0.910, 0.993658720600000)      \
+	STEP(183, UINT64_C(0x0000000000fe9f18), 0.915, 0.994615137056250)      \
+	STEP(184, UINT64_C(0x0000000000fed76e), 0.920, 0.995474739200000)      \
+	STEP(185, UINT64_C(0x0000000000ff09b0), 0.925, 0.996241621093750)      \
+	STEP(186, UINT64_C(0x0000000000ff3627), 0.930, 0.996920065800000)      \
+	STEP(187, UINT64_C(0x0000000000ff5d1d), 0.935, 0.997514547631250)      \
+	STEP(188, UINT64_C(0x0000000000ff7ee0), 0.940, 0.998029734400000)      \
+	STEP(189, UINT64_C(0x0000000000ff9bc3), 0.945, 0.998470489668750)      \
+	STEP(190, UINT64_C(0x0000000000ffb419), 0.950, 0.998841875000000)      \
+	STEP(191, UINT64_C(0x0000000000ffc83d), 0.955, 0.999149152206250)      \
+	STEP(192, UINT64_C(0x0000000000ffd888), 0.960, 0.999397785600000)      \
+	STEP(193, UINT64_C(0x0000000000ffe55b), 0.965, 0.999593444243750)      \
+	STEP(194, UINT64_C(0x0000000000ffef17), 0.970, 0.999742004200000)      \
+	STEP(195, UINT64_C(0x0000000000fff623), 0.975, 0.999849550781250)      \
+	STEP(196, UINT64_C(0x0000000000fffae9), 0.980, 0.999922380800000)      \
+	STEP(197, UINT64_C(0x0000000000fffdd6), 0.985, 0.999967004818750)      \
+	STEP(198, UINT64_C(0x0000000000ffff5a), 0.990, 0.999990149400000)      \
+	STEP(199, UINT64_C(0x0000000000ffffeb), 0.995, 0.999998759356250)      \
+	STEP(200, UINT64_C(0x0000000001000000), 1.000, 1.000000000000000)
 
 #endif /* JEMALLOC_INTERNAL_SMOOTHSTEP_H */
diff --git a/include/jemalloc/internal/spin.h b/include/jemalloc/internal/spin.h
index 22804c68..4cd5e1db 100644
--- a/include/jemalloc/internal/spin.h
+++ b/include/jemalloc/internal/spin.h
@@ -1,20 +1,23 @@
 #ifndef JEMALLOC_INTERNAL_SPIN_H
 #define JEMALLOC_INTERNAL_SPIN_H
 
-#define SPIN_INITIALIZER {0U}
+#include "jemalloc/internal/jemalloc_preamble.h"
+
+#define SPIN_INITIALIZER                                                       \
+	{ 0U }
 
 typedef struct {
 	unsigned iteration;
 } spin_t;
 
 static inline void
-spin_cpu_spinwait() {
-#  if HAVE_CPU_SPINWAIT
+spin_cpu_spinwait(void) {
+#if HAVE_CPU_SPINWAIT
 	CPU_SPINWAIT;
-#  else
+#else
 	volatile int x = 0;
 	x = x;
-#  endif
+#endif
 }
 
 static inline void
diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h
index 727f7dcb..1c7b23e0 100644
--- a/include/jemalloc/internal/stats.h
+++ b/include/jemalloc/internal/stats.h
@@ -1,33 +1,38 @@
 #ifndef JEMALLOC_INTERNAL_STATS_H
 #define JEMALLOC_INTERNAL_STATS_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_types.h"
+#include "jemalloc/internal/thread_event_registry.h"
+#include "jemalloc/internal/tsd_types.h"
+
 /*  OPTION(opt,		var_name,	default,	set_value_to) */
-#define STATS_PRINT_OPTIONS						\
-    OPTION('J',		json,		false,		true)		\
-    OPTION('g',		general,	true,		false)		\
-    OPTION('m',		merged,		config_stats,	false)		\
-    OPTION('d',		destroyed,	config_stats,	false)		\
-    OPTION('a',		unmerged,	config_stats,	false)		\
-    OPTION('b',		bins,		true,		false)		\
-    OPTION('l',		large,		true,		false)		\
-    OPTION('x',		mutex,		true,		false)		\
-    OPTION('e',		extents,	true,		false)		\
-    OPTION('h',		hpa,		config_stats,	false)
+#define STATS_PRINT_OPTIONS                                                    \
+	OPTION('J', json, false, true)                                         \
+	OPTION('g', general, true, false)                                      \
+	OPTION('m', merged, config_stats, false)                               \
+	OPTION('d', destroyed, config_stats, false)                            \
+	OPTION('a', unmerged, config_stats, false)                             \
+	OPTION('b', bins, true, false)                                         \
+	OPTION('l', large, true, false)                                        \
+	OPTION('x', mutex, true, false)                                        \
+	OPTION('e', extents, true, false)                                      \
+	OPTION('h', hpa, config_stats, false)
 
 enum {
 #define OPTION(o, v, d, s) stats_print_option_num_##v,
-    STATS_PRINT_OPTIONS
+	STATS_PRINT_OPTIONS
 #undef OPTION
-    stats_print_tot_num_options
+	    stats_print_tot_num_options
 };
 
 /* Options for stats_print. */
 extern bool opt_stats_print;
-extern char opt_stats_print_opts[stats_print_tot_num_options+1];
+extern char opt_stats_print_opts[stats_print_tot_num_options + 1];
 
 /* Utilities for stats_interval. */
 extern int64_t opt_stats_interval;
-extern char opt_stats_interval_opts[stats_print_tot_num_options+1];
+extern char    opt_stats_interval_opts[stats_print_tot_num_options + 1];
 
 #define STATS_INTERVAL_DEFAULT -1
 /*
@@ -39,9 +44,7 @@ extern char opt_stats_interval_opts[stats_print_tot_num_options+1];
 #define STATS_INTERVAL_ACCUM_BATCH_MAX (4 << 20)
 
 /* Only accessed by thread event. */
-uint64_t stats_interval_new_event_wait(tsd_t *tsd);
-uint64_t stats_interval_postponed_event_wait(tsd_t *tsd);
-void stats_interval_event_handler(tsd_t *tsd, uint64_t elapsed);
+extern te_base_cb_t stats_interval_te_handler;
 
 /* Implements je_malloc_stats_print. */
 void stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts);
diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h
index 3c0fc1da..d75a3034 100644
--- a/include/jemalloc/internal/sz.h
+++ b/include/jemalloc/internal/sz.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_SIZE_H
 #define JEMALLOC_INTERNAL_SIZE_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/bit_util.h"
 #include "jemalloc/internal/pages.h"
 #include "jemalloc/internal/sc.h"
@@ -53,6 +54,11 @@ extern size_t sz_large_pad;
 
 extern void sz_boot(const sc_data_t *sc_data, bool cache_oblivious);
 
+JEMALLOC_ALWAYS_INLINE bool
+sz_large_size_classes_disabled(void) {
+	return opt_disable_large_size_classes;
+}
+
 JEMALLOC_ALWAYS_INLINE pszind_t
 sz_psz2ind(size_t psz) {
 	assert(psz > 0);
@@ -70,8 +76,9 @@ sz_psz2ind(size_t psz) {
 	 * SC_NGROUP. off_to_first_ps_rg begins from 1, instead of 0. e.g.
 	 * off_to_first_ps_rg is 1 when psz is (PAGE * SC_NGROUP + 1).
 	 */
-	pszind_t off_to_first_ps_rg = (x < SC_LG_NGROUP + LG_PAGE) ?
-	    0 : x - (SC_LG_NGROUP + LG_PAGE);
+	pszind_t off_to_first_ps_rg = (x < SC_LG_NGROUP + LG_PAGE)
+	    ? 0
+	    : x - (SC_LG_NGROUP + LG_PAGE);
 
 	/*
 	 * Same as sc_s::lg_delta.
@@ -79,8 +86,9 @@ sz_psz2ind(size_t psz) {
 	 * for each increase in offset, it's multiplied by two.
 	 * Therefore, lg_delta = LG_PAGE + (off_to_first_ps_rg - 1).
 	 */
-	pszind_t lg_delta = (off_to_first_ps_rg == 0) ?
-	    LG_PAGE : LG_PAGE + (off_to_first_ps_rg - 1);
+	pszind_t lg_delta = (off_to_first_ps_rg == 0)
+	    ? LG_PAGE
+	    : LG_PAGE + (off_to_first_ps_rg - 1);
 
 	/*
 	 * Let's write psz in binary, e.g. 0011 for 0x3, 0111 for 0x7.
@@ -112,13 +120,13 @@ sz_pind2sz_compute(pszind_t pind) {
 	size_t grp = pind >> SC_LG_NGROUP;
 	size_t mod = pind & ((ZU(1) << SC_LG_NGROUP) - 1);
 
-	size_t grp_size_mask = ~((!!grp)-1);
-	size_t grp_size = ((ZU(1) << (LG_PAGE + (SC_LG_NGROUP-1))) << grp)
+	size_t grp_size_mask = ~((!!grp) - 1);
+	size_t grp_size = ((ZU(1) << (LG_PAGE + (SC_LG_NGROUP - 1))) << grp)
 	    & grp_size_mask;
 
 	size_t shift = (grp == 0) ? 1 : grp;
-	size_t lg_delta = shift + (LG_PAGE-1);
-	size_t mod_size = (mod+1) << lg_delta;
+	size_t lg_delta = shift + (LG_PAGE - 1);
+	size_t mod_size = (mod + 1) << lg_delta;
 
 	size_t sz = grp_size + mod_size;
 	return sz;
@@ -142,17 +150,18 @@ sz_psz2u(size_t psz) {
 	if (unlikely(psz > SC_LARGE_MAXCLASS)) {
 		return SC_LARGE_MAXCLASS + PAGE;
 	}
-	size_t x = lg_floor((psz<<1)-1);
-	size_t lg_delta = (x < SC_LG_NGROUP + LG_PAGE + 1) ?
-	    LG_PAGE : x - SC_LG_NGROUP - 1;
+	size_t x = lg_floor((psz << 1) - 1);
+	size_t lg_delta = (x < SC_LG_NGROUP + LG_PAGE + 1)
+	    ? LG_PAGE
+	    : x - SC_LG_NGROUP - 1;
 	size_t delta = ZU(1) << lg_delta;
 	size_t delta_mask = delta - 1;
 	size_t usize = (psz + delta_mask) & ~delta_mask;
 	return usize;
 }
 
-static inline szind_t
-sz_size2index_compute(size_t size) {
+JEMALLOC_ALWAYS_INLINE szind_t
+sz_size2index_compute_inline(size_t size) {
 	if (unlikely(size > SC_LARGE_MAXCLASS)) {
 		return SC_NSIZES;
 	}
@@ -168,23 +177,30 @@ sz_size2index_compute(size_t size) {
 	}
 #endif
 	{
-		szind_t x = lg_floor((size<<1)-1);
-		szind_t shift = (x < SC_LG_NGROUP + LG_QUANTUM) ? 0 :
-		    x - (SC_LG_NGROUP + LG_QUANTUM);
+		szind_t x = lg_floor((size << 1) - 1);
+		szind_t shift = (x < SC_LG_NGROUP + LG_QUANTUM)
+		    ? 0
+		    : x - (SC_LG_NGROUP + LG_QUANTUM);
 		szind_t grp = shift << SC_LG_NGROUP;
 
 		szind_t lg_delta = (x < SC_LG_NGROUP + LG_QUANTUM + 1)
-		    ? LG_QUANTUM : x - SC_LG_NGROUP - 1;
+		    ? LG_QUANTUM
+		    : x - SC_LG_NGROUP - 1;
 
-		size_t delta_inverse_mask = ZU(-1) << lg_delta;
-		szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) &
-		    ((ZU(1) << SC_LG_NGROUP) - 1);
+		size_t  delta_inverse_mask = ZU(-1) << lg_delta;
+		szind_t mod = ((((size - 1) & delta_inverse_mask) >> lg_delta))
+		    & ((ZU(1) << SC_LG_NGROUP) - 1);
 
 		szind_t index = SC_NTINY + grp + mod;
 		return index;
 	}
 }
 
+static inline szind_t
+sz_size2index_compute(size_t size) {
+	return sz_size2index_compute_inline(size);
+}
+
 JEMALLOC_ALWAYS_INLINE szind_t
 sz_size2index_lookup_impl(size_t size) {
 	assert(size <= SC_LOOKUP_MAXCLASS);
@@ -207,8 +223,8 @@ sz_size2index(size_t size) {
 	return sz_size2index_compute(size);
 }
 
-static inline size_t
-sz_index2size_compute(szind_t index) {
+JEMALLOC_ALWAYS_INLINE size_t
+sz_index2size_compute_inline(szind_t index) {
 #if (SC_NTINY > 0)
 	if (index < SC_NTINY) {
 		return (ZU(1) << (SC_LG_TINY_MAXCLASS - SC_NTINY + 1 + index));
@@ -217,22 +233,27 @@ sz_index2size_compute(szind_t index) {
 	{
 		size_t reduced_index = index - SC_NTINY;
 		size_t grp = reduced_index >> SC_LG_NGROUP;
-		size_t mod = reduced_index & ((ZU(1) << SC_LG_NGROUP) -
-		    1);
+		size_t mod = reduced_index & ((ZU(1) << SC_LG_NGROUP) - 1);
 
-		size_t grp_size_mask = ~((!!grp)-1);
-		size_t grp_size = ((ZU(1) << (LG_QUANTUM +
-		    (SC_LG_NGROUP-1))) << grp) & grp_size_mask;
+		size_t grp_size_mask = ~((!!grp) - 1);
+		size_t grp_size = ((ZU(1) << (LG_QUANTUM + (SC_LG_NGROUP - 1)))
+		                      << grp)
+		    & grp_size_mask;
 
 		size_t shift = (grp == 0) ? 1 : grp;
-		size_t lg_delta = shift + (LG_QUANTUM-1);
-		size_t mod_size = (mod+1) << lg_delta;
+		size_t lg_delta = shift + (LG_QUANTUM - 1);
+		size_t mod_size = (mod + 1) << lg_delta;
 
 		size_t usize = grp_size + mod_size;
 		return usize;
 	}
 }
 
+static inline size_t
+sz_index2size_compute(szind_t index) {
+	return sz_index2size_compute_inline(index);
+}
+
 JEMALLOC_ALWAYS_INLINE size_t
 sz_index2size_lookup_impl(szind_t index) {
 	return sz_index2size_tab[index];
@@ -246,15 +267,61 @@ sz_index2size_lookup(szind_t index) {
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-sz_index2size(szind_t index) {
+sz_index2size_unsafe(szind_t index) {
 	assert(index < SC_NSIZES);
 	return sz_index2size_lookup(index);
 }
 
+JEMALLOC_ALWAYS_INLINE size_t
+sz_index2size(szind_t index) {
+	assert(!sz_large_size_classes_disabled()
+	    || index <= sz_size2index(USIZE_GROW_SLOW_THRESHOLD));
+	size_t size = sz_index2size_unsafe(index);
+	/*
+	 * With large size classes disabled, the usize above
+	 * SC_LARGE_MINCLASS should grow by PAGE.  However, for sizes
+	 * in [SC_LARGE_MINCLASS, USIZE_GROW_SLOW_THRESHOLD], the
+	 * usize would not change because the size class gap in this
+	 * range is just the same as PAGE.  Although we use
+	 * SC_LARGE_MINCLASS as the threshold in most places, we
+	 * allow tcache and sec to cache up to
+	 * USIZE_GROW_SLOW_THRESHOLD to minimize the side effect of
+	 * not having size classes for larger sizes.  Thus, we assert
+	 * the size is no larger than USIZE_GROW_SLOW_THRESHOLD here
+	 * instead of SC_LARGE_MINCLASS.
+	 */
+	assert(!sz_large_size_classes_disabled()
+	    || size <= USIZE_GROW_SLOW_THRESHOLD);
+	return size;
+}
+
 JEMALLOC_ALWAYS_INLINE void
 sz_size2index_usize_fastpath(size_t size, szind_t *ind, size_t *usize) {
-	*ind = sz_size2index_lookup_impl(size);
-	*usize = sz_index2size_lookup_impl(*ind);
+	if (util_compile_time_const(size)) {
+		/*
+		 * When inlined, the size may become known at compile
+		 * time, which allows static computation through LTO.
+		 */
+		*ind = sz_size2index_compute_inline(size);
+		assert(*ind == sz_size2index_lookup_impl(size));
+		*usize = sz_index2size_compute_inline(*ind);
+		assert(*usize == sz_index2size_lookup_impl(*ind));
+	} else {
+		*ind = sz_size2index_lookup_impl(size);
+		*usize = sz_index2size_lookup_impl(*ind);
+	}
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+sz_s2u_compute_using_delta(size_t size) {
+	size_t x = lg_floor((size << 1) - 1);
+	size_t lg_delta = (x < SC_LG_NGROUP + LG_QUANTUM + 1)
+	    ? LG_QUANTUM
+	    : x - SC_LG_NGROUP - 1;
+	size_t delta = ZU(1) << lg_delta;
+	size_t delta_mask = delta - 1;
+	size_t usize = (size + delta_mask) & ~delta_mask;
+	return usize;
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
@@ -270,23 +337,28 @@ sz_s2u_compute(size_t size) {
 	if (size <= (ZU(1) << SC_LG_TINY_MAXCLASS)) {
 		size_t lg_tmin = SC_LG_TINY_MAXCLASS - SC_NTINY + 1;
 		size_t lg_ceil = lg_floor(pow2_ceil_zu(size));
-		return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) :
-		    (ZU(1) << lg_ceil));
+		return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin)
+		                          : (ZU(1) << lg_ceil));
 	}
 #endif
-	{
-		size_t x = lg_floor((size<<1)-1);
-		size_t lg_delta = (x < SC_LG_NGROUP + LG_QUANTUM + 1)
-		    ?  LG_QUANTUM : x - SC_LG_NGROUP - 1;
-		size_t delta = ZU(1) << lg_delta;
-		size_t delta_mask = delta - 1;
-		size_t usize = (size + delta_mask) & ~delta_mask;
+	if (size <= SC_SMALL_MAXCLASS || !sz_large_size_classes_disabled()) {
+		return sz_s2u_compute_using_delta(size);
+	} else {
+		/*
+		 * With sz_large_size_classes_disabled() == true, usize of a large
+		 * allocation is calculated by ceiling size to the smallest
+		 * multiple of PAGE to minimize the memory overhead, especially
+		 * when using hugepages.
+		 */
+		size_t usize = PAGE_CEILING(size);
+		assert(usize - size < PAGE);
 		return usize;
 	}
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
 sz_s2u_lookup(size_t size) {
+	assert(size < SC_LARGE_MINCLASS);
 	size_t ret = sz_index2size_lookup(sz_size2index_lookup(size));
 
 	assert(ret == sz_s2u_compute(size));
@@ -365,6 +437,21 @@ sz_sa2u(size_t size, size_t alignment) {
 	return usize;
 }
 
+/*
+ * Under normal circumstances, whether or not to use a slab
+ * to satisfy an allocation depends solely on the allocation's
+ * effective size. However, this is *not* the case when an allocation
+ * is sampled for profiling, in which case you *must not* use a slab
+ * regardless of the effective size. Thus `sz_can_use_slab` is called
+ * on the common path, but there exist `*_explicit_slab` variants of
+ * several functions for handling the aforementioned case of
+ * sampled allocations.
+ */
+JEMALLOC_ALWAYS_INLINE bool
+sz_can_use_slab(size_t size) {
+	return size <= SC_SMALL_MAXCLASS;
+}
+
 size_t sz_psz_quantize_floor(size_t size);
 size_t sz_psz_quantize_ceil(size_t size);
 
diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h
index a2ab7101..b7fdb5a4 100644
--- a/include/jemalloc/internal/tcache_externs.h
+++ b/include/jemalloc/internal/tcache_externs.h
@@ -1,28 +1,40 @@
 #ifndef JEMALLOC_INTERNAL_TCACHE_EXTERNS_H
 #define JEMALLOC_INTERNAL_TCACHE_EXTERNS_H
 
-extern bool opt_tcache;
-extern size_t opt_tcache_max;
-extern ssize_t	opt_lg_tcache_nslots_mul;
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/cache_bin.h"
+#include "jemalloc/internal/sz.h"
+#include "jemalloc/internal/tcache_types.h"
+#include "jemalloc/internal/thread_event_registry.h"
+
+extern bool     opt_tcache;
+extern size_t   opt_tcache_max;
+extern ssize_t  opt_lg_tcache_nslots_mul;
 extern unsigned opt_tcache_nslots_small_min;
 extern unsigned opt_tcache_nslots_small_max;
 extern unsigned opt_tcache_nslots_large;
-extern ssize_t opt_lg_tcache_shift;
-extern size_t opt_tcache_gc_incr_bytes;
-extern size_t opt_tcache_gc_delay_bytes;
+extern ssize_t  opt_lg_tcache_shift;
+extern size_t   opt_tcache_gc_incr_bytes;
+extern size_t   opt_tcache_gc_delay_bytes;
 extern unsigned opt_lg_tcache_flush_small_div;
 extern unsigned opt_lg_tcache_flush_large_div;
 
 /*
  * Number of tcache bins.  There are SC_NBINS small-object bins, plus 0 or more
- * large-object bins.
+ * large-object bins.  This is only used during threads initialization and
+ * changing it will not reflect on initialized threads as expected.  Thus,
+ * it should not be changed on the fly.  To change the number of tcache bins
+ * in use, refer to tcache_nbins of each tcache.
  */
-extern unsigned	nhbins;
+extern unsigned global_do_not_change_tcache_nbins;
 
-/* Maximum cached size class. */
-extern size_t	tcache_maxclass;
-
-extern cache_bin_info_t *tcache_bin_info;
+/*
+ * Maximum cached size class.  Same as above, this is only used during threads
+ * initialization and should not be changed.  To change the maximum cached size
+ * class, refer to tcache_max of each tcache.
+ */
+extern size_t global_do_not_change_tcache_maxclass;
 
 /*
  * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and
@@ -32,44 +44,47 @@ extern cache_bin_info_t *tcache_bin_info;
  * touched.  This allows the entire array to be allocated the first time an
  * explicit tcache is created without a disproportionate impact on memory usage.
  */
-extern tcaches_t	*tcaches;
+extern tcaches_t *tcaches;
 
 size_t tcache_salloc(tsdn_t *tsdn, const void *ptr);
-void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
-    cache_bin_t *tbin, szind_t binind, bool *tcache_success);
+void  *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
+     cache_bin_t *cache_bin, szind_t binind, bool *tcache_success);
 
-void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
-    szind_t binind, unsigned rem);
-void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
-    szind_t binind, unsigned rem);
-void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *bin,
-    szind_t binind, bool is_small);
-void tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow,
-    tcache_t *tcache, arena_t *arena);
+void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache,
+    cache_bin_t *cache_bin, szind_t binind, unsigned rem);
+void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache,
+    cache_bin_t *cache_bin, szind_t binind, unsigned rem);
+void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache,
+    cache_bin_t *cache_bin, szind_t binind, bool is_small);
+bool tcache_bin_info_default_init(
+    const char *bin_settings_segment_cur, size_t len_left);
+bool tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len);
+bool tcache_bin_ncached_max_read(
+    tsd_t *tsd, size_t bin_size, cache_bin_sz_t *ncached_max);
+void tcache_arena_reassociate(
+    tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_t *tcache, arena_t *arena);
 tcache_t *tcache_create_explicit(tsd_t *tsd);
-void tcache_cleanup(tsd_t *tsd);
-void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena);
-bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind);
-void tcaches_flush(tsd_t *tsd, unsigned ind);
-void tcaches_destroy(tsd_t *tsd, unsigned ind);
-bool tcache_boot(tsdn_t *tsdn, base_t *base);
-void tcache_arena_associate(tsdn_t *tsdn, tcache_slow_t *tcache_slow,
-    tcache_t *tcache, arena_t *arena);
+bool      thread_tcache_max_set(tsd_t *tsd, size_t tcache_max);
+void      tcache_cleanup(tsd_t *tsd);
+void      tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena);
+bool      tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind);
+void      tcaches_flush(tsd_t *tsd, unsigned ind);
+void      tcaches_destroy(tsd_t *tsd, unsigned ind);
+bool      tcache_boot(tsdn_t *tsdn, base_t *base);
+void      tcache_arena_associate(
+         tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_t *tcache, arena_t *arena);
 void tcache_prefork(tsdn_t *tsdn);
 void tcache_postfork_parent(tsdn_t *tsdn);
 void tcache_postfork_child(tsdn_t *tsdn);
 void tcache_flush(tsd_t *tsd);
-bool tsd_tcache_data_init(tsd_t *tsd);
 bool tsd_tcache_enabled_data_init(tsd_t *tsd);
+void tcache_enabled_set(tsd_t *tsd, bool enabled);
+
+extern void *(*JET_MUTABLE tcache_stack_alloc)(tsdn_t *tsdn, size_t size,
+    size_t alignment);
 
 void tcache_assert_initialized(tcache_t *tcache);
 
-/* Only accessed by thread event. */
-uint64_t tcache_gc_new_event_wait(tsd_t *tsd);
-uint64_t tcache_gc_postponed_event_wait(tsd_t *tsd);
-void tcache_gc_event_handler(tsd_t *tsd, uint64_t elapsed);
-uint64_t tcache_gc_dalloc_new_event_wait(tsd_t *tsd);
-uint64_t tcache_gc_dalloc_postponed_event_wait(tsd_t *tsd);
-void tcache_gc_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed);
+extern te_base_cb_t tcache_gc_te_handler;
 
 #endif /* JEMALLOC_INTERNAL_TCACHE_EXTERNS_H */
diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h
index 2634f145..5f8ed317 100644
--- a/include/jemalloc/internal/tcache_inlines.h
+++ b/include/jemalloc/internal/tcache_inlines.h
@@ -1,11 +1,16 @@
 #ifndef JEMALLOC_INTERNAL_TCACHE_INLINES_H
 #define JEMALLOC_INTERNAL_TCACHE_INLINES_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/arena_externs.h"
 #include "jemalloc/internal/bin.h"
+#include "jemalloc/internal/jemalloc_internal_inlines_b.h"
 #include "jemalloc/internal/jemalloc_internal_types.h"
+#include "jemalloc/internal/large_externs.h"
 #include "jemalloc/internal/san.h"
 #include "jemalloc/internal/sc.h"
 #include "jemalloc/internal/sz.h"
+#include "jemalloc/internal/tcache_externs.h"
 #include "jemalloc/internal/util.h"
 
 static inline bool
@@ -13,39 +18,79 @@ tcache_enabled_get(tsd_t *tsd) {
 	return tsd_tcache_enabled_get(tsd);
 }
 
-static inline void
-tcache_enabled_set(tsd_t *tsd, bool enabled) {
-	bool was_enabled = tsd_tcache_enabled_get(tsd);
+static inline unsigned
+tcache_nbins_get(tcache_slow_t *tcache_slow) {
+	assert(tcache_slow != NULL);
+	unsigned nbins = tcache_slow->tcache_nbins;
+	assert(nbins <= TCACHE_NBINS_MAX);
+	return nbins;
+}
 
-	if (!was_enabled && enabled) {
-		tsd_tcache_data_init(tsd);
-	} else if (was_enabled && !enabled) {
-		tcache_cleanup(tsd);
+static inline size_t
+tcache_max_get(tcache_slow_t *tcache_slow) {
+	assert(tcache_slow != NULL);
+	size_t tcache_max = sz_index2size(tcache_nbins_get(tcache_slow) - 1);
+	assert(tcache_max <= TCACHE_MAXCLASS_LIMIT);
+	return tcache_max;
+}
+
+static inline void
+tcache_max_set(tcache_slow_t *tcache_slow, size_t tcache_max) {
+	assert(tcache_slow != NULL);
+	assert(tcache_max <= TCACHE_MAXCLASS_LIMIT);
+	tcache_slow->tcache_nbins = sz_size2index(tcache_max) + 1;
+}
+
+static inline void
+tcache_bin_settings_backup(
+    tcache_t *tcache, cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) {
+	for (unsigned i = 0; i < TCACHE_NBINS_MAX; i++) {
+		cache_bin_info_init(&tcache_bin_info[i],
+		    cache_bin_ncached_max_get_unsafe(&tcache->bins[i]));
 	}
-	/* Commit the state last.  Above calls check current state. */
-	tsd_tcache_enabled_set(tsd, enabled);
-	tsd_slow_update(tsd);
 }
 
 JEMALLOC_ALWAYS_INLINE bool
-tcache_small_bin_disabled(szind_t ind, cache_bin_t *bin) {
-	assert(ind < SC_NBINS);
-	bool ret = (cache_bin_info_ncached_max(&tcache_bin_info[ind]) == 0);
-	if (ret && bin != NULL) {
-		/* small size class but cache bin disabled. */
-		assert(ind >= nhbins);
-		assert((uintptr_t)(*bin->stack_head) ==
-		    cache_bin_preceding_junk);
+tcache_bin_disabled(szind_t ind, cache_bin_t *bin, tcache_slow_t *tcache_slow) {
+	assert(bin != NULL);
+	assert(ind < TCACHE_NBINS_MAX);
+	bool disabled = cache_bin_disabled(bin);
+
+	/*
+	 * If a bin's ind >= nbins or ncached_max == 0, it must be disabled.
+	 * However, when ind < nbins, it could be either enabled
+	 * (ncached_max > 0) or disabled (ncached_max == 0). Similarly, when
+	 * ncached_max > 0, it could be either enabled (ind < nbins) or
+	 * disabled (ind >= nbins).  Thus, if a bin is disabled, it has either
+	 * ind >= nbins or ncached_max == 0.  If a bin is enabled, it has
+	 * ind < nbins and ncached_max > 0.
+	 */
+	unsigned       nbins = tcache_nbins_get(tcache_slow);
+	cache_bin_sz_t ncached_max = cache_bin_ncached_max_get_unsafe(bin);
+	if (ind >= nbins) {
+		assert(disabled);
+	} else {
+		assert(!disabled || ncached_max == 0);
+	}
+	if (ncached_max == 0) {
+		assert(disabled);
+	} else {
+		assert(!disabled || ind >= nbins);
+	}
+	if (disabled) {
+		assert(ind >= nbins || ncached_max == 0);
+	} else {
+		assert(ind < nbins && ncached_max > 0);
 	}
 
-	return ret;
+	return disabled;
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
-    size_t size, szind_t binind, bool zero, bool slow_path) {
+tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
+    szind_t binind, bool zero, bool slow_path) {
 	void *ret;
-	bool tcache_success;
+	bool  tcache_success;
 
 	assert(binind < SC_NBINS);
 	cache_bin_t *bin = &tcache->bins[binind];
@@ -57,16 +102,17 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
 		if (unlikely(arena == NULL)) {
 			return NULL;
 		}
-		if (unlikely(tcache_small_bin_disabled(binind, bin))) {
+		if (unlikely(tcache_bin_disabled(
+		        binind, bin, tcache->tcache_slow))) {
 			/* stats and zero are handled directly by the arena. */
 			return arena_malloc_hard(tsd_tsdn(tsd), arena, size,
-			    binind, zero);
+			    binind, zero, /* slab */ true);
 		}
 		tcache_bin_flush_stashed(tsd, tcache, bin, binind,
 		    /* is_small */ true);
 
-		ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache,
-		    bin, binind, &tcache_hard_success);
+		ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache, bin,
+		    binind, &tcache_hard_success);
 		if (tcache_hard_success == false) {
 			return NULL;
 		}
@@ -88,10 +134,11 @@ JEMALLOC_ALWAYS_INLINE void *
 tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
     szind_t binind, bool zero, bool slow_path) {
 	void *ret;
-	bool tcache_success;
+	bool  tcache_success;
 
-	assert(binind >= SC_NBINS && binind < nhbins);
 	cache_bin_t *bin = &tcache->bins[binind];
+	assert(binind >= SC_NBINS
+	    && !tcache_bin_disabled(binind, bin, tcache->tcache_slow));
 	ret = cache_bin_alloc(bin, &tcache_success);
 	assert(tcache_success == (ret != NULL));
 	if (unlikely(!tcache_success)) {
@@ -113,21 +160,21 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
 	} else {
 		if (unlikely(zero)) {
 			size_t usize = sz_index2size(binind);
-			assert(usize <= tcache_maxclass);
+			assert(usize <= tcache_max_get(tcache->tcache_slow));
 			memset(ret, 0, usize);
 		}
+	}
 
-		if (config_stats) {
-			bin->tstats.nrequests++;
-		}
+	if (config_stats) {
+		bin->tstats.nrequests++;
 	}
 
 	return ret;
 }
 
 JEMALLOC_ALWAYS_INLINE void
-tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
-    bool slow_path) {
+tcache_dalloc_small(
+    tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) {
 	assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SC_SMALL_MAXCLASS);
 
 	cache_bin_t *bin = &tcache->bins[binind];
@@ -147,13 +194,13 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
 	}
 
 	if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) {
-		if (unlikely(tcache_small_bin_disabled(binind, bin))) {
+		if (unlikely(tcache_bin_disabled(
+		        binind, bin, tcache->tcache_slow))) {
 			arena_dalloc_small(tsd_tsdn(tsd), ptr);
 			return;
 		}
-		cache_bin_sz_t max = cache_bin_info_ncached_max(
-		    &tcache_bin_info[binind]);
-		unsigned remain = max >> opt_lg_tcache_flush_small_div;
+		cache_bin_sz_t max = cache_bin_ncached_max_get(bin);
+		unsigned       remain = max >> opt_lg_tcache_flush_small_div;
 		tcache_bin_flush_small(tsd, tcache, bin, binind, remain);
 		bool ret = cache_bin_dalloc_easy(bin, ptr);
 		assert(ret);
@@ -161,17 +208,18 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
 }
 
 JEMALLOC_ALWAYS_INLINE void
-tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
-    bool slow_path) {
-
+tcache_dalloc_large(
+    tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) {
+	assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SC_SMALL_MAXCLASS);
 	assert(tcache_salloc(tsd_tsdn(tsd), ptr)
-	    > SC_SMALL_MAXCLASS);
-	assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass);
+	    <= tcache_max_get(tcache->tcache_slow));
+	assert(!tcache_bin_disabled(
+	    binind, &tcache->bins[binind], tcache->tcache_slow));
 
 	cache_bin_t *bin = &tcache->bins[binind];
 	if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) {
-		unsigned remain = cache_bin_info_ncached_max(
-		    &tcache_bin_info[binind]) >> opt_lg_tcache_flush_large_div;
+		unsigned remain = cache_bin_ncached_max_get(bin)
+		    >> opt_lg_tcache_flush_large_div;
 		tcache_bin_flush_large(tsd, tcache, bin, binind, remain);
 		bool ret = cache_bin_dalloc_easy(bin, ptr);
 		assert(ret);
diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h
index 176d73de..2c000de3 100644
--- a/include/jemalloc/internal/tcache_structs.h
+++ b/include/jemalloc/internal/tcache_structs.h
@@ -1,11 +1,12 @@
 #ifndef JEMALLOC_INTERNAL_TCACHE_STRUCTS_H
 #define JEMALLOC_INTERNAL_TCACHE_STRUCTS_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/cache_bin.h"
 #include "jemalloc/internal/ql.h"
 #include "jemalloc/internal/sc.h"
+#include "jemalloc/internal/tcache_types.h"
 #include "jemalloc/internal/ticker.h"
-#include "jemalloc/internal/tsd_types.h"
 
 /*
  * The tcache state is split into the slow and hot path data.  Each has a
@@ -29,39 +30,45 @@ struct tcache_slow_s {
 	cache_bin_array_descriptor_t cache_bin_array_descriptor;
 
 	/* The arena this tcache is associated with. */
-	arena_t		*arena;
+	arena_t *arena;
+	/* The number of bins activated in the tcache. */
+	unsigned tcache_nbins;
+	/* Last time GC has been performed.  */
+	nstime_t last_gc_time;
 	/* Next bin to GC. */
-	szind_t		next_gc_bin;
-	/* For small bins, fill (ncached_max >> lg_fill_div). */
-	uint8_t		lg_fill_div[SC_NBINS];
+	szind_t next_gc_bin;
+	szind_t next_gc_bin_small;
+	szind_t next_gc_bin_large;
+	/* For small bins, help determine how many items to fill at a time. */
+	cache_bin_fill_ctl_t bin_fill_ctl_do_not_access_directly[SC_NBINS];
 	/* For small bins, whether has been refilled since last GC. */
-	bool		bin_refilled[SC_NBINS];
+	bool bin_refilled[SC_NBINS];
 	/*
 	 * For small bins, the number of items we can pretend to flush before
 	 * actually flushing.
 	 */
-	uint8_t		bin_flush_delay_items[SC_NBINS];
+	uint8_t bin_flush_delay_items[SC_NBINS];
 	/*
 	 * The start of the allocation containing the dynamic allocation for
 	 * either the cache bins alone, or the cache bin memory as well as this
 	 * tcache_slow_t and its associated tcache_t.
 	 */
-	void		*dyn_alloc;
+	void *dyn_alloc;
 
 	/* The associated bins. */
-	tcache_t	*tcache;
+	tcache_t *tcache;
 };
 
 struct tcache_s {
-	tcache_slow_t	*tcache_slow;
-	cache_bin_t	bins[TCACHE_NBINS_MAX];
+	tcache_slow_t *tcache_slow;
+	cache_bin_t    bins[TCACHE_NBINS_MAX];
 };
 
 /* Linkage for list of available (previously used) explicit tcache IDs. */
 struct tcaches_s {
 	union {
-		tcache_t	*tcache;
-		tcaches_t	*next;
+		tcache_t  *tcache;
+		tcaches_t *next;
 	};
 };
 
diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h
index 583677ea..27d80d3c 100644
--- a/include/jemalloc/internal/tcache_types.h
+++ b/include/jemalloc/internal/tcache_types.h
@@ -1,35 +1,37 @@
 #ifndef JEMALLOC_INTERNAL_TCACHE_TYPES_H
 #define JEMALLOC_INTERNAL_TCACHE_TYPES_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/sc.h"
 
 typedef struct tcache_slow_s tcache_slow_t;
-typedef struct tcache_s tcache_t;
-typedef struct tcaches_s tcaches_t;
-
-/*
- * tcache pointers close to NULL are used to encode state information that is
- * used for two purposes: preventing thread caching on a per thread basis and
- * cleaning up during thread shutdown.
- */
-#define TCACHE_STATE_DISABLED		((tcache_t *)(uintptr_t)1)
-#define TCACHE_STATE_REINCARNATED	((tcache_t *)(uintptr_t)2)
-#define TCACHE_STATE_PURGATORY		((tcache_t *)(uintptr_t)3)
-#define TCACHE_STATE_MAX		TCACHE_STATE_PURGATORY
+typedef struct tcache_s      tcache_t;
+typedef struct tcaches_s     tcaches_t;
 
 /* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */
-#define TCACHE_ZERO_INITIALIZER {0}
-#define TCACHE_SLOW_ZERO_INITIALIZER {0}
+#define TCACHE_ZERO_INITIALIZER                                                \
+	{ 0 }
+#define TCACHE_SLOW_ZERO_INITIALIZER                                           \
+	{                                                                      \
+		{ 0 }                                                          \
+	}
 
 /* Used in TSD static initializer only. Will be initialized to opt_tcache. */
 #define TCACHE_ENABLED_ZERO_INITIALIZER false
 
 /* Used for explicit tcache only. Means flushed but not destroyed. */
+/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
 #define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1)
 
-#define TCACHE_LG_MAXCLASS_LIMIT 23 /* tcache_maxclass = 8M */
+#define TCACHE_LG_MAXCLASS_LIMIT LG_USIZE_GROW_SLOW_THRESHOLD
 #define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT)
-#define TCACHE_NBINS_MAX (SC_NBINS + SC_NGROUP *			\
-    (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) + 1)
+#define TCACHE_NBINS_MAX                                                       \
+	(SC_NBINS                                                              \
+	    + SC_NGROUP * (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS)    \
+	    + 1)
+#define TCACHE_GC_NEIGHBOR_LIMIT ((uintptr_t)1 << 21)       /* 2M */
+#define TCACHE_GC_INTERVAL_NS ((uint64_t)10 * KQU(1000000)) /* 10ms */
+#define TCACHE_GC_SMALL_NBINS_MAX ((SC_NBINS > 8) ? (SC_NBINS >> 3) : 1)
+#define TCACHE_GC_LARGE_NBINS_MAX 1
 
 #endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */
diff --git a/include/jemalloc/internal/test_hooks.h b/include/jemalloc/internal/test_hooks.h
index 3d530b5c..35f3a211 100644
--- a/include/jemalloc/internal/test_hooks.h
+++ b/include/jemalloc/internal/test_hooks.h
@@ -1,24 +1,28 @@
 #ifndef JEMALLOC_INTERNAL_TEST_HOOKS_H
 #define JEMALLOC_INTERNAL_TEST_HOOKS_H
 
-extern JEMALLOC_EXPORT void (*test_hooks_arena_new_hook)();
-extern JEMALLOC_EXPORT void (*test_hooks_libc_hook)();
+#include "jemalloc/internal/jemalloc_preamble.h"
+
+extern JEMALLOC_EXPORT void (*test_hooks_arena_new_hook)(void);
+extern JEMALLOC_EXPORT void (*test_hooks_libc_hook)(void);
 
 #if defined(JEMALLOC_JET) || defined(JEMALLOC_UNIT_TEST)
-#  define JEMALLOC_TEST_HOOK(fn, hook) ((void)(hook != NULL && (hook(), 0)), fn)
+#	define JEMALLOC_TEST_HOOK(fn, hook)                                   \
+		((void)(hook != NULL && (hook(), 0)), fn)
 
-#  define open JEMALLOC_TEST_HOOK(open, test_hooks_libc_hook)
-#  define read JEMALLOC_TEST_HOOK(read, test_hooks_libc_hook)
-#  define write JEMALLOC_TEST_HOOK(write, test_hooks_libc_hook)
-#  define readlink JEMALLOC_TEST_HOOK(readlink, test_hooks_libc_hook)
-#  define close JEMALLOC_TEST_HOOK(close, test_hooks_libc_hook)
-#  define creat JEMALLOC_TEST_HOOK(creat, test_hooks_libc_hook)
-#  define secure_getenv JEMALLOC_TEST_HOOK(secure_getenv, test_hooks_libc_hook)
+#	define open JEMALLOC_TEST_HOOK(open, test_hooks_libc_hook)
+#	define read JEMALLOC_TEST_HOOK(read, test_hooks_libc_hook)
+#	define write JEMALLOC_TEST_HOOK(write, test_hooks_libc_hook)
+#	define readlink JEMALLOC_TEST_HOOK(readlink, test_hooks_libc_hook)
+#	define close JEMALLOC_TEST_HOOK(close, test_hooks_libc_hook)
+#	define creat JEMALLOC_TEST_HOOK(creat, test_hooks_libc_hook)
+#	define secure_getenv                                                  \
+		JEMALLOC_TEST_HOOK(secure_getenv, test_hooks_libc_hook)
 /* Note that this is undef'd and re-define'd in src/prof.c. */
-#  define _Unwind_Backtrace JEMALLOC_TEST_HOOK(_Unwind_Backtrace, test_hooks_libc_hook)
+#	define _Unwind_Backtrace                                              \
+		JEMALLOC_TEST_HOOK(_Unwind_Backtrace, test_hooks_libc_hook)
 #else
-#  define JEMALLOC_TEST_HOOK(fn, hook) fn
+#	define JEMALLOC_TEST_HOOK(fn, hook) fn
 #endif
 
-
 #endif /* JEMALLOC_INTERNAL_TEST_HOOKS_H */
diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h
index 2f4e1b39..e9e2b6cd 100644
--- a/include/jemalloc/internal/thread_event.h
+++ b/include/jemalloc/internal/thread_event.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_THREAD_EVENT_H
 #define JEMALLOC_INTERNAL_THREAD_EVENT_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/tsd.h"
 
 /* "te" is short for "thread_event" */
@@ -36,7 +37,7 @@
 #define TE_INVALID_ELAPSED UINT64_MAX
 
 typedef struct te_ctx_s {
-	bool is_alloc;
+	bool      is_alloc;
 	uint64_t *current;
 	uint64_t *last_event;
 	uint64_t *next_event;
@@ -47,36 +48,20 @@ void te_assert_invariants_debug(tsd_t *tsd);
 void te_event_trigger(tsd_t *tsd, te_ctx_t *ctx);
 void te_recompute_fast_threshold(tsd_t *tsd);
 void tsd_te_init(tsd_t *tsd);
-
-/*
- * List of all events, in the following format:
- *  E(event,		(condition), is_alloc_event)
- */
-#define ITERATE_OVER_ALL_EVENTS						\
-    E(tcache_gc,		(opt_tcache_gc_incr_bytes > 0), true)	\
-    E(prof_sample,		(config_prof && opt_prof), true)  	\
-    E(stats_interval,		(opt_stats_interval >= 0), true)   	\
-    E(tcache_gc_dalloc,		(opt_tcache_gc_incr_bytes > 0), false)	\
-    E(peak_alloc,		config_stats, true)			\
-    E(peak_dalloc,		config_stats, false)
-
-#define E(event, condition_unused, is_alloc_event_unused)		\
-    C(event##_event_wait)
+void te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, uint64_t wait);
 
 /* List of all thread event counters. */
-#define ITERATE_OVER_ALL_COUNTERS					\
-    C(thread_allocated)							\
-    C(thread_allocated_last_event)					\
-    ITERATE_OVER_ALL_EVENTS						\
-    C(prof_sample_last_event)						\
-    C(stats_interval_last_event)
+#define ITERATE_OVER_ALL_COUNTERS                                              \
+	C(thread_allocated)                                                    \
+	C(thread_allocated_last_event)                                         \
+	C(prof_sample_last_event)                                              \
+	C(stats_interval_last_event)
 
 /* Getters directly wrap TSD getters. */
-#define C(counter)							\
-JEMALLOC_ALWAYS_INLINE uint64_t						\
-counter##_get(tsd_t *tsd) {						\
-	return tsd_##counter##_get(tsd);				\
-}
+#define C(counter)                                                             \
+	JEMALLOC_ALWAYS_INLINE uint64_t counter##_get(tsd_t *tsd) {            \
+		return tsd_##counter##_get(tsd);                               \
+	}
 
 ITERATE_OVER_ALL_COUNTERS
 #undef C
@@ -88,21 +73,14 @@ ITERATE_OVER_ALL_COUNTERS
  * temporarily delay the event and let it be immediately triggered at the next
  * allocation call.
  */
-#define C(counter)							\
-JEMALLOC_ALWAYS_INLINE void						\
-counter##_set(tsd_t *tsd, uint64_t v) {					\
-	*tsd_##counter##p_get(tsd) = v;					\
-}
+#define C(counter)                                                             \
+	JEMALLOC_ALWAYS_INLINE void counter##_set(tsd_t *tsd, uint64_t v) {    \
+		*tsd_##counter##p_get(tsd) = v;                                \
+	}
 
 ITERATE_OVER_ALL_COUNTERS
 #undef C
 
-/*
- * For generating _event_wait getter / setter functions for each individual
- * event.
- */
-#undef E
-
 /*
  * The malloc and free fastpath getters -- use the unsafe getters since tsd may
  * be non-nominal, in which case the fast_threshold will be set to 0.  This
@@ -219,57 +197,6 @@ te_ctx_get(tsd_t *tsd, te_ctx_t *ctx, bool is_alloc) {
 	}
 }
 
-/*
- * The lookahead functionality facilitates events to be able to lookahead, i.e.
- * without touching the event counters, to determine whether an event would be
- * triggered.  The event counters are not advanced until the end of the
- * allocation / deallocation calls, so the lookahead can be useful if some
- * preparation work for some event must be done early in the allocation /
- * deallocation calls.
- *
- * Currently only the profiling sampling event needs the lookahead
- * functionality, so we don't yet define general purpose lookahead functions.
- *
- * Surplus is a terminology referring to the amount of bytes beyond what's
- * needed for triggering an event, which can be a useful quantity to have in
- * general when lookahead is being called.
- */
-
-JEMALLOC_ALWAYS_INLINE bool
-te_prof_sample_event_lookahead_surplus(tsd_t *tsd, size_t usize,
-    size_t *surplus) {
-	if (surplus != NULL) {
-		/*
-		 * This is a dead store: the surplus will be overwritten before
-		 * any read.  The initialization suppresses compiler warnings.
-		 * Meanwhile, using SIZE_MAX to initialize is good for
-		 * debugging purpose, because a valid surplus value is strictly
-		 * less than usize, which is at most SIZE_MAX.
-		 */
-		*surplus = SIZE_MAX;
-	}
-	if (unlikely(!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0)) {
-		return false;
-	}
-	/* The subtraction is intentionally susceptible to underflow. */
-	uint64_t accumbytes = tsd_thread_allocated_get(tsd) + usize -
-	    tsd_thread_allocated_last_event_get(tsd);
-	uint64_t sample_wait = tsd_prof_sample_event_wait_get(tsd);
-	if (accumbytes < sample_wait) {
-		return false;
-	}
-	assert(accumbytes - sample_wait < (uint64_t)usize);
-	if (surplus != NULL) {
-		*surplus = (size_t)(accumbytes - sample_wait);
-	}
-	return true;
-}
-
-JEMALLOC_ALWAYS_INLINE bool
-te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) {
-	return te_prof_sample_event_lookahead_surplus(tsd, usize, NULL);
-}
-
 JEMALLOC_ALWAYS_INLINE void
 te_event_advance(tsd_t *tsd, size_t usize, bool is_alloc) {
 	te_assert_invariants(tsd);
diff --git a/include/jemalloc/internal/thread_event_registry.h b/include/jemalloc/internal/thread_event_registry.h
new file mode 100644
index 00000000..bfb140aa
--- /dev/null
+++ b/include/jemalloc/internal/thread_event_registry.h
@@ -0,0 +1,107 @@
+#ifndef JEMALLOC_INTERNAL_THREAD_EVENT_REGISTRY_H
+#define JEMALLOC_INTERNAL_THREAD_EVENT_REGISTRY_H
+
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/tsd_types.h"
+
+#define TE_MAX_USER_EVENTS 4
+
+/* "te" is short for "thread_event" */
+enum te_alloc_e {
+#ifdef JEMALLOC_PROF
+	te_alloc_prof_sample,
+#endif
+	te_alloc_stats_interval,
+	te_alloc_tcache_gc,
+#ifdef JEMALLOC_STATS
+	te_alloc_peak,
+#endif
+	te_alloc_user0,
+	te_alloc_user1,
+	te_alloc_user2,
+	te_alloc_user3,
+	te_alloc_last = te_alloc_user3,
+	te_alloc_count = te_alloc_last + 1
+};
+typedef enum te_alloc_e te_alloc_t;
+
+enum te_dalloc_e {
+	te_dalloc_tcache_gc,
+#ifdef JEMALLOC_STATS
+	te_dalloc_peak,
+#endif
+	te_dalloc_user0,
+	te_dalloc_user1,
+	te_dalloc_user2,
+	te_dalloc_user3,
+	te_dalloc_last = te_dalloc_user3,
+	te_dalloc_count = te_dalloc_last + 1
+};
+typedef enum te_dalloc_e te_dalloc_t;
+
+/* These will live in tsd */
+typedef struct te_data_s te_data_t;
+struct te_data_s {
+	uint64_t alloc_wait[te_alloc_count];
+	uint64_t dalloc_wait[te_dalloc_count];
+};
+#define TE_DATA_INITIALIZER                                                    \
+	{                                                                      \
+		{0}, {                                                         \
+			0                                                      \
+		}                                                              \
+	}
+
+/*
+ * Check if user event is installed, installed and enabled, or not
+ * installed.
+ *
+ */
+enum te_enabled_e { te_enabled_not_installed, te_enabled_yes, te_enabled_no };
+typedef enum te_enabled_e te_enabled_t;
+
+typedef struct te_base_cb_s te_base_cb_t;
+struct te_base_cb_s {
+	te_enabled_t (*enabled)(void);
+	uint64_t (*new_event_wait)(tsd_t *tsd);
+	uint64_t (*postponed_event_wait)(tsd_t *tsd);
+	void (*event_handler)(tsd_t *tsd);
+};
+
+extern te_base_cb_t *te_alloc_handlers[te_alloc_count];
+extern te_base_cb_t *te_dalloc_handlers[te_dalloc_count];
+
+bool experimental_thread_events_boot(void);
+
+/*
+ *  User callback for thread events
+ *
+ *  is_alloc - true if event is allocation, false if event is free
+ *  tallocated  - number of bytes allocated on current thread so far
+ *  tdallocated - number of bytes allocated on current thread so far
+ */
+typedef void (*user_event_cb_t)(
+    bool is_alloc, uint64_t tallocated, uint64_t tdallocated);
+
+typedef struct user_hook_object_s user_hook_object_t;
+struct user_hook_object_s {
+	user_event_cb_t callback;
+	uint64_t        interval;
+	bool            is_alloc_only;
+};
+
+/*
+ * register user callback
+ *
+ * return zero if event was registered
+ *
+ * if interval is zero or callback is NULL, or
+ * no more slots are available event will not be registered
+ * and non-zero value will be returned
+ *
+ */
+int te_register_user_handler(tsdn_t *tsdn, user_hook_object_t *te_uobj);
+
+te_enabled_t te_user_event_enabled(size_t ue_idx, bool is_alloc);
+
+#endif /* JEMALLOC_INTERNAL_THREAD_EVENT_REGISTRY_H */
diff --git a/include/jemalloc/internal/ticker.h b/include/jemalloc/internal/ticker.h
index 6b51ddec..a1eec628 100644
--- a/include/jemalloc/internal/ticker.h
+++ b/include/jemalloc/internal/ticker.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_TICKER_H
 #define JEMALLOC_INTERNAL_TICKER_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/prng.h"
 #include "jemalloc/internal/util.h"
 
@@ -52,28 +53,32 @@ ticker_read(const ticker_t *ticker) {
  * worth the hassle, but this is on the fast path of both malloc and free (via
  * tcache_event).
  */
-#if defined(__GNUC__) && !defined(__clang__)				\
+#if defined(__GNUC__) && !defined(__clang__)                                   \
     && (defined(__x86_64__) || defined(__i386__))
 JEMALLOC_NOINLINE
 #endif
 static bool
-ticker_fixup(ticker_t *ticker) {
+ticker_fixup(ticker_t *ticker, bool delay_trigger) {
+	if (delay_trigger) {
+		ticker->tick = 0;
+		return false;
+	}
 	ticker->tick = ticker->nticks;
 	return true;
 }
 
 static inline bool
-ticker_ticks(ticker_t *ticker, int32_t nticks) {
+ticker_ticks(ticker_t *ticker, int32_t nticks, bool delay_trigger) {
 	ticker->tick -= nticks;
 	if (unlikely(ticker->tick < 0)) {
-		return ticker_fixup(ticker);
+		return ticker_fixup(ticker, delay_trigger);
 	}
 	return false;
 }
 
 static inline bool
-ticker_tick(ticker_t *ticker) {
-	return ticker_ticks(ticker, 1);
+ticker_tick(ticker_t *ticker, bool delay_trigger) {
+	return ticker_ticks(ticker, 1, delay_trigger);
 }
 
 /*
@@ -124,7 +129,8 @@ struct ticker_geom_s {
  * the behavior over long periods of time rather than the exact timing of the
  * initial ticks.
  */
-#define TICKER_GEOM_INIT(nticks) {nticks, nticks}
+#define TICKER_GEOM_INIT(nticks)                                               \
+	{ nticks, nticks }
 
 static inline void
 ticker_geom_init(ticker_geom_t *ticker, int32_t nticks) {
@@ -145,31 +151,39 @@ ticker_geom_read(const ticker_geom_t *ticker) {
 }
 
 /* Same deal as above. */
-#if defined(__GNUC__) && !defined(__clang__)				\
+#if defined(__GNUC__) && !defined(__clang__)                                   \
     && (defined(__x86_64__) || defined(__i386__))
 JEMALLOC_NOINLINE
 #endif
 static bool
-ticker_geom_fixup(ticker_geom_t *ticker, uint64_t *prng_state) {
+ticker_geom_fixup(
+    ticker_geom_t *ticker, uint64_t *prng_state, bool delay_trigger) {
+	if (delay_trigger) {
+		ticker->tick = 0;
+		return false;
+	}
+
 	uint64_t idx = prng_lg_range_u64(prng_state, TICKER_GEOM_NBITS);
-	ticker->tick = (uint32_t)(
-	    (uint64_t)ticker->nticks * (uint64_t)ticker_geom_table[idx]
-	    / (uint64_t)TICKER_GEOM_MUL);
+	ticker->tick = (uint32_t)((uint64_t)ticker->nticks
+	    * (uint64_t)ticker_geom_table[idx] / (uint64_t)TICKER_GEOM_MUL);
+
 	return true;
 }
 
 static inline bool
-ticker_geom_ticks(ticker_geom_t *ticker, uint64_t *prng_state, int32_t nticks) {
+ticker_geom_ticks(ticker_geom_t *ticker, uint64_t *prng_state, int32_t nticks,
+    bool delay_trigger) {
 	ticker->tick -= nticks;
 	if (unlikely(ticker->tick < 0)) {
-		return ticker_geom_fixup(ticker, prng_state);
+		return ticker_geom_fixup(ticker, prng_state, delay_trigger);
 	}
 	return false;
 }
 
 static inline bool
-ticker_geom_tick(ticker_geom_t *ticker, uint64_t *prng_state) {
-	return ticker_geom_ticks(ticker, prng_state, 1);
+ticker_geom_tick(
+    ticker_geom_t *ticker, uint64_t *prng_state, bool delay_trigger) {
+	return ticker_geom_ticks(ticker, prng_state, 1, delay_trigger);
 }
 
 #endif /* JEMALLOC_INTERNAL_TICKER_H */
diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h
index 66d68822..84101c65 100644
--- a/include/jemalloc/internal/tsd.h
+++ b/include/jemalloc/internal/tsd.h
@@ -1,313 +1,20 @@
 #ifndef JEMALLOC_INTERNAL_TSD_H
 #define JEMALLOC_INTERNAL_TSD_H
 
-#include "jemalloc/internal/activity_callback.h"
-#include "jemalloc/internal/arena_types.h"
-#include "jemalloc/internal/assert.h"
-#include "jemalloc/internal/bin_types.h"
-#include "jemalloc/internal/jemalloc_internal_externs.h"
-#include "jemalloc/internal/peak.h"
-#include "jemalloc/internal/prof_types.h"
-#include "jemalloc/internal/ql.h"
-#include "jemalloc/internal/rtree_tsd.h"
-#include "jemalloc/internal/tcache_types.h"
-#include "jemalloc/internal/tcache_structs.h"
-#include "jemalloc/internal/util.h"
-#include "jemalloc/internal/witness.h"
-
-/*
- * Thread-Specific-Data layout
- *
- * At least some thread-local data gets touched on the fast-path of almost all
- * malloc operations.  But much of it is only necessary down slow-paths, or
- * testing.  We want to colocate the fast-path data so that it can live on the
- * same cacheline if possible.  So we define three tiers of hotness:
- * TSD_DATA_FAST: Touched on the alloc/dalloc fast paths.
- * TSD_DATA_SLOW: Touched down slow paths.  "Slow" here is sort of general;
- *     there are "semi-slow" paths like "not a sized deallocation, but can still
- *     live in the tcache".  We'll want to keep these closer to the fast-path
- *     data.
- * TSD_DATA_SLOWER: Only touched in test or debug modes, or not touched at all.
- *
- * An additional concern is that the larger tcache bins won't be used (we have a
- * bin per size class, but by default only cache relatively small objects).  So
- * the earlier bins are in the TSD_DATA_FAST tier, but the later ones are in the
- * TSD_DATA_SLOWER tier.
- *
- * As a result of all this, we put the slow data first, then the fast data, then
- * the slower data, while keeping the tcache as the last element of the fast
- * data (so that the fast -> slower transition happens midway through the
- * tcache).  While we don't yet play alignment tricks to guarantee it, this
- * increases our odds of getting some cache/page locality on fast paths.
- */
-
-#ifdef JEMALLOC_JET
-typedef void (*test_callback_t)(int *);
-#  define MALLOC_TSD_TEST_DATA_INIT 0x72b65c10
-#  define MALLOC_TEST_TSD \
-    O(test_data,		int,			int)		\
-    O(test_callback,		test_callback_t,	int)
-#  define MALLOC_TEST_TSD_INITIALIZER , MALLOC_TSD_TEST_DATA_INIT, NULL
-#else
-#  define MALLOC_TEST_TSD
-#  define MALLOC_TEST_TSD_INITIALIZER
-#endif
-
-typedef ql_elm(tsd_t) tsd_link_t;
-
-/*  O(name,			type,			nullable type) */
-#define TSD_DATA_SLOW							\
-    O(tcache_enabled,		bool,			bool)		\
-    O(reentrancy_level,		int8_t,			int8_t)		\
-    O(thread_allocated_last_event,	uint64_t,	uint64_t)	\
-    O(thread_allocated_next_event,	uint64_t,	uint64_t)	\
-    O(thread_deallocated_last_event,	uint64_t,	uint64_t)	\
-    O(thread_deallocated_next_event,	uint64_t,	uint64_t)	\
-    O(tcache_gc_event_wait,	uint64_t,		uint64_t)	\
-    O(tcache_gc_dalloc_event_wait,	uint64_t,	uint64_t)	\
-    O(prof_sample_event_wait,	uint64_t,		uint64_t)	\
-    O(prof_sample_last_event,	uint64_t,		uint64_t)	\
-    O(stats_interval_event_wait,	uint64_t,	uint64_t)	\
-    O(stats_interval_last_event,	uint64_t,	uint64_t)	\
-    O(peak_alloc_event_wait,	uint64_t,		uint64_t)	\
-    O(peak_dalloc_event_wait,	uint64_t,	uint64_t)		\
-    O(prof_tdata,		prof_tdata_t *,		prof_tdata_t *)	\
-    O(prng_state,		uint64_t,		uint64_t)	\
-    O(san_extents_until_guard_small,	uint64_t,	uint64_t)	\
-    O(san_extents_until_guard_large,	uint64_t,	uint64_t)	\
-    O(iarena,			arena_t *,		arena_t *)	\
-    O(arena,			arena_t *,		arena_t *)	\
-    O(arena_decay_ticker,	ticker_geom_t,		ticker_geom_t)	\
-    O(sec_shard,		uint8_t,		uint8_t)	\
-    O(binshards,		tsd_binshards_t,	tsd_binshards_t)\
-    O(tsd_link,			tsd_link_t,		tsd_link_t)	\
-    O(in_hook,			bool,			bool)		\
-    O(peak,			peak_t,			peak_t)		\
-    O(activity_callback_thunk,	activity_callback_thunk_t,		\
-	activity_callback_thunk_t)					\
-    O(tcache_slow,		tcache_slow_t,		tcache_slow_t)	\
-    O(rtree_ctx,		rtree_ctx_t,		rtree_ctx_t)
-
-#define TSD_DATA_SLOW_INITIALIZER					\
-    /* tcache_enabled */	TCACHE_ENABLED_ZERO_INITIALIZER,	\
-    /* reentrancy_level */	0,					\
-    /* thread_allocated_last_event */	0,				\
-    /* thread_allocated_next_event */	0,				\
-    /* thread_deallocated_last_event */	0,				\
-    /* thread_deallocated_next_event */	0,				\
-    /* tcache_gc_event_wait */		0,				\
-    /* tcache_gc_dalloc_event_wait */	0,				\
-    /* prof_sample_event_wait */	0,				\
-    /* prof_sample_last_event */	0,				\
-    /* stats_interval_event_wait */	0,				\
-    /* stats_interval_last_event */	0,				\
-    /* peak_alloc_event_wait */		0,				\
-    /* peak_dalloc_event_wait */	0,				\
-    /* prof_tdata */		NULL,					\
-    /* prng_state */		0,					\
-    /* san_extents_until_guard_small */	0,				\
-    /* san_extents_until_guard_large */	0,				\
-    /* iarena */		NULL,					\
-    /* arena */			NULL,					\
-    /* arena_decay_ticker */						\
-	TICKER_GEOM_INIT(ARENA_DECAY_NTICKS_PER_UPDATE),		\
-    /* sec_shard */		(uint8_t)-1,				\
-    /* binshards */		TSD_BINSHARDS_ZERO_INITIALIZER,		\
-    /* tsd_link */		{NULL},					\
-    /* in_hook */		false,					\
-    /* peak */			PEAK_INITIALIZER,			\
-    /* activity_callback_thunk */					\
-	ACTIVITY_CALLBACK_THUNK_INITIALIZER,				\
-    /* tcache_slow */		TCACHE_SLOW_ZERO_INITIALIZER,		\
-    /* rtree_ctx */		RTREE_CTX_INITIALIZER,
-
-/*  O(name,			type,			nullable type) */
-#define TSD_DATA_FAST							\
-    O(thread_allocated,		uint64_t,		uint64_t)	\
-    O(thread_allocated_next_event_fast,	uint64_t,	uint64_t)	\
-    O(thread_deallocated,	uint64_t,		uint64_t)	\
-    O(thread_deallocated_next_event_fast, uint64_t,	uint64_t)	\
-    O(tcache,			tcache_t,		tcache_t)
-
-#define TSD_DATA_FAST_INITIALIZER					\
-    /* thread_allocated */	0,					\
-    /* thread_allocated_next_event_fast */ 0, 				\
-    /* thread_deallocated */	0,					\
-    /* thread_deallocated_next_event_fast */	0,			\
-    /* tcache */		TCACHE_ZERO_INITIALIZER,
-
-/*  O(name,			type,			nullable type) */
-#define TSD_DATA_SLOWER							\
-    O(witness_tsd,              witness_tsd_t,		witness_tsdn_t)	\
-    MALLOC_TEST_TSD
-
-#define TSD_DATA_SLOWER_INITIALIZER					\
-    /* witness */		WITNESS_TSD_INITIALIZER			\
-    /* test data */		MALLOC_TEST_TSD_INITIALIZER
-
-
-#define TSD_INITIALIZER {						\
-    				TSD_DATA_SLOW_INITIALIZER		\
-    /* state */			ATOMIC_INIT(tsd_state_uninitialized),	\
-    				TSD_DATA_FAST_INITIALIZER		\
-    				TSD_DATA_SLOWER_INITIALIZER		\
-}
-
-#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
-void _malloc_tsd_cleanup_register(bool (*f)(void));
-#endif
-
-void *malloc_tsd_malloc(size_t size);
-void malloc_tsd_dalloc(void *wrapper);
-tsd_t *malloc_tsd_boot0(void);
-void malloc_tsd_boot1(void);
-void tsd_cleanup(void *arg);
-tsd_t *tsd_fetch_slow(tsd_t *tsd, bool internal);
-void tsd_state_set(tsd_t *tsd, uint8_t new_state);
-void tsd_slow_update(tsd_t *tsd);
-void tsd_prefork(tsd_t *tsd);
-void tsd_postfork_parent(tsd_t *tsd);
-void tsd_postfork_child(tsd_t *tsd);
-
-/*
- * Call ..._inc when your module wants to take all threads down the slow paths,
- * and ..._dec when it no longer needs to.
- */
-void tsd_global_slow_inc(tsdn_t *tsdn);
-void tsd_global_slow_dec(tsdn_t *tsdn);
-bool tsd_global_slow();
-
-enum {
-	/* Common case --> jnz. */
-	tsd_state_nominal = 0,
-	/* Initialized but on slow path. */
-	tsd_state_nominal_slow = 1,
-	/*
-	 * Some thread has changed global state in such a way that all nominal
-	 * threads need to recompute their fast / slow status the next time they
-	 * get a chance.
-	 *
-	 * Any thread can change another thread's status *to* recompute, but
-	 * threads are the only ones who can change their status *from*
-	 * recompute.
-	 */
-	tsd_state_nominal_recompute = 2,
-	/*
-	 * The above nominal states should be lower values.  We use
-	 * tsd_nominal_max to separate nominal states from threads in the
-	 * process of being born / dying.
-	 */
-	tsd_state_nominal_max = 2,
-
-	/*
-	 * A thread might free() during its death as its only allocator action;
-	 * in such scenarios, we need tsd, but set up in such a way that no
-	 * cleanup is necessary.
-	 */
-	tsd_state_minimal_initialized = 3,
-	/* States during which we know we're in thread death. */
-	tsd_state_purgatory = 4,
-	tsd_state_reincarnated = 5,
-	/*
-	 * What it says on the tin; tsd that hasn't been initialized.  Note
-	 * that even when the tsd struct lives in TLS, when need to keep track
-	 * of stuff like whether or not our pthread destructors have been
-	 * scheduled, so this really truly is different than the nominal state.
-	 */
-	tsd_state_uninitialized = 6
-};
-
-/*
- * Some TSD accesses can only be done in a nominal state.  To enforce this, we
- * wrap TSD member access in a function that asserts on TSD state, and mangle
- * field names to prevent touching them accidentally.
- */
-#define TSD_MANGLE(n) cant_access_tsd_items_directly_use_a_getter_or_setter_##n
-
-#ifdef JEMALLOC_U8_ATOMICS
-#  define tsd_state_t atomic_u8_t
-#  define tsd_atomic_load atomic_load_u8
-#  define tsd_atomic_store atomic_store_u8
-#  define tsd_atomic_exchange atomic_exchange_u8
-#else
-#  define tsd_state_t atomic_u32_t
-#  define tsd_atomic_load atomic_load_u32
-#  define tsd_atomic_store atomic_store_u32
-#  define tsd_atomic_exchange atomic_exchange_u32
-#endif
-
-/* The actual tsd. */
-struct tsd_s {
-	/*
-	 * The contents should be treated as totally opaque outside the tsd
-	 * module.  Access any thread-local state through the getters and
-	 * setters below.
-	 */
-
-#define O(n, t, nt)							\
-	t TSD_MANGLE(n);
-
-	TSD_DATA_SLOW
-	/*
-	 * We manually limit the state to just a single byte.  Unless the 8-bit
-	 * atomics are unavailable (which is rare).
-	 */
-	tsd_state_t state;
-	TSD_DATA_FAST
-	TSD_DATA_SLOWER
-#undef O
-};
-
-JEMALLOC_ALWAYS_INLINE uint8_t
-tsd_state_get(tsd_t *tsd) {
-	/*
-	 * This should be atomic.  Unfortunately, compilers right now can't tell
-	 * that this can be done as a memory comparison, and forces a load into
-	 * a register that hurts fast-path performance.
-	 */
-	/* return atomic_load_u8(&tsd->state, ATOMIC_RELAXED); */
-	return *(uint8_t *)&tsd->state;
-}
-
-/*
- * Wrapper around tsd_t that makes it possible to avoid implicit conversion
- * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be
- * explicitly converted to tsd_t, which is non-nullable.
- */
-struct tsdn_s {
-	tsd_t tsd;
-};
-#define TSDN_NULL ((tsdn_t *)0)
-JEMALLOC_ALWAYS_INLINE tsdn_t *
-tsd_tsdn(tsd_t *tsd) {
-	return (tsdn_t *)tsd;
-}
-
-JEMALLOC_ALWAYS_INLINE bool
-tsdn_null(const tsdn_t *tsdn) {
-	return tsdn == NULL;
-}
-
-JEMALLOC_ALWAYS_INLINE tsd_t *
-tsdn_tsd(tsdn_t *tsdn) {
-	assert(!tsdn_null(tsdn));
-
-	return &tsdn->tsd;
-}
-
 /*
  * We put the platform-specific data declarations and inlines into their own
  * header files to avoid cluttering this file.  They define tsd_boot0,
  * tsd_boot1, tsd_boot, tsd_booted_get, tsd_get_allocates, tsd_get, and tsd_set.
  */
 #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
-#include "jemalloc/internal/tsd_malloc_thread_cleanup.h"
+#	include "jemalloc/internal/jemalloc_preamble.h"
+#	include "jemalloc/internal/tsd_malloc_thread_cleanup.h"
 #elif (defined(JEMALLOC_TLS))
-#include "jemalloc/internal/tsd_tls.h"
+#	include "jemalloc/internal/tsd_tls.h"
 #elif (defined(_WIN32))
-#include "jemalloc/internal/tsd_win.h"
+#	include "jemalloc/internal/tsd_win.h"
 #else
-#include "jemalloc/internal/tsd_generic.h"
+#	include "jemalloc/internal/tsd_generic.h"
 #endif
 
 /*
@@ -315,16 +22,16 @@ tsdn_tsd(tsdn_t *tsdn) {
  * foo.  This omits some safety checks, and so can be used during tsd
  * initialization and cleanup.
  */
-#define O(n, t, nt)							\
-JEMALLOC_ALWAYS_INLINE t *						\
-tsd_##n##p_get_unsafe(tsd_t *tsd) {					\
-	return &tsd->TSD_MANGLE(n);					\
-}
+#define O(n, t, nt)                                                            \
+	JEMALLOC_ALWAYS_INLINE t *tsd_##n##p_get_unsafe(tsd_t *tsd) {          \
+		return &tsd->TSD_MANGLE(n);                                    \
+	}
 TSD_DATA_SLOW
 TSD_DATA_FAST
 TSD_DATA_SLOWER
 #undef O
 
+/* clang-format off */
 /* tsd_foop_get(tsd) returns a pointer to the thread-local instance of foo. */
 #define O(n, t, nt)							\
 JEMALLOC_ALWAYS_INLINE t *						\
@@ -341,6 +48,7 @@ tsd_##n##p_get(tsd_t *tsd) {						\
 	    state == tsd_state_minimal_initialized);			\
 	return tsd_##n##p_get_unsafe(tsd);				\
 }
+/* clang-format on */
 TSD_DATA_SLOW
 TSD_DATA_FAST
 TSD_DATA_SLOWER
@@ -350,39 +58,36 @@ TSD_DATA_SLOWER
  * tsdn_foop_get(tsdn) returns either the thread-local instance of foo (if tsdn
  * isn't NULL), or NULL (if tsdn is NULL), cast to the nullable pointer type.
  */
-#define O(n, t, nt)							\
-JEMALLOC_ALWAYS_INLINE nt *						\
-tsdn_##n##p_get(tsdn_t *tsdn) {						\
-	if (tsdn_null(tsdn)) {						\
-		return NULL;						\
-	}								\
-	tsd_t *tsd = tsdn_tsd(tsdn);					\
-	return (nt *)tsd_##n##p_get(tsd);				\
-}
+#define O(n, t, nt)                                                            \
+	JEMALLOC_ALWAYS_INLINE nt *tsdn_##n##p_get(tsdn_t *tsdn) {             \
+		if (tsdn_null(tsdn)) {                                         \
+			return NULL;                                           \
+		}                                                              \
+		tsd_t *tsd = tsdn_tsd(tsdn);                                   \
+		return (nt *)tsd_##n##p_get(tsd);                              \
+	}
 TSD_DATA_SLOW
 TSD_DATA_FAST
 TSD_DATA_SLOWER
 #undef O
 
 /* tsd_foo_get(tsd) returns the value of the thread-local instance of foo. */
-#define O(n, t, nt)							\
-JEMALLOC_ALWAYS_INLINE t						\
-tsd_##n##_get(tsd_t *tsd) {						\
-	return *tsd_##n##p_get(tsd);					\
-}
+#define O(n, t, nt)                                                            \
+	JEMALLOC_ALWAYS_INLINE t tsd_##n##_get(tsd_t *tsd) {                   \
+		return *tsd_##n##p_get(tsd);                                   \
+	}
 TSD_DATA_SLOW
 TSD_DATA_FAST
 TSD_DATA_SLOWER
 #undef O
 
 /* tsd_foo_set(tsd, val) updates the thread-local instance of foo to be val. */
-#define O(n, t, nt)							\
-JEMALLOC_ALWAYS_INLINE void						\
-tsd_##n##_set(tsd_t *tsd, t val) {					\
-	assert(tsd_state_get(tsd) != tsd_state_reincarnated &&		\
-	    tsd_state_get(tsd) != tsd_state_minimal_initialized);	\
-	*tsd_##n##p_get(tsd) = val;					\
-}
+#define O(n, t, nt)                                                            \
+	JEMALLOC_ALWAYS_INLINE void tsd_##n##_set(tsd_t *tsd, t val) {         \
+		assert(tsd_state_get(tsd) != tsd_state_reincarnated            \
+		    && tsd_state_get(tsd) != tsd_state_minimal_initialized);   \
+		*tsd_##n##p_get(tsd) = val;                                    \
+	}
 TSD_DATA_SLOW
 TSD_DATA_FAST
 TSD_DATA_SLOWER
@@ -395,8 +100,8 @@ tsd_assert_fast(tsd_t *tsd) {
 	 * counters; it's not in general possible to ensure that they won't
 	 * change asynchronously from underneath us.
 	 */
-	assert(!malloc_slow && tsd_tcache_enabled_get(tsd) &&
-	    tsd_reentrancy_level_get(tsd) == 0);
+	assert(!malloc_slow && tsd_tcache_enabled_get(tsd)
+	    && tsd_reentrancy_level_get(tsd) == 0);
 }
 
 JEMALLOC_ALWAYS_INLINE bool
@@ -485,8 +190,8 @@ tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) {
 
 static inline bool
 tsd_state_nocleanup(tsd_t *tsd) {
-	return tsd_state_get(tsd) == tsd_state_reincarnated ||
-	    tsd_state_get(tsd) == tsd_state_minimal_initialized;
+	return tsd_state_get(tsd) == tsd_state_reincarnated
+	    || tsd_state_get(tsd) == tsd_state_minimal_initialized;
 }
 
 /*
diff --git a/include/jemalloc/internal/tsd_generic.h b/include/jemalloc/internal/tsd_generic.h
index a718472f..e049766f 100644
--- a/include/jemalloc/internal/tsd_generic.h
+++ b/include/jemalloc/internal/tsd_generic.h
@@ -1,31 +1,35 @@
 #ifdef JEMALLOC_INTERNAL_TSD_GENERIC_H
-#error This file should be included only once, by tsd.h.
+#	error This file should be included only once, by tsd.h.
 #endif
 #define JEMALLOC_INTERNAL_TSD_GENERIC_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/ql.h"
+#include "jemalloc/internal/tsd_internals.h"
+#include "jemalloc/internal/tsd_types.h"
+
 typedef struct tsd_init_block_s tsd_init_block_t;
 struct tsd_init_block_s {
 	ql_elm(tsd_init_block_t) link;
 	pthread_t thread;
-	void *data;
+	void     *data;
 };
 
 /* Defined in tsd.c, to allow the mutex headers to have tsd dependencies. */
 typedef struct tsd_init_head_s tsd_init_head_t;
 
 typedef struct {
-	bool initialized;
+	bool  initialized;
 	tsd_t val;
 } tsd_wrapper_t;
 
-void *tsd_init_check_recursion(tsd_init_head_t *head,
-    tsd_init_block_t *block);
-void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block);
+void *tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block);
+void  tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block);
 
-extern pthread_key_t tsd_tsd;
+extern pthread_key_t   tsd_tsd;
 extern tsd_init_head_t tsd_init_head;
-extern tsd_wrapper_t tsd_boot_wrapper;
-extern bool tsd_booted;
+extern tsd_wrapper_t   tsd_boot_wrapper;
+extern bool            tsd_booted;
 
 /* Initialization/cleanup. */
 JEMALLOC_ALWAYS_INLINE void
@@ -37,8 +41,8 @@ tsd_cleanup_wrapper(void *arg) {
 		tsd_cleanup(&wrapper->val);
 		if (wrapper->initialized) {
 			/* Trigger another cleanup round. */
-			if (pthread_setspecific(tsd_tsd, (void *)wrapper) != 0)
-			{
+			if (pthread_setspecific(tsd_tsd, (void *)wrapper)
+			    != 0) {
 				malloc_write("<jemalloc>: Error setting TSD\n");
 				if (opt_abort) {
 					abort();
@@ -73,23 +77,23 @@ tsd_wrapper_get(bool init) {
 
 	if (init && unlikely(wrapper == NULL)) {
 		tsd_init_block_t block;
-		wrapper = (tsd_wrapper_t *)
-		    tsd_init_check_recursion(&tsd_init_head, &block);
+		wrapper = (tsd_wrapper_t *)tsd_init_check_recursion(
+		    &tsd_init_head, &block);
 		if (wrapper) {
 			return wrapper;
 		}
-		wrapper = (tsd_wrapper_t *)
-		    malloc_tsd_malloc(sizeof(tsd_wrapper_t));
+		wrapper = (tsd_wrapper_t *)malloc_tsd_malloc(
+		    sizeof(tsd_wrapper_t));
 		block.data = (void *)wrapper;
 		if (wrapper == NULL) {
 			malloc_write("<jemalloc>: Error allocating TSD\n");
 			abort();
 		} else {
 			wrapper->initialized = false;
-      JEMALLOC_DIAGNOSTIC_PUSH
-      JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
+			JEMALLOC_DIAGNOSTIC_PUSH
+			JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
 			tsd_t initializer = TSD_INITIALIZER;
-      JEMALLOC_DIAGNOSTIC_POP
+			JEMALLOC_DIAGNOSTIC_POP
 			wrapper->val = initializer;
 		}
 		tsd_wrapper_set(wrapper);
@@ -100,11 +104,11 @@ tsd_wrapper_get(bool init) {
 
 JEMALLOC_ALWAYS_INLINE bool
 tsd_boot0(void) {
-	tsd_wrapper_t *wrapper;
+	tsd_wrapper_t   *wrapper;
 	tsd_init_block_t block;
 
-	wrapper = (tsd_wrapper_t *)
-	    tsd_init_check_recursion(&tsd_init_head, &block);
+	wrapper = (tsd_wrapper_t *)tsd_init_check_recursion(
+	    &tsd_init_head, &block);
 	if (wrapper) {
 		return false;
 	}
@@ -129,10 +133,10 @@ tsd_boot1(void) {
 	tsd_boot_wrapper.initialized = false;
 	tsd_cleanup(&tsd_boot_wrapper.val);
 	wrapper->initialized = false;
-  JEMALLOC_DIAGNOSTIC_PUSH
-  JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
+	JEMALLOC_DIAGNOSTIC_PUSH
+	JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
 	tsd_t initializer = TSD_INITIALIZER;
-  JEMALLOC_DIAGNOSTIC_POP
+	JEMALLOC_DIAGNOSTIC_POP
 	wrapper->val = initializer;
 	tsd_wrapper_set(wrapper);
 }
diff --git a/include/jemalloc/internal/tsd_internals.h b/include/jemalloc/internal/tsd_internals.h
new file mode 100644
index 00000000..53b58d0c
--- /dev/null
+++ b/include/jemalloc/internal/tsd_internals.h
@@ -0,0 +1,280 @@
+#ifdef JEMALLOC_INTERNAL_TSD_INTERNALS_H
+#error This file should be included only once, by one of tsd_malloc_thread_cleanup.h, tsd_tls.h, tsd_generic.h, or tsd_win.h
+#endif
+#define JEMALLOC_INTERNAL_TSD_INTERNALS_H
+
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/arena_types.h"
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/bin_types.h"
+#include "jemalloc/internal/jemalloc_internal_externs.h"
+#include "jemalloc/internal/peak.h"
+#include "jemalloc/internal/prof_types.h"
+#include "jemalloc/internal/ql.h"
+#include "jemalloc/internal/rtree_tsd.h"
+#include "jemalloc/internal/tcache_structs.h"
+#include "jemalloc/internal/tcache_types.h"
+#include "jemalloc/internal/thread_event_registry.h"
+#include "jemalloc/internal/tsd_types.h"
+#include "jemalloc/internal/util.h"
+#include "jemalloc/internal/witness.h"
+
+/*
+ * Thread-Specific-Data layout
+ *
+ * At least some thread-local data gets touched on the fast-path of almost all
+ * malloc operations.  But much of it is only necessary down slow-paths, or
+ * testing.  We want to colocate the fast-path data so that it can live on the
+ * same cacheline if possible.  So we define three tiers of hotness:
+ * TSD_DATA_FAST: Touched on the alloc/dalloc fast paths.
+ * TSD_DATA_SLOW: Touched down slow paths.  "Slow" here is sort of general;
+ *     there are "semi-slow" paths like "not a sized deallocation, but can still
+ *     live in the tcache".  We'll want to keep these closer to the fast-path
+ *     data.
+ * TSD_DATA_SLOWER: Only touched in test or debug modes, or not touched at all.
+ *
+ * An additional concern is that the larger tcache bins won't be used (we have a
+ * bin per size class, but by default only cache relatively small objects).  So
+ * the earlier bins are in the TSD_DATA_FAST tier, but the later ones are in the
+ * TSD_DATA_SLOWER tier.
+ *
+ * As a result of all this, we put the slow data first, then the fast data, then
+ * the slower data, while keeping the tcache as the last element of the fast
+ * data (so that the fast -> slower transition happens midway through the
+ * tcache).  While we don't yet play alignment tricks to guarantee it, this
+ * increases our odds of getting some cache/page locality on fast paths.
+ */
+
+#ifdef JEMALLOC_JET
+typedef void (*test_callback_t)(int *);
+#	define MALLOC_TSD_TEST_DATA_INIT 0x72b65c10
+#	define MALLOC_TEST_TSD                                                \
+		O(test_data, int, int)                                         \
+		O(test_callback, test_callback_t, int)
+#	define MALLOC_TEST_TSD_INITIALIZER , MALLOC_TSD_TEST_DATA_INIT, NULL
+#else
+#	define MALLOC_TEST_TSD
+#	define MALLOC_TEST_TSD_INITIALIZER
+#endif
+
+typedef ql_elm(tsd_t) tsd_link_t;
+
+/*  O(name,			type,			nullable type) */
+#define TSD_DATA_SLOW                                                          \
+	O(tcache_enabled, bool, bool)                                          \
+	O(reentrancy_level, int8_t, int8_t)                                    \
+	O(min_init_state_nfetched, uint8_t, uint8_t)                           \
+	O(thread_allocated_last_event, uint64_t, uint64_t)                     \
+	O(thread_allocated_next_event, uint64_t, uint64_t)                     \
+	O(thread_deallocated_last_event, uint64_t, uint64_t)                   \
+	O(thread_deallocated_next_event, uint64_t, uint64_t)                   \
+	O(te_data, te_data_t, te_data_t)                                       \
+	O(prof_sample_last_event, uint64_t, uint64_t)                          \
+	O(stats_interval_last_event, uint64_t, uint64_t)                       \
+	O(prof_tdata, prof_tdata_t *, prof_tdata_t *)                          \
+	O(prng_state, uint64_t, uint64_t)                                      \
+	O(san_extents_until_guard_small, uint64_t, uint64_t)                   \
+	O(san_extents_until_guard_large, uint64_t, uint64_t)                   \
+	O(iarena, arena_t *, arena_t *)                                        \
+	O(arena, arena_t *, arena_t *)                                         \
+	O(arena_decay_ticker, ticker_geom_t, ticker_geom_t)                    \
+	O(sec_shard, uint8_t, uint8_t)                                         \
+	O(binshards, tsd_binshards_t, tsd_binshards_t)                         \
+	O(tsd_link, tsd_link_t, tsd_link_t)                                    \
+	O(in_hook, bool, bool)                                                 \
+	O(peak, peak_t, peak_t)                                                \
+	O(tcache_slow, tcache_slow_t, tcache_slow_t)                           \
+	O(rtree_ctx, rtree_ctx_t, rtree_ctx_t)
+
+#define TSD_DATA_SLOW_INITIALIZER                                              \
+	/* tcache_enabled */ TCACHE_ENABLED_ZERO_INITIALIZER,                  \
+	    /* reentrancy_level */ 0, /* min_init_state_nfetched */ 0,         \
+	    /* thread_allocated_last_event */ 0,                               \
+	    /* thread_allocated_next_event */ 0,                               \
+	    /* thread_deallocated_last_event */ 0,                             \
+	    /* thread_deallocated_next_event */ 0,                             \
+	    /* te_data */ TE_DATA_INITIALIZER, /* prof_sample_last_event */ 0, \
+	    /* stats_interval_last_event */ 0, /* prof_tdata */ NULL,          \
+	    /* prng_state */ 0, /* san_extents_until_guard_small */ 0,         \
+	    /* san_extents_until_guard_large */ 0, /* iarena */ NULL,          \
+	    /* arena */ NULL, /* arena_decay_ticker */                         \
+	    TICKER_GEOM_INIT(ARENA_DECAY_NTICKS_PER_UPDATE),                   \
+	    /* sec_shard */ (uint8_t) - 1,                                     \
+	    /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER,                    \
+	    /* tsd_link */ {NULL}, /* in_hook */ false,                        \
+	    /* peak */ PEAK_INITIALIZER,                                       \
+	    /* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER,                    \
+	    /* rtree_ctx */ RTREE_CTX_INITIALIZER,
+
+/*  O(name,			type,			nullable type) */
+#define TSD_DATA_FAST                                                          \
+	O(thread_allocated, uint64_t, uint64_t)                                \
+	O(thread_allocated_next_event_fast, uint64_t, uint64_t)                \
+	O(thread_deallocated, uint64_t, uint64_t)                              \
+	O(thread_deallocated_next_event_fast, uint64_t, uint64_t)              \
+	O(tcache, tcache_t, tcache_t)
+
+#define TSD_DATA_FAST_INITIALIZER                                              \
+	/* thread_allocated */ 0, /* thread_allocated_next_event_fast */ 0,    \
+	    /* thread_deallocated */ 0,                                        \
+	    /* thread_deallocated_next_event_fast */ 0,                        \
+	    /* tcache */ TCACHE_ZERO_INITIALIZER,
+
+/*  O(name,			type,			nullable type) */
+#define TSD_DATA_SLOWER                                                        \
+	O(witness_tsd, witness_tsd_t, witness_tsdn_t)                          \
+	MALLOC_TEST_TSD
+
+#define TSD_DATA_SLOWER_INITIALIZER                                            \
+	/* witness */ WITNESS_TSD_INITIALIZER                                  \
+	/* test data */ MALLOC_TEST_TSD_INITIALIZER
+
+#define TSD_INITIALIZER                                                        \
+	{                                                                      \
+		TSD_DATA_SLOW_INITIALIZER                                      \
+		/* state */ ATOMIC_INIT(tsd_state_uninitialized),              \
+		    TSD_DATA_FAST_INITIALIZER TSD_DATA_SLOWER_INITIALIZER      \
+	}
+
+#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
+void _malloc_tsd_cleanup_register(bool (*f)(void));
+#endif
+
+void  *malloc_tsd_malloc(size_t size);
+void   malloc_tsd_dalloc(void *wrapper);
+tsd_t *malloc_tsd_boot0(void);
+void   malloc_tsd_boot1(void);
+void   tsd_cleanup(void *arg);
+tsd_t *tsd_fetch_slow(tsd_t *tsd, bool minimal);
+void   tsd_state_set(tsd_t *tsd, uint8_t new_state);
+void   tsd_slow_update(tsd_t *tsd);
+void   tsd_prefork(tsd_t *tsd);
+void   tsd_postfork_parent(tsd_t *tsd);
+void   tsd_postfork_child(tsd_t *tsd);
+
+/*
+ * Call ..._inc when your module wants to take all threads down the slow paths,
+ * and ..._dec when it no longer needs to.
+ */
+void tsd_global_slow_inc(tsdn_t *tsdn);
+void tsd_global_slow_dec(tsdn_t *tsdn);
+bool tsd_global_slow(void);
+
+#define TSD_MIN_INIT_STATE_MAX_FETCHED (128)
+
+enum {
+	/* Common case --> jnz. */
+	tsd_state_nominal = 0,
+	/* Initialized but on slow path. */
+	tsd_state_nominal_slow = 1,
+	/*
+	 * Some thread has changed global state in such a way that all nominal
+	 * threads need to recompute their fast / slow status the next time they
+	 * get a chance.
+	 *
+	 * Any thread can change another thread's status *to* recompute, but
+	 * threads are the only ones who can change their status *from*
+	 * recompute.
+	 */
+	tsd_state_nominal_recompute = 2,
+	/*
+	 * The above nominal states should be lower values.  We use
+	 * tsd_nominal_max to separate nominal states from threads in the
+	 * process of being born / dying.
+	 */
+	tsd_state_nominal_max = 2,
+
+	/*
+	 * A thread might free() during its death as its only allocator action;
+	 * in such scenarios, we need tsd, but set up in such a way that no
+	 * cleanup is necessary.
+	 */
+	tsd_state_minimal_initialized = 3,
+	/* States during which we know we're in thread death. */
+	tsd_state_purgatory = 4,
+	tsd_state_reincarnated = 5,
+	/*
+	 * What it says on the tin; tsd that hasn't been initialized.  Note
+	 * that even when the tsd struct lives in TLS, when need to keep track
+	 * of stuff like whether or not our pthread destructors have been
+	 * scheduled, so this really truly is different than the nominal state.
+	 */
+	tsd_state_uninitialized = 6
+};
+
+/*
+ * Some TSD accesses can only be done in a nominal state.  To enforce this, we
+ * wrap TSD member access in a function that asserts on TSD state, and mangle
+ * field names to prevent touching them accidentally.
+ */
+#define TSD_MANGLE(n) cant_access_tsd_items_directly_use_a_getter_or_setter_##n
+
+#ifdef JEMALLOC_U8_ATOMICS
+#	define tsd_state_t atomic_u8_t
+#	define tsd_atomic_load atomic_load_u8
+#	define tsd_atomic_store atomic_store_u8
+#	define tsd_atomic_exchange atomic_exchange_u8
+#else
+#	define tsd_state_t atomic_u32_t
+#	define tsd_atomic_load atomic_load_u32
+#	define tsd_atomic_store atomic_store_u32
+#	define tsd_atomic_exchange atomic_exchange_u32
+#endif
+
+/* The actual tsd. */
+struct tsd_s {
+	/*
+	 * The contents should be treated as totally opaque outside the tsd
+	 * module.  Access any thread-local state through the getters and
+	 * setters below.
+	 */
+
+#define O(n, t, nt) t TSD_MANGLE(n);
+
+	TSD_DATA_SLOW
+	/*
+	 * We manually limit the state to just a single byte.  Unless the 8-bit
+	 * atomics are unavailable (which is rare).
+	 */
+	tsd_state_t state;
+	TSD_DATA_FAST
+	TSD_DATA_SLOWER
+#undef O
+};
+
+JEMALLOC_ALWAYS_INLINE uint8_t
+tsd_state_get(tsd_t *tsd) {
+	/*
+	 * This should be atomic.  Unfortunately, compilers right now can't tell
+	 * that this can be done as a memory comparison, and forces a load into
+	 * a register that hurts fast-path performance.
+	 */
+	/* return atomic_load_u8(&tsd->state, ATOMIC_RELAXED); */
+	return *(uint8_t *)&tsd->state;
+}
+
+/*
+ * Wrapper around tsd_t that makes it possible to avoid implicit conversion
+ * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be
+ * explicitly converted to tsd_t, which is non-nullable.
+ */
+struct tsdn_s {
+	tsd_t tsd;
+};
+#define TSDN_NULL ((tsdn_t *)0)
+JEMALLOC_ALWAYS_INLINE tsdn_t *
+tsd_tsdn(tsd_t *tsd) {
+	return (tsdn_t *)tsd;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsdn_null(const tsdn_t *tsdn) {
+	return tsdn == NULL;
+}
+
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsdn_tsd(tsdn_t *tsdn) {
+	assert(!tsdn_null(tsdn));
+
+	return &tsdn->tsd;
+}
diff --git a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h
index d8f3ef13..00756df1 100644
--- a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h
+++ b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h
@@ -1,8 +1,12 @@
 #ifdef JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H
-#error This file should be included only once, by tsd.h.
+#	error This file should be included only once, by tsd.h.
 #endif
 #define JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/tsd_internals.h"
+#include "jemalloc/internal/tsd_types.h"
+
 #define JEMALLOC_TSD_TYPE_ATTR(type) __thread type JEMALLOC_TLS_MODEL
 
 extern JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls;
diff --git a/include/jemalloc/internal/tsd_tls.h b/include/jemalloc/internal/tsd_tls.h
index 7d6c805b..6536eb54 100644
--- a/include/jemalloc/internal/tsd_tls.h
+++ b/include/jemalloc/internal/tsd_tls.h
@@ -1,13 +1,17 @@
 #ifdef JEMALLOC_INTERNAL_TSD_TLS_H
-#error This file should be included only once, by tsd.h.
+#	error This file should be included only once, by tsd.h.
 #endif
 #define JEMALLOC_INTERNAL_TSD_TLS_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/tsd_internals.h"
+#include "jemalloc/internal/tsd_types.h"
+
 #define JEMALLOC_TSD_TYPE_ATTR(type) __thread type JEMALLOC_TLS_MODEL
 
 extern JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls;
 extern pthread_key_t tsd_tsd;
-extern bool tsd_booted;
+extern bool          tsd_booted;
 
 /* Initialization/cleanup. */
 JEMALLOC_ALWAYS_INLINE bool
diff --git a/include/jemalloc/internal/tsd_types.h b/include/jemalloc/internal/tsd_types.h
index a6ae37da..46479506 100644
--- a/include/jemalloc/internal/tsd_types.h
+++ b/include/jemalloc/internal/tsd_types.h
@@ -1,9 +1,11 @@
 #ifndef JEMALLOC_INTERNAL_TSD_TYPES_H
 #define JEMALLOC_INTERNAL_TSD_TYPES_H
 
-#define MALLOC_TSD_CLEANUPS_MAX	4
+#define MALLOC_TSD_CLEANUPS_MAX 4
 
-typedef struct tsd_s tsd_t;
+#include "jemalloc/internal/jemalloc_preamble.h"
+
+typedef struct tsd_s  tsd_t;
 typedef struct tsdn_s tsdn_t;
 typedef bool (*malloc_tsd_cleanup_t)(void);
 
diff --git a/include/jemalloc/internal/tsd_win.h b/include/jemalloc/internal/tsd_win.h
index a91dac88..8b22bec1 100644
--- a/include/jemalloc/internal/tsd_win.h
+++ b/include/jemalloc/internal/tsd_win.h
@@ -1,21 +1,41 @@
 #ifdef JEMALLOC_INTERNAL_TSD_WIN_H
-#error This file should be included only once, by tsd.h.
+#	error This file should be included only once, by tsd.h.
 #endif
 #define JEMALLOC_INTERNAL_TSD_WIN_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/tsd_internals.h"
+#include "jemalloc/internal/tsd_types.h"
+
+/* val should always be the first field of tsd_wrapper_t since accessing
+   val is the common path and having val as the first field makes it possible
+   that converting a pointer to tsd_wrapper_t to a pointer to val is no more
+   than a type cast. */
 typedef struct {
-	bool initialized;
 	tsd_t val;
+	bool  initialized;
 } tsd_wrapper_t;
 
-extern DWORD tsd_tsd;
+#if defined(JEMALLOC_LEGACY_WINDOWS_SUPPORT) || !defined(_MSC_VER)
+
+extern DWORD         tsd_tsd;
 extern tsd_wrapper_t tsd_boot_wrapper;
-extern bool tsd_booted;
+extern bool          tsd_booted;
+#        if defined(_M_ARM64EC)
+#                define JEMALLOC_WIN32_TLSGETVALUE2 0
+#        else
+#                define JEMALLOC_WIN32_TLSGETVALUE2 1
+#        endif
+#        if JEMALLOC_WIN32_TLSGETVALUE2
+typedef LPVOID(WINAPI *TGV2)(DWORD dwTlsIndex);
+extern TGV2    tls_get_value2;
+extern HMODULE tgv2_mod;
+#	endif
 
 /* Initialization/cleanup. */
 JEMALLOC_ALWAYS_INLINE bool
 tsd_cleanup_wrapper(void) {
-	DWORD error = GetLastError();
+	DWORD          error = GetLastError();
 	tsd_wrapper_t *wrapper = (tsd_wrapper_t *)TlsGetValue(tsd_tsd);
 	SetLastError(error);
 
@@ -45,13 +65,21 @@ tsd_wrapper_set(tsd_wrapper_t *wrapper) {
 
 JEMALLOC_ALWAYS_INLINE tsd_wrapper_t *
 tsd_wrapper_get(bool init) {
-	DWORD error = GetLastError();
-	tsd_wrapper_t *wrapper = (tsd_wrapper_t *) TlsGetValue(tsd_tsd);
-	SetLastError(error);
+	tsd_wrapper_t *wrapper;
+#	if JEMALLOC_WIN32_TLSGETVALUE2
+	if (tls_get_value2 != NULL) {
+		wrapper = (tsd_wrapper_t *)tls_get_value2(tsd_tsd);
+	} else
+#	endif
+	{
+		DWORD error = GetLastError();
+		wrapper = (tsd_wrapper_t *)TlsGetValue(tsd_tsd);
+		SetLastError(error);
+	}
 
 	if (init && unlikely(wrapper == NULL)) {
-		wrapper = (tsd_wrapper_t *)
-		    malloc_tsd_malloc(sizeof(tsd_wrapper_t));
+		wrapper = (tsd_wrapper_t *)malloc_tsd_malloc(
+		    sizeof(tsd_wrapper_t));
 		if (wrapper == NULL) {
 			malloc_write("<jemalloc>: Error allocating TSD\n");
 			abort();
@@ -74,6 +102,12 @@ tsd_boot0(void) {
 	}
 	_malloc_tsd_cleanup_register(&tsd_cleanup_wrapper);
 	tsd_wrapper_set(&tsd_boot_wrapper);
+#	if JEMALLOC_WIN32_TLSGETVALUE2
+	tgv2_mod = LoadLibraryA("api-ms-win-core-processthreads-l1-1-8.dll");
+	if (tgv2_mod != NULL) {
+		tls_get_value2 = (TGV2)GetProcAddress(tgv2_mod, "TlsGetValue2");
+	}
+#	endif
 	tsd_booted = true;
 	return false;
 }
@@ -81,8 +115,7 @@ tsd_boot0(void) {
 JEMALLOC_ALWAYS_INLINE void
 tsd_boot1(void) {
 	tsd_wrapper_t *wrapper;
-	wrapper = (tsd_wrapper_t *)
-	    malloc_tsd_malloc(sizeof(tsd_wrapper_t));
+	wrapper = (tsd_wrapper_t *)malloc_tsd_malloc(sizeof(tsd_wrapper_t));
 	if (wrapper == NULL) {
 		malloc_write("<jemalloc>: Error allocating TSD\n");
 		abort();
@@ -137,3 +170,69 @@ tsd_set(tsd_t *val) {
 	}
 	wrapper->initialized = true;
 }
+
+#else // defined(JEMALLOC_LEGACY_WINDOWS_SUPPORT) || !defined(_MSC_VER)
+
+#	define JEMALLOC_TSD_TYPE_ATTR(type) __declspec(thread) type
+
+extern JEMALLOC_TSD_TYPE_ATTR(tsd_wrapper_t) tsd_wrapper_tls;
+extern bool tsd_booted;
+
+/* Initialization/cleanup. */
+JEMALLOC_ALWAYS_INLINE bool
+tsd_cleanup_wrapper(void) {
+	if (tsd_wrapper_tls.initialized) {
+		tsd_wrapper_tls.initialized = false;
+		tsd_cleanup(&tsd_wrapper_tls.val);
+		if (tsd_wrapper_tls.initialized) {
+			/* Trigger another cleanup round. */
+			return true;
+		}
+	}
+	return false;
+}
+
+/* Initialization/cleanup. */
+JEMALLOC_ALWAYS_INLINE bool
+tsd_boot0(void) {
+	_malloc_tsd_cleanup_register(tsd_cleanup_wrapper);
+	tsd_booted = true;
+	return false;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tsd_boot1(void) {
+	/* Do nothing. */
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_boot(void) {
+	return tsd_boot0();
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_booted_get(void) {
+	return tsd_booted;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_get_allocates(void) {
+	return false;
+}
+
+/* Get/set. */
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsd_get(bool init) {
+	return &(tsd_wrapper_tls.val);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tsd_set(tsd_t *val) {
+	assert(tsd_booted);
+	if (likely(&(tsd_wrapper_tls.val) != val)) {
+		tsd_wrapper_tls.val = (*val);
+	}
+	tsd_wrapper_tls.initialized = true;
+}
+
+#endif // defined(JEMALLOC_LEGACY_WINDOWS_SUPPORT) || !defined(_MSC_VER)
diff --git a/include/jemalloc/internal/typed_list.h b/include/jemalloc/internal/typed_list.h
index 6535055a..78704e48 100644
--- a/include/jemalloc/internal/typed_list.h
+++ b/include/jemalloc/internal/typed_list.h
@@ -6,50 +6,49 @@
  * bit easier to use; it handles ql_elm_new calls and provides type safety.
  */
 
-#define TYPED_LIST(list_type, el_type, linkage)				\
-typedef struct {							\
-	ql_head(el_type) head;						\
-} list_type##_t;							\
-static inline void							\
-list_type##_init(list_type##_t *list) {					\
-	ql_new(&list->head);						\
-}									\
-static inline el_type *							\
-list_type##_first(const list_type##_t *list) {				\
-	return ql_first(&list->head);					\
-}									\
-static inline el_type *							\
-list_type##_last(const list_type##_t *list) {				\
-	return ql_last(&list->head, linkage);				\
-}									\
-static inline void							\
-list_type##_append(list_type##_t *list, el_type *item) {		\
-	ql_elm_new(item, linkage);					\
-	ql_tail_insert(&list->head, item, linkage);			\
-}									\
-static inline void							\
-list_type##_prepend(list_type##_t *list, el_type *item) {		\
-	ql_elm_new(item, linkage);					\
-	ql_head_insert(&list->head, item, linkage);			\
-}									\
-static inline void							\
-list_type##_replace(list_type##_t *list, el_type *to_remove,		\
-    el_type *to_insert) {						\
-	ql_elm_new(to_insert, linkage);					\
-	ql_after_insert(to_remove, to_insert, linkage);			\
-	ql_remove(&list->head, to_remove, linkage);			\
-}									\
-static inline void							\
-list_type##_remove(list_type##_t *list, el_type *item) {		\
-	ql_remove(&list->head, item, linkage);				\
-}									\
-static inline bool							\
-list_type##_empty(list_type##_t *list) {				\
-	return ql_empty(&list->head);					\
-}									\
-static inline void							\
-list_type##_concat(list_type##_t *list_a, list_type##_t *list_b) {	\
-	ql_concat(&list_a->head, &list_b->head, linkage);		\
-}
+#define TYPED_LIST(list_type, el_type, linkage)                                \
+	typedef struct {                                                       \
+		ql_head(el_type) head;                                         \
+	} list_type##_t;                                                       \
+	static inline void list_type##_init(list_type##_t *list) {             \
+		ql_new(&list->head);                                           \
+	}                                                                      \
+	static inline el_type *list_type##_first(const list_type##_t *list) {  \
+		return ql_first(&list->head);                                  \
+	}                                                                      \
+	static inline el_type *list_type##_last(const list_type##_t *list) {   \
+		return ql_last(&list->head, linkage);                          \
+	}                                                                      \
+	static inline el_type *list_type##_next(                               \
+	    const list_type##_t *list, el_type *item) {                        \
+		return ql_next(&list->head, item, linkage);                    \
+	}                                                                      \
+	static inline void list_type##_append(                                 \
+	    list_type##_t *list, el_type *item) {                              \
+		ql_elm_new(item, linkage);                                     \
+		ql_tail_insert(&list->head, item, linkage);                    \
+	}                                                                      \
+	static inline void list_type##_prepend(                                \
+	    list_type##_t *list, el_type *item) {                              \
+		ql_elm_new(item, linkage);                                     \
+		ql_head_insert(&list->head, item, linkage);                    \
+	}                                                                      \
+	static inline void list_type##_replace(                                \
+	    list_type##_t *list, el_type *to_remove, el_type *to_insert) {     \
+		ql_elm_new(to_insert, linkage);                                \
+		ql_after_insert(to_remove, to_insert, linkage);                \
+		ql_remove(&list->head, to_remove, linkage);                    \
+	}                                                                      \
+	static inline void list_type##_remove(                                 \
+	    list_type##_t *list, el_type *item) {                              \
+		ql_remove(&list->head, item, linkage);                         \
+	}                                                                      \
+	static inline bool list_type##_empty(list_type##_t *list) {            \
+		return ql_empty(&list->head);                                  \
+	}                                                                      \
+	static inline void list_type##_concat(                                 \
+	    list_type##_t *list_a, list_type##_t *list_b) {                    \
+		ql_concat(&list_a->head, &list_b->head, linkage);              \
+	}
 
 #endif /* JEMALLOC_INTERNAL_TYPED_LIST_H */
diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h
index dcb1c0a5..ecfa76b8 100644
--- a/include/jemalloc/internal/util.h
+++ b/include/jemalloc/internal/util.h
@@ -1,14 +1,17 @@
 #ifndef JEMALLOC_INTERNAL_UTIL_H
 #define JEMALLOC_INTERNAL_UTIL_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_types.h"
+
 #define UTIL_INLINE static inline
 
 /* Junk fill patterns. */
 #ifndef JEMALLOC_ALLOC_JUNK
-#  define JEMALLOC_ALLOC_JUNK	((uint8_t)0xa5)
+#	define JEMALLOC_ALLOC_JUNK ((uint8_t)0xa5)
 #endif
 #ifndef JEMALLOC_FREE_JUNK
-#  define JEMALLOC_FREE_JUNK	((uint8_t)0x5a)
+#	define JEMALLOC_FREE_JUNK ((uint8_t)0x5a)
 #endif
 
 /*
@@ -17,6 +20,9 @@
  */
 #define JEMALLOC_ARG_CONCAT(...) __VA_ARGS__
 
+/* Number of elements in a fixed-size array. */
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
 /* cpp macro definition stringification. */
 #define STRINGIFY_HELPER(x) #x
 #define STRINGIFY(x) STRINGIFY_HELPER(x)
@@ -26,22 +32,25 @@
  * wherever the compiler fails to recognize that the variable is never used
  * uninitialized.
  */
-#define JEMALLOC_CC_SILENCE_INIT(v) = v
+#define JEMALLOC_CC_SILENCE_INIT(...) = __VA_ARGS__
 
 #ifdef __GNUC__
-#  define likely(x)   __builtin_expect(!!(x), 1)
-#  define unlikely(x) __builtin_expect(!!(x), 0)
+#	define likely(x) __builtin_expect(!!(x), 1)
+#	define unlikely(x) __builtin_expect(!!(x), 0)
 #else
-#  define likely(x)   !!(x)
-#  define unlikely(x) !!(x)
+#	define likely(x) !!(x)
+#	define unlikely(x) !!(x)
 #endif
 
-#if !defined(JEMALLOC_INTERNAL_UNREACHABLE)
-#  error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
+#	include <stddef.h>
+#else
+#	if !defined(JEMALLOC_INTERNAL_UNREACHABLE)
+#		error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure
+#	endif
+#	define unreachable() JEMALLOC_INTERNAL_UNREACHABLE()
 #endif
 
-#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE()
-
 /* Set error code. */
 UTIL_INLINE void
 set_errno(int errnum) {
@@ -62,12 +71,29 @@ get_errno(void) {
 #endif
 }
 
-JEMALLOC_ALWAYS_INLINE void
-util_assume(bool b) {
-	if (!b) {
-		unreachable();
-	}
-}
+#ifdef _MSC_VER
+#	define util_assume __assume
+#elif defined(__clang__)                                                       \
+    && (__clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 6))
+#	define util_assume __builtin_assume
+#else
+#	define util_assume(expr)                                              \
+		do {                                                           \
+			if (!(expr)) {                                         \
+				unreachable();                                 \
+			}                                                      \
+		} while (0)
+#endif
+
+/* Allows compiler constant folding on inlined paths. */
+#if defined(__has_builtin)
+#	if __has_builtin(__builtin_constant_p)
+#		define util_compile_time_const(x) __builtin_constant_p(x)
+#	endif
+#endif
+#ifndef util_compile_time_const
+#	define util_compile_time_const(x) (false)
+#endif
 
 /* ptr should be valid. */
 JEMALLOC_ALWAYS_INLINE void
@@ -107,17 +133,24 @@ util_prefetch_write(void *ptr) {
 JEMALLOC_ALWAYS_INLINE void
 util_prefetch_read_range(void *ptr, size_t sz) {
 	for (size_t i = 0; i < sz; i += CACHELINE) {
-		util_prefetch_read((void *)((uintptr_t)ptr + i));
+		util_prefetch_read((void *)((byte_t *)ptr + i));
 	}
 }
 
 JEMALLOC_ALWAYS_INLINE void
 util_prefetch_write_range(void *ptr, size_t sz) {
 	for (size_t i = 0; i < sz; i += CACHELINE) {
-		util_prefetch_write((void *)((uintptr_t)ptr + i));
+		util_prefetch_write((void *)((byte_t *)ptr + i));
 	}
 }
 
 #undef UTIL_INLINE
 
+/*
+ * Reads the settings in the following format:
+ * key1-key2:value|key3-key4:value|...
+ * Note it does not handle the ending '\0'.
+ */
+bool multi_setting_parse_next(const char **setting_segment_cur,
+    size_t *len_left, size_t *key_start, size_t *key_end, size_t *value);
 #endif /* JEMALLOC_INTERNAL_UTIL_H */
diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h
index e81b9a00..0a426ff5 100644
--- a/include/jemalloc/internal/witness.h
+++ b/include/jemalloc/internal/witness.h
@@ -1,6 +1,8 @@
 #ifndef JEMALLOC_INTERNAL_WITNESS_H
 #define JEMALLOC_INTERNAL_WITNESS_H
 
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/assert.h"
 #include "jemalloc/internal/ql.h"
 
 /******************************************************************************/
@@ -44,7 +46,7 @@ enum witness_rank_e {
 	WITNESS_RANK_DECAY = WITNESS_RANK_CORE,
 	WITNESS_RANK_TCACHE_QL,
 
-	WITNESS_RANK_SEC_SHARD,
+	WITNESS_RANK_SEC_BIN,
 
 	WITNESS_RANK_EXTENT_GROW,
 	WITNESS_RANK_HPA_SHARD_GROW = WITNESS_RANK_EXTENT_GROW,
@@ -63,7 +65,7 @@ enum witness_rank_e {
 	WITNESS_RANK_ARENA_LARGE,
 	WITNESS_RANK_HOOK,
 
-	WITNESS_RANK_LEAF=0x1000,
+	WITNESS_RANK_LEAF = 0x1000,
 	WITNESS_RANK_BIN = WITNESS_RANK_LEAF,
 	WITNESS_RANK_ARENA_STATS = WITNESS_RANK_LEAF,
 	WITNESS_RANK_COUNTER_ACCUM = WITNESS_RANK_LEAF,
@@ -75,6 +77,7 @@ enum witness_rank_e {
 	WITNESS_RANK_PROF_RECENT_ALLOC = WITNESS_RANK_LEAF,
 	WITNESS_RANK_PROF_STATS = WITNESS_RANK_LEAF,
 	WITNESS_RANK_PROF_THREAD_ACTIVE_INIT = WITNESS_RANK_LEAF,
+	WITNESS_RANK_THREAD_EVENTS_USER = WITNESS_RANK_LEAF,
 };
 typedef enum witness_rank_e witness_rank_t;
 
@@ -82,38 +85,43 @@ typedef enum witness_rank_e witness_rank_t;
 /* PER-WITNESS DATA */
 /******************************************************************************/
 #if defined(JEMALLOC_DEBUG)
-#  define WITNESS_INITIALIZER(name, rank) {name, rank, NULL, NULL, {NULL, NULL}}
+#	define WITNESS_INITIALIZER(name, rank)                                \
+		{                                                              \
+			name, rank, NULL, NULL, {                              \
+				NULL, NULL                                     \
+			}                                                      \
+		}
 #else
-#  define WITNESS_INITIALIZER(name, rank)
+#	define WITNESS_INITIALIZER(name, rank)
 #endif
 
 typedef struct witness_s witness_t;
 typedef ql_head(witness_t) witness_list_t;
-typedef int witness_comp_t (const witness_t *, void *, const witness_t *,
-    void *);
+typedef int witness_comp_t(
+    const witness_t *, void *, const witness_t *, void *);
 
 struct witness_s {
 	/* Name, used for printing lock order reversal messages. */
-	const char		*name;
+	const char *name;
 
 	/*
 	 * Witness rank, where 0 is lowest and WITNESS_RANK_LEAF is highest.
 	 * Witnesses must be acquired in order of increasing rank.
 	 */
-	witness_rank_t		rank;
+	witness_rank_t rank;
 
 	/*
 	 * If two witnesses are of equal rank and they have the samp comp
 	 * function pointer, it is called as a last attempt to differentiate
 	 * between witnesses of equal rank.
 	 */
-	witness_comp_t		*comp;
+	witness_comp_t *comp;
 
 	/* Opaque data, passed to comp(). */
-	void			*opaque;
+	void *opaque;
 
 	/* Linkage for thread's currently owned locks. */
-	ql_elm(witness_t)	link;
+	ql_elm(witness_t) link;
 };
 
 /******************************************************************************/
@@ -122,10 +130,11 @@ struct witness_s {
 typedef struct witness_tsd_s witness_tsd_t;
 struct witness_tsd_s {
 	witness_list_t witnesses;
-	bool forking;
+	bool           forking;
 };
 
-#define WITNESS_TSD_INITIALIZER { ql_head_initializer(witnesses), false }
+#define WITNESS_TSD_INITIALIZER                                                \
+	{ ql_head_initializer(witnesses), false }
 #define WITNESS_TSDN_NULL ((witness_tsdn_t *)0)
 
 /******************************************************************************/
@@ -158,17 +167,17 @@ witness_tsdn_tsd(witness_tsdn_t *witness_tsdn) {
 void witness_init(witness_t *witness, const char *name, witness_rank_t rank,
     witness_comp_t *comp, void *opaque);
 
-typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *);
+typedef void(witness_lock_error_t)(const witness_list_t *, const witness_t *);
 extern witness_lock_error_t *JET_MUTABLE witness_lock_error;
 
-typedef void (witness_owner_error_t)(const witness_t *);
+typedef void(witness_owner_error_t)(const witness_t *);
 extern witness_owner_error_t *JET_MUTABLE witness_owner_error;
 
-typedef void (witness_not_owner_error_t)(const witness_t *);
+typedef void(witness_not_owner_error_t)(const witness_t *);
 extern witness_not_owner_error_t *JET_MUTABLE witness_not_owner_error;
 
-typedef void (witness_depth_error_t)(const witness_list_t *,
-    witness_rank_t rank_inclusive, unsigned depth);
+typedef void(witness_depth_error_t)(
+    const witness_list_t *, witness_rank_t rank_inclusive, unsigned depth);
 extern witness_depth_error_t *JET_MUTABLE witness_depth_error;
 
 void witnesses_cleanup(witness_tsd_t *witness_tsd);
@@ -180,12 +189,12 @@ void witness_postfork_child(witness_tsd_t *witness_tsd);
 static inline bool
 witness_owner(witness_tsd_t *witness_tsd, const witness_t *witness) {
 	witness_list_t *witnesses;
-	witness_t *w;
+	witness_t      *w;
 
 	cassert(config_debug);
 
 	witnesses = &witness_tsd->witnesses;
-	ql_foreach(w, witnesses, link) {
+	ql_foreach (w, witnesses, link) {
 		if (w == witness) {
 			return true;
 		}
@@ -217,11 +226,11 @@ witness_assert_owner(witness_tsdn_t *witness_tsdn, const witness_t *witness) {
 }
 
 static inline void
-witness_assert_not_owner(witness_tsdn_t *witness_tsdn,
-    const witness_t *witness) {
-	witness_tsd_t *witness_tsd;
+witness_assert_not_owner(
+    witness_tsdn_t *witness_tsdn, const witness_t *witness) {
+	witness_tsd_t  *witness_tsd;
 	witness_list_t *witnesses;
-	witness_t *w;
+	witness_t      *w;
 
 	if (!config_debug) {
 		return;
@@ -236,7 +245,7 @@ witness_assert_not_owner(witness_tsdn_t *witness_tsdn,
 	}
 
 	witnesses = &witness_tsd->witnesses;
-	ql_foreach(w, witnesses, link) {
+	ql_foreach (w, witnesses, link) {
 		if (w == witness) {
 			witness_not_owner_error(witness);
 		}
@@ -245,9 +254,9 @@ witness_assert_not_owner(witness_tsdn_t *witness_tsdn,
 
 /* Returns depth.  Not intended for direct use. */
 static inline unsigned
-witness_depth_to_rank(witness_list_t *witnesses, witness_rank_t rank_inclusive)
-{
-	unsigned d = 0;
+witness_depth_to_rank(
+    witness_list_t *witnesses, witness_rank_t rank_inclusive) {
+	unsigned   d = 0;
 	witness_t *w = ql_last(witnesses, link);
 
 	if (w != NULL) {
@@ -270,7 +279,7 @@ witness_assert_depth_to_rank(witness_tsdn_t *witness_tsdn,
 	}
 
 	witness_list_t *witnesses = &witness_tsdn_tsd(witness_tsdn)->witnesses;
-	unsigned d = witness_depth_to_rank(witnesses, rank_inclusive);
+	unsigned        d = witness_depth_to_rank(witnesses, rank_inclusive);
 
 	if (d != depth) {
 		witness_depth_error(witnesses, rank_inclusive, depth);
@@ -288,14 +297,14 @@ witness_assert_lockless(witness_tsdn_t *witness_tsdn) {
 }
 
 static inline void
-witness_assert_positive_depth_to_rank(witness_tsdn_t *witness_tsdn,
-    witness_rank_t rank_inclusive) {
+witness_assert_positive_depth_to_rank(
+    witness_tsdn_t *witness_tsdn, witness_rank_t rank_inclusive) {
 	if (!config_debug || witness_tsdn_null(witness_tsdn)) {
 		return;
 	}
 
 	witness_list_t *witnesses = &witness_tsdn_tsd(witness_tsdn)->witnesses;
-	unsigned d = witness_depth_to_rank(witnesses, rank_inclusive);
+	unsigned        d = witness_depth_to_rank(witnesses, rank_inclusive);
 
 	if (d == 0) {
 		witness_depth_error(witnesses, rank_inclusive, 1);
@@ -304,9 +313,9 @@ witness_assert_positive_depth_to_rank(witness_tsdn_t *witness_tsdn,
 
 static inline void
 witness_lock(witness_tsdn_t *witness_tsdn, witness_t *witness) {
-	witness_tsd_t *witness_tsd;
+	witness_tsd_t  *witness_tsd;
 	witness_list_t *witnesses;
-	witness_t *w;
+	witness_t      *w;
 
 	if (!config_debug) {
 		return;
@@ -331,9 +340,9 @@ witness_lock(witness_tsdn_t *witness_tsdn, witness_t *witness) {
 	} else if (w->rank > witness->rank) {
 		/* Not forking, rank order reversal. */
 		witness_lock_error(witnesses, witness);
-	} else if (w->rank == witness->rank && (w->comp == NULL || w->comp !=
-	    witness->comp || w->comp(w, w->opaque, witness, witness->opaque) >
-	    0)) {
+	} else if (w->rank == witness->rank
+	    && (w->comp == NULL || w->comp != witness->comp
+	        || w->comp(w, w->opaque, witness, witness->opaque) > 0)) {
 		/*
 		 * Missing/incompatible comparison function, or comparison
 		 * function indicates rank order reversal.
@@ -341,13 +350,16 @@ witness_lock(witness_tsdn_t *witness_tsdn, witness_t *witness) {
 		witness_lock_error(witnesses, witness);
 	}
 
+	/* Suppress spurious warning from static analysis */
+	assert(
+	    ql_empty(witnesses) || qr_prev(ql_first(witnesses), link) != NULL);
 	ql_elm_new(witness, link);
 	ql_tail_insert(witnesses, witness, link);
 }
 
 static inline void
 witness_unlock(witness_tsdn_t *witness_tsdn, witness_t *witness) {
-	witness_tsd_t *witness_tsd;
+	witness_tsd_t  *witness_tsd;
 	witness_list_t *witnesses;
 
 	if (!config_debug) {
diff --git a/include/jemalloc/jemalloc.sh b/include/jemalloc/jemalloc.sh
index b19b1548..9eaca266 100755
--- a/include/jemalloc/jemalloc.sh
+++ b/include/jemalloc/jemalloc.sh
@@ -5,9 +5,7 @@ objroot=$1
 cat <<EOF
 #ifndef JEMALLOC_H_
 #define JEMALLOC_H_
-#ifdef __cplusplus
-extern "C" {
-#endif
+#pragma GCC system_header
 
 EOF
 
@@ -20,8 +18,5 @@ for hdr in jemalloc_defs.h jemalloc_rename.h jemalloc_macros.h \
 done
 
 cat <<EOF
-#ifdef __cplusplus
-}
-#endif
 #endif /* JEMALLOC_H_ */
 EOF
diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in
index cbe2fca6..96c75011 100644
--- a/include/jemalloc/jemalloc_defs.h.in
+++ b/include/jemalloc/jemalloc_defs.h.in
@@ -19,12 +19,16 @@
 /* Defined if cold attribute is supported. */
 #undef JEMALLOC_HAVE_ATTR_COLD
 
+/* Defined if deprecated attribute is supported. */
+#undef JEMALLOC_HAVE_ATTR_DEPRECATED
+
 /*
  * Define overrides for non-standard allocator-related functions if they are
  * present on the system.
  */
 #undef JEMALLOC_OVERRIDE_MEMALIGN
 #undef JEMALLOC_OVERRIDE_VALLOC
+#undef JEMALLOC_OVERRIDE_PVALLOC
 
 /*
  * At least Linux omits the "const" in:
@@ -42,6 +46,12 @@
  */
 #undef JEMALLOC_USE_CXX_THROW
 
+/*
+ * If undefined, disables reading configuration from environment variable or file
+ */
+#undef JEMALLOC_CONFIG_ENV
+#undef JEMALLOC_CONFIG_FILE
+
 #ifdef _MSC_VER
 #  ifdef _WIN64
 #    define LG_SIZEOF_PTR_WIN 3
diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in
index ebb3137e..06f47b8a 100644
--- a/include/jemalloc/jemalloc_macros.h.in
+++ b/include/jemalloc/jemalloc_macros.h.in
@@ -52,7 +52,7 @@
 #define MALLCTL_ARENAS_DESTROYED	4097
 
 #if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW)
-#  define JEMALLOC_CXX_THROW throw()
+#  define JEMALLOC_CXX_THROW noexcept (true)
 #else
 #  define JEMALLOC_CXX_THROW
 #endif
@@ -86,6 +86,7 @@
 #    define JEMALLOC_ALLOCATOR
 #  endif
 #  define JEMALLOC_COLD
+#  define JEMALLOC_WARN_ON_USAGE(warning_message)
 #elif defined(JEMALLOC_HAVE_ATTR)
 #  define JEMALLOC_ATTR(s) __attribute__((s))
 #  define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s))
@@ -126,6 +127,11 @@
 #  else
 #    define JEMALLOC_COLD
 #  endif
+#  ifdef JEMALLOC_HAVE_ATTR_DEPRECATED
+#    define JEMALLOC_WARN_ON_USAGE(warning_message) JEMALLOC_ATTR(deprecated(warning_message))
+#  else
+#    define JEMALLOC_WARN_ON_USAGE(warning_message)
+#  endif
 #else
 #  define JEMALLOC_ATTR(s)
 #  define JEMALLOC_ALIGNED(s)
@@ -140,9 +146,10 @@
 #  define JEMALLOC_RESTRICT_RETURN
 #  define JEMALLOC_ALLOCATOR
 #  define JEMALLOC_COLD
+#  define JEMALLOC_WARN_ON_USAGE(warning_message)
 #endif
 
-#if (defined(__APPLE__) || defined(__FreeBSD__)) && !defined(JEMALLOC_NO_RENAME)
+#if (defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || (defined(__linux__) && !defined(__GLIBC__))) && !defined(JEMALLOC_NO_RENAME)
 #  define JEMALLOC_SYS_NOTHROW
 #else
 #  define JEMALLOC_SYS_NOTHROW JEMALLOC_NOTHROW
diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in
index 356221cc..21e4dc57 100644
--- a/include/jemalloc/jemalloc_protos.h.in
+++ b/include/jemalloc/jemalloc_protos.h.in
@@ -3,7 +3,12 @@
  * of namespace management, and should be omitted in application code unless
  * JEMALLOC_NO_DEMANGLE is defined (see jemalloc_mangle@install_suffix@.h).
  */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 extern JEMALLOC_EXPORT const char	*@je_@malloc_conf;
+extern JEMALLOC_EXPORT const char	*@je_@malloc_conf_2_conf_harder;
 extern JEMALLOC_EXPORT void		(*@je_@malloc_message)(void *cbopaque,
     const char *s);
 
@@ -25,6 +30,9 @@ JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
     JEMALLOC_CXX_THROW JEMALLOC_ALLOC_SIZE(2);
 JEMALLOC_EXPORT void JEMALLOC_SYS_NOTHROW	@je_@free(void *ptr)
     JEMALLOC_CXX_THROW;
+JEMALLOC_EXPORT void JEMALLOC_NOTHROW	@je_@free_sized(void *ptr, size_t size);
+JEMALLOC_EXPORT void JEMALLOC_NOTHROW	@je_@free_aligned_sized(
+    void *ptr, size_t alignment, size_t size);
 
 JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
     void JEMALLOC_NOTHROW	*@je_@mallocx(size_t size, int flags)
@@ -69,3 +77,13 @@ JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
     void JEMALLOC_SYS_NOTHROW	*@je_@valloc(size_t size) JEMALLOC_CXX_THROW
     JEMALLOC_ATTR(malloc);
 #endif
+
+#ifdef JEMALLOC_OVERRIDE_PVALLOC
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+    void JEMALLOC_SYS_NOTHROW	*@je_@pvalloc(size_t size) JEMALLOC_CXX_THROW
+    JEMALLOC_ATTR(malloc);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/include/jemalloc/jemalloc_typedefs.h.in b/include/jemalloc/jemalloc_typedefs.h.in
index 1a588743..793ee365 100644
--- a/include/jemalloc/jemalloc_typedefs.h.in
+++ b/include/jemalloc/jemalloc_typedefs.h.in
@@ -1,3 +1,7 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef struct extent_hooks_s extent_hooks_t;
 
 /*
@@ -75,3 +79,7 @@ struct extent_hooks_s {
 	extent_split_t		*split;
 	extent_merge_t		*merge;
 };
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/include/msvc_compat/C99/stdint.h b/include/msvc_compat/C99/stdint.h
index d02608a5..5ee3992b 100644
--- a/include/msvc_compat/C99/stdint.h
+++ b/include/msvc_compat/C99/stdint.h
@@ -1,68 +1,68 @@
 // ISO C9x  compliant stdint.h for Microsoft Visual Studio
-// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
-// 
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
+//
 //  Copyright (c) 2006-2008 Alexander Chemeris
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are met:
-// 
+//
 //   1. Redistributions of source code must retain the above copyright notice,
 //      this list of conditions and the following disclaimer.
-// 
+//
 //   2. Redistributions in binary form must reproduce the above copyright
 //      notice, this list of conditions and the following disclaimer in the
 //      documentation and/or other materials provided with the distribution.
-// 
+//
 //   3. The name of the author may be used to endorse or promote products
 //      derived from this software without specific prior written permission.
-// 
+//
 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
 // EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-// 
+//
 ///////////////////////////////////////////////////////////////////////////////
 
 #ifndef _MSC_VER // [
-#error "Use this header only with Microsoft Visual C++ compilers!"
+#	error "Use this header only with Microsoft Visual C++ compilers!"
 #endif // _MSC_VER ]
 
 #ifndef _MSC_STDINT_H_ // [
-#define _MSC_STDINT_H_
+#	define _MSC_STDINT_H_
 
-#if _MSC_VER > 1000
-#pragma once
-#endif
+#	if _MSC_VER > 1000
+#		pragma once
+#	endif
 
-#include <limits.h>
+#	include <limits.h>
 
 // For Visual Studio 6 in C++ mode and for many Visual Studio versions when
 // compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}'
 // or compiler give many errors like this:
 //   error C2733: second C linkage of overloaded function 'wmemchr' not allowed
-#ifdef __cplusplus
+#	ifdef __cplusplus
 extern "C" {
-#endif
-#  include <wchar.h>
-#ifdef __cplusplus
+#	endif
+#	include <wchar.h>
+#	ifdef __cplusplus
 }
-#endif
+#	endif
 
 // Define _W64 macros to mark types changing their size, like intptr_t.
-#ifndef _W64
-#  if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
-#     define _W64 __w64
-#  else
-#     define _W64
-#  endif
-#endif
-
+#	ifndef _W64
+#		if !defined(__midl) && (defined(_X86_) || defined(_M_IX86))   \
+		    && _MSC_VER >= 1300
+#			define _W64 __w64
+#		else
+#			define _W64
+#		endif
+#	endif
 
 // 7.18.1 Integer types
 
@@ -71,177 +71,177 @@ extern "C" {
 // Visual Studio 6 and Embedded Visual C++ 4 doesn't
 // realize that, e.g. char has the same size as __int8
 // so we give up on __intX for them.
-#if (_MSC_VER < 1300)
-   typedef signed char       int8_t;
-   typedef signed short      int16_t;
-   typedef signed int        int32_t;
-   typedef unsigned char     uint8_t;
-   typedef unsigned short    uint16_t;
-   typedef unsigned int      uint32_t;
-#else
-   typedef signed __int8     int8_t;
-   typedef signed __int16    int16_t;
-   typedef signed __int32    int32_t;
-   typedef unsigned __int8   uint8_t;
-   typedef unsigned __int16  uint16_t;
-   typedef unsigned __int32  uint32_t;
-#endif
-typedef signed __int64       int64_t;
-typedef unsigned __int64     uint64_t;
-
+#	if (_MSC_VER < 1300)
+typedef signed char    int8_t;
+typedef signed short   int16_t;
+typedef signed int     int32_t;
+typedef unsigned char  uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int   uint32_t;
+#        else
+typedef signed __int8    int8_t;
+typedef signed __int16   int16_t;
+typedef signed __int32   int32_t;
+typedef unsigned __int8  uint8_t;
+typedef unsigned __int16 uint16_t;
+typedef unsigned __int32 uint32_t;
+#        endif
+typedef signed __int64   int64_t;
+typedef unsigned __int64 uint64_t;
 
 // 7.18.1.2 Minimum-width integer types
-typedef int8_t    int_least8_t;
-typedef int16_t   int_least16_t;
-typedef int32_t   int_least32_t;
-typedef int64_t   int_least64_t;
-typedef uint8_t   uint_least8_t;
-typedef uint16_t  uint_least16_t;
-typedef uint32_t  uint_least32_t;
-typedef uint64_t  uint_least64_t;
+typedef int8_t   int_least8_t;
+typedef int16_t  int_least16_t;
+typedef int32_t  int_least32_t;
+typedef int64_t  int_least64_t;
+typedef uint8_t  uint_least8_t;
+typedef uint16_t uint_least16_t;
+typedef uint32_t uint_least32_t;
+typedef uint64_t uint_least64_t;
 
 // 7.18.1.3 Fastest minimum-width integer types
-typedef int8_t    int_fast8_t;
-typedef int16_t   int_fast16_t;
-typedef int32_t   int_fast32_t;
-typedef int64_t   int_fast64_t;
-typedef uint8_t   uint_fast8_t;
-typedef uint16_t  uint_fast16_t;
-typedef uint32_t  uint_fast32_t;
-typedef uint64_t  uint_fast64_t;
+typedef int8_t   int_fast8_t;
+typedef int16_t  int_fast16_t;
+typedef int32_t  int_fast32_t;
+typedef int64_t  int_fast64_t;
+typedef uint8_t  uint_fast8_t;
+typedef uint16_t uint_fast16_t;
+typedef uint32_t uint_fast32_t;
+typedef uint64_t uint_fast64_t;
 
 // 7.18.1.4 Integer types capable of holding object pointers
-#ifdef _WIN64 // [
-   typedef signed __int64    intptr_t;
-   typedef unsigned __int64  uintptr_t;
-#else // _WIN64 ][
-   typedef _W64 signed int   intptr_t;
-   typedef _W64 unsigned int uintptr_t;
-#endif // _WIN64 ]
+#	ifdef _WIN64 // [
+typedef signed __int64   intptr_t;
+typedef unsigned __int64 uintptr_t;
+#	else  // _WIN64 ][
+typedef _W64 signed int   intptr_t;
+typedef _W64 unsigned int uintptr_t;
+#	endif // _WIN64 ]
 
 // 7.18.1.5 Greatest-width integer types
-typedef int64_t   intmax_t;
-typedef uint64_t  uintmax_t;
-
+typedef int64_t  intmax_t;
+typedef uint64_t uintmax_t;
 
 // 7.18.2 Limits of specified-width integer types
 
-#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [   See footnote 220 at page 257 and footnote 221 at page 259
+#	if !defined(__cplusplus)                                              \
+	    || defined(                                                        \
+	        __STDC_LIMIT_MACROS) // [   See footnote 220 at page 257 and footnote 221 at page 259
 
 // 7.18.2.1 Limits of exact-width integer types
-#define INT8_MIN     ((int8_t)_I8_MIN)
-#define INT8_MAX     _I8_MAX
-#define INT16_MIN    ((int16_t)_I16_MIN)
-#define INT16_MAX    _I16_MAX
-#define INT32_MIN    ((int32_t)_I32_MIN)
-#define INT32_MAX    _I32_MAX
-#define INT64_MIN    ((int64_t)_I64_MIN)
-#define INT64_MAX    _I64_MAX
-#define UINT8_MAX    _UI8_MAX
-#define UINT16_MAX   _UI16_MAX
-#define UINT32_MAX   _UI32_MAX
-#define UINT64_MAX   _UI64_MAX
+#		define INT8_MIN ((int8_t)_I8_MIN)
+#		define INT8_MAX _I8_MAX
+#		define INT16_MIN ((int16_t)_I16_MIN)
+#		define INT16_MAX _I16_MAX
+#		define INT32_MIN ((int32_t)_I32_MIN)
+#		define INT32_MAX _I32_MAX
+#		define INT64_MIN ((int64_t)_I64_MIN)
+#		define INT64_MAX _I64_MAX
+#		define UINT8_MAX _UI8_MAX
+#		define UINT16_MAX _UI16_MAX
+#		define UINT32_MAX _UI32_MAX
+#		define UINT64_MAX _UI64_MAX
 
 // 7.18.2.2 Limits of minimum-width integer types
-#define INT_LEAST8_MIN    INT8_MIN
-#define INT_LEAST8_MAX    INT8_MAX
-#define INT_LEAST16_MIN   INT16_MIN
-#define INT_LEAST16_MAX   INT16_MAX
-#define INT_LEAST32_MIN   INT32_MIN
-#define INT_LEAST32_MAX   INT32_MAX
-#define INT_LEAST64_MIN   INT64_MIN
-#define INT_LEAST64_MAX   INT64_MAX
-#define UINT_LEAST8_MAX   UINT8_MAX
-#define UINT_LEAST16_MAX  UINT16_MAX
-#define UINT_LEAST32_MAX  UINT32_MAX
-#define UINT_LEAST64_MAX  UINT64_MAX
+#		define INT_LEAST8_MIN INT8_MIN
+#		define INT_LEAST8_MAX INT8_MAX
+#		define INT_LEAST16_MIN INT16_MIN
+#		define INT_LEAST16_MAX INT16_MAX
+#		define INT_LEAST32_MIN INT32_MIN
+#		define INT_LEAST32_MAX INT32_MAX
+#		define INT_LEAST64_MIN INT64_MIN
+#		define INT_LEAST64_MAX INT64_MAX
+#		define UINT_LEAST8_MAX UINT8_MAX
+#		define UINT_LEAST16_MAX UINT16_MAX
+#		define UINT_LEAST32_MAX UINT32_MAX
+#		define UINT_LEAST64_MAX UINT64_MAX
 
 // 7.18.2.3 Limits of fastest minimum-width integer types
-#define INT_FAST8_MIN    INT8_MIN
-#define INT_FAST8_MAX    INT8_MAX
-#define INT_FAST16_MIN   INT16_MIN
-#define INT_FAST16_MAX   INT16_MAX
-#define INT_FAST32_MIN   INT32_MIN
-#define INT_FAST32_MAX   INT32_MAX
-#define INT_FAST64_MIN   INT64_MIN
-#define INT_FAST64_MAX   INT64_MAX
-#define UINT_FAST8_MAX   UINT8_MAX
-#define UINT_FAST16_MAX  UINT16_MAX
-#define UINT_FAST32_MAX  UINT32_MAX
-#define UINT_FAST64_MAX  UINT64_MAX
+#		define INT_FAST8_MIN INT8_MIN
+#		define INT_FAST8_MAX INT8_MAX
+#		define INT_FAST16_MIN INT16_MIN
+#		define INT_FAST16_MAX INT16_MAX
+#		define INT_FAST32_MIN INT32_MIN
+#		define INT_FAST32_MAX INT32_MAX
+#		define INT_FAST64_MIN INT64_MIN
+#		define INT_FAST64_MAX INT64_MAX
+#		define UINT_FAST8_MAX UINT8_MAX
+#		define UINT_FAST16_MAX UINT16_MAX
+#		define UINT_FAST32_MAX UINT32_MAX
+#		define UINT_FAST64_MAX UINT64_MAX
 
 // 7.18.2.4 Limits of integer types capable of holding object pointers
-#ifdef _WIN64 // [
-#  define INTPTR_MIN   INT64_MIN
-#  define INTPTR_MAX   INT64_MAX
-#  define UINTPTR_MAX  UINT64_MAX
-#else // _WIN64 ][
-#  define INTPTR_MIN   INT32_MIN
-#  define INTPTR_MAX   INT32_MAX
-#  define UINTPTR_MAX  UINT32_MAX
-#endif // _WIN64 ]
+#		ifdef _WIN64 // [
+#			define INTPTR_MIN INT64_MIN
+#			define INTPTR_MAX INT64_MAX
+#			define UINTPTR_MAX UINT64_MAX
+#		else // _WIN64 ][
+#			define INTPTR_MIN INT32_MIN
+#			define INTPTR_MAX INT32_MAX
+#			define UINTPTR_MAX UINT32_MAX
+#		endif // _WIN64 ]
 
 // 7.18.2.5 Limits of greatest-width integer types
-#define INTMAX_MIN   INT64_MIN
-#define INTMAX_MAX   INT64_MAX
-#define UINTMAX_MAX  UINT64_MAX
+#		define INTMAX_MIN INT64_MIN
+#		define INTMAX_MAX INT64_MAX
+#		define UINTMAX_MAX UINT64_MAX
 
 // 7.18.3 Limits of other integer types
 
-#ifdef _WIN64 // [
-#  define PTRDIFF_MIN  _I64_MIN
-#  define PTRDIFF_MAX  _I64_MAX
-#else  // _WIN64 ][
-#  define PTRDIFF_MIN  _I32_MIN
-#  define PTRDIFF_MAX  _I32_MAX
-#endif  // _WIN64 ]
+#		ifdef _WIN64 // [
+#			define PTRDIFF_MIN _I64_MIN
+#			define PTRDIFF_MAX _I64_MAX
+#		else // _WIN64 ][
+#			define PTRDIFF_MIN _I32_MIN
+#			define PTRDIFF_MAX _I32_MAX
+#		endif // _WIN64 ]
 
-#define SIG_ATOMIC_MIN  INT_MIN
-#define SIG_ATOMIC_MAX  INT_MAX
+#		define SIG_ATOMIC_MIN INT_MIN
+#		define SIG_ATOMIC_MAX INT_MAX
 
-#ifndef SIZE_MAX // [
-#  ifdef _WIN64 // [
-#     define SIZE_MAX  _UI64_MAX
-#  else // _WIN64 ][
-#     define SIZE_MAX  _UI32_MAX
-#  endif // _WIN64 ]
-#endif // SIZE_MAX ]
+#		ifndef SIZE_MAX      // [
+#			ifdef _WIN64 // [
+#				define SIZE_MAX _UI64_MAX
+#			else // _WIN64 ][
+#				define SIZE_MAX _UI32_MAX
+#			endif // _WIN64 ]
+#		endif         // SIZE_MAX ]
 
 // WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
-#ifndef WCHAR_MIN // [
-#  define WCHAR_MIN  0
-#endif  // WCHAR_MIN ]
-#ifndef WCHAR_MAX // [
-#  define WCHAR_MAX  _UI16_MAX
-#endif  // WCHAR_MAX ]
+#		ifndef WCHAR_MIN // [
+#			define WCHAR_MIN 0
+#		endif            // WCHAR_MIN ]
+#		ifndef WCHAR_MAX // [
+#			define WCHAR_MAX _UI16_MAX
+#		endif // WCHAR_MAX ]
 
-#define WINT_MIN  0
-#define WINT_MAX  _UI16_MAX
-
-#endif // __STDC_LIMIT_MACROS ]
+#		define WINT_MIN 0
+#		define WINT_MAX _UI16_MAX
 
+#	endif // __STDC_LIMIT_MACROS ]
 
 // 7.18.4 Limits of other integer types
 
-#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [   See footnote 224 at page 260
+#	if !defined(__cplusplus)                                              \
+	    || defined(                                                        \
+	        __STDC_CONSTANT_MACROS) // [   See footnote 224 at page 260
 
 // 7.18.4.1 Macros for minimum-width integer constants
 
-#define INT8_C(val)  val##i8
-#define INT16_C(val) val##i16
-#define INT32_C(val) val##i32
-#define INT64_C(val) val##i64
+#		define INT8_C(val) val##i8
+#		define INT16_C(val) val##i16
+#		define INT32_C(val) val##i32
+#		define INT64_C(val) val##i64
 
-#define UINT8_C(val)  val##ui8
-#define UINT16_C(val) val##ui16
-#define UINT32_C(val) val##ui32
-#define UINT64_C(val) val##ui64
+#		define UINT8_C(val) val##ui8
+#		define UINT16_C(val) val##ui16
+#		define UINT32_C(val) val##ui32
+#		define UINT64_C(val) val##ui64
 
 // 7.18.4.2 Macros for greatest-width integer constants
-#define INTMAX_C   INT64_C
-#define UINTMAX_C  UINT64_C
-
-#endif // __STDC_CONSTANT_MACROS ]
+#		define INTMAX_C INT64_C
+#		define UINTMAX_C UINT64_C
 
+#	endif // __STDC_CONSTANT_MACROS ]
 
 #endif // _MSC_STDINT_H_ ]
diff --git a/include/msvc_compat/strings.h b/include/msvc_compat/strings.h
index 996f256c..6a1acc0f 100644
--- a/include/msvc_compat/strings.h
+++ b/include/msvc_compat/strings.h
@@ -4,9 +4,10 @@
 /* MSVC doesn't define ffs/ffsl. This dummy strings.h header is provided
  * for both */
 #ifdef _MSC_VER
-#  include <intrin.h>
-#  pragma intrinsic(_BitScanForward)
-static __forceinline int ffsl(long x) {
+#	include <intrin.h>
+#	pragma intrinsic(_BitScanForward)
+static __forceinline int
+ffsl(long x) {
 	unsigned long i;
 
 	if (_BitScanForward(&i, x)) {
@@ -15,44 +16,46 @@ static __forceinline int ffsl(long x) {
 	return 0;
 }
 
-static __forceinline int ffs(int x) {
+static __forceinline int
+ffs(int x) {
 	return ffsl(x);
 }
 
-#  ifdef  _M_X64
-#    pragma intrinsic(_BitScanForward64)
-#  endif
+#	ifdef _M_X64
+#		pragma intrinsic(_BitScanForward64)
+#	endif
 
-static __forceinline int ffsll(unsigned __int64 x) {
+static __forceinline int
+ffsll(unsigned __int64 x) {
 	unsigned long i;
-#ifdef  _M_X64
+#	ifdef _M_X64
 	if (_BitScanForward64(&i, x)) {
 		return i + 1;
 	}
 	return 0;
-#else
-// Fallback for 32-bit build where 64-bit version not available
-// assuming little endian
+#	else
+	// Fallback for 32-bit build where 64-bit version not available
+	// assuming little endian
 	union {
 		unsigned __int64 ll;
-		unsigned   long l[2];
+		unsigned long    l[2];
 	} s;
 
 	s.ll = x;
 
 	if (_BitScanForward(&i, s.l[0])) {
 		return i + 1;
-	} else if(_BitScanForward(&i, s.l[1])) {
+	} else if (_BitScanForward(&i, s.l[1])) {
 		return i + 33;
 	}
 	return 0;
-#endif
+#	endif
 }
 
 #else
-#  define ffsll(x) __builtin_ffsll(x)
-#  define ffsl(x) __builtin_ffsl(x)
-#  define ffs(x) __builtin_ffs(x)
+#	define ffsll(x) __builtin_ffsll(x)
+#	define ffsl(x) __builtin_ffsl(x)
+#	define ffs(x) __builtin_ffs(x)
 #endif
 
 #endif /* strings_h */
diff --git a/jemalloc.pc.in b/jemalloc.pc.in
index c428a86d..b50770d1 100644
--- a/jemalloc.pc.in
+++ b/jemalloc.pc.in
@@ -3,10 +3,12 @@ exec_prefix=@exec_prefix@
 libdir=@libdir@
 includedir=@includedir@
 install_suffix=@install_suffix@
+jemalloc_prefix=@JEMALLOC_PREFIX@
 
 Name: jemalloc
 Description: A general purpose malloc(3) implementation that emphasizes fragmentation avoidance and scalable concurrency support.
-URL: http://jemalloc.net/
+URL: https://jemalloc.net/
 Version: @jemalloc_version_major@.@jemalloc_version_minor@.@jemalloc_version_bugfix@_@jemalloc_version_nrev@
 Cflags: -I${includedir}
 Libs: -L${libdir} -ljemalloc${install_suffix}
+Libs.private: @LIBS@
diff --git a/msvc/ReadMe.txt b/msvc/ReadMe.txt
deleted file mode 100644
index 633a7d49..00000000
--- a/msvc/ReadMe.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-
-How to build jemalloc for Windows
-=================================
-
-1. Install Cygwin with at least the following packages:
-   * autoconf
-   * autogen
-   * gawk
-   * grep
-   * sed
-
-2. Install Visual Studio 2015 or 2017 with Visual C++
-
-3. Add Cygwin\bin to the PATH environment variable
-
-4. Open "x64 Native Tools Command Prompt for VS 2017"
-   (note: x86/x64 doesn't matter at this point)
-
-5. Generate header files:
-   sh -c "CC=cl ./autogen.sh"
-
-6. Now the project can be opened and built in Visual Studio:
-   msvc\jemalloc_vc2017.sln
diff --git a/msvc/jemalloc_vc2019.sln b/msvc/jemalloc_vc2019.sln
new file mode 100644
index 00000000..871ea9d4
--- /dev/null
+++ b/msvc/jemalloc_vc2019.sln
@@ -0,0 +1,63 @@
+﻿
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 14
+VisualStudioVersion = 14.0.24720.0
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{70A99006-6DE9-472B-8F83-4CEE6C616DF3}"
+	ProjectSection(SolutionItems) = preProject
+		ReadMe.txt = ReadMe.txt
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jemalloc", "projects\vc2019\jemalloc\jemalloc.vcxproj", "{8D6BB292-9E1C-413D-9F98-4864BDC1514A}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_threads", "projects\vc2019\test_threads\test_threads.vcxproj", "{09028CFD-4EB7-491D-869C-0708DB97ED44}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|x64 = Debug|x64
+		Debug|x86 = Debug|x86
+		Debug-static|x64 = Debug-static|x64
+		Debug-static|x86 = Debug-static|x86
+		Release|x64 = Release|x64
+		Release|x86 = Release|x86
+		Release-static|x64 = Release-static|x64
+		Release-static|x86 = Release-static|x86
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.ActiveCfg = Debug|x64
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.Build.0 = Debug|x64
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.ActiveCfg = Debug|Win32
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.Build.0 = Debug|Win32
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.ActiveCfg = Debug-static|x64
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.Build.0 = Debug-static|x64
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.ActiveCfg = Debug-static|Win32
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.Build.0 = Debug-static|Win32
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.ActiveCfg = Release|x64
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.Build.0 = Release|x64
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.ActiveCfg = Release|Win32
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.Build.0 = Release|Win32
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.ActiveCfg = Release-static|x64
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.Build.0 = Release-static|x64
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.ActiveCfg = Release-static|Win32
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.Build.0 = Release-static|Win32
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.ActiveCfg = Debug|x64
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.Build.0 = Debug|x64
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.ActiveCfg = Debug|Win32
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.Build.0 = Debug|Win32
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.ActiveCfg = Debug-static|x64
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.Build.0 = Debug-static|x64
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.ActiveCfg = Debug-static|Win32
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.Build.0 = Debug-static|Win32
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.ActiveCfg = Release|x64
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.Build.0 = Release|x64
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.ActiveCfg = Release|Win32
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.Build.0 = Release|Win32
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.ActiveCfg = Release-static|x64
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.Build.0 = Release-static|x64
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.ActiveCfg = Release-static|Win32
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.Build.0 = Release-static|Win32
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/msvc/jemalloc_vc2022.sln b/msvc/jemalloc_vc2022.sln
new file mode 100644
index 00000000..898574f1
--- /dev/null
+++ b/msvc/jemalloc_vc2022.sln
@@ -0,0 +1,63 @@
+﻿
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 14
+VisualStudioVersion = 14.0.24720.0
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{70A99006-6DE9-472B-8F83-4CEE6C616DF3}"
+	ProjectSection(SolutionItems) = preProject
+		ReadMe.txt = ReadMe.txt
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jemalloc", "projects\vc2022\jemalloc\jemalloc.vcxproj", "{8D6BB292-9E1C-413D-9F98-4864BDC1514A}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_threads", "projects\vc2022\test_threads\test_threads.vcxproj", "{09028CFD-4EB7-491D-869C-0708DB97ED44}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|x64 = Debug|x64
+		Debug|x86 = Debug|x86
+		Debug-static|x64 = Debug-static|x64
+		Debug-static|x86 = Debug-static|x86
+		Release|x64 = Release|x64
+		Release|x86 = Release|x86
+		Release-static|x64 = Release-static|x64
+		Release-static|x86 = Release-static|x86
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.ActiveCfg = Debug|x64
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.Build.0 = Debug|x64
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.ActiveCfg = Debug|Win32
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.Build.0 = Debug|Win32
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.ActiveCfg = Debug-static|x64
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.Build.0 = Debug-static|x64
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.ActiveCfg = Debug-static|Win32
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.Build.0 = Debug-static|Win32
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.ActiveCfg = Release|x64
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.Build.0 = Release|x64
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.ActiveCfg = Release|Win32
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.Build.0 = Release|Win32
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.ActiveCfg = Release-static|x64
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.Build.0 = Release-static|x64
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.ActiveCfg = Release-static|Win32
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.Build.0 = Release-static|Win32
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.ActiveCfg = Debug|x64
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.Build.0 = Debug|x64
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.ActiveCfg = Debug|Win32
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.Build.0 = Debug|Win32
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.ActiveCfg = Debug-static|x64
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.Build.0 = Debug-static|x64
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.ActiveCfg = Debug-static|Win32
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.Build.0 = Debug-static|Win32
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.ActiveCfg = Release|x64
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.Build.0 = Release|x64
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.ActiveCfg = Release|Win32
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.Build.0 = Release|Win32
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.ActiveCfg = Release-static|x64
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.Build.0 = Release-static|x64
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.ActiveCfg = Release-static|Win32
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.Build.0 = Release-static|Win32
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
index ec028a1a..ca2a8532 100644
--- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
+++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
@@ -61,7 +61,9 @@
     <ClCompile Include="..\..\..\..\src\fxp.c" />
     <ClCompile Include="..\..\..\..\src\hook.c" />
     <ClCompile Include="..\..\..\..\src\hpa.c" />
+    <ClCompile Include="..\..\..\..\src\hpa_central.c" />
     <ClCompile Include="..\..\..\..\src\hpa_hooks.c" />
+    <ClCompile Include="..\..\..\..\src\hpa_utils.c" />
     <ClCompile Include="..\..\..\..\src\hpdata.c" />
     <ClCompile Include="..\..\..\..\src\inspect.c" />
     <ClCompile Include="..\..\..\..\src\jemalloc.c" />
@@ -72,7 +74,6 @@
     <ClCompile Include="..\..\..\..\src\nstime.c" />
     <ClCompile Include="..\..\..\..\src\pa.c" />
     <ClCompile Include="..\..\..\..\src\pa_extra.c" />
-    <ClCompile Include="..\..\..\..\src\pai.c" />
     <ClCompile Include="..\..\..\..\src\pac.c" />
     <ClCompile Include="..\..\..\..\src\pages.c" />
     <ClCompile Include="..\..\..\..\src\peak_event.c" />
@@ -94,8 +95,10 @@
     <ClCompile Include="..\..\..\..\src\tcache.c" />
     <ClCompile Include="..\..\..\..\src\test_hooks.c" />
     <ClCompile Include="..\..\..\..\src\thread_event.c" />
+    <ClCompile Include="..\..\..\..\src\thread_event_registry.c" />
     <ClCompile Include="..\..\..\..\src\ticker.c" />
     <ClCompile Include="..\..\..\..\src\tsd.c" />
+    <ClCompile Include="..\..\..\..\src\util.c" />
     <ClCompile Include="..\..\..\..\src\witness.c" />
   </ItemGroup>
   <PropertyGroup Label="Globals">
@@ -377,4 +380,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
index 1b43e9f2..443e71a5 100644
--- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
+++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
@@ -67,9 +67,15 @@
     <ClCompile Include="..\..\..\..\src\hpa.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\..\src\hpa_central.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\..\..\..\src\hpa_hooks.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\..\src\hpa_utils.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\..\..\..\src\hpdata.c">
       <Filter>Source Files</Filter>
     </ClCompile>
@@ -100,9 +106,6 @@
     <ClCompile Include="..\..\..\..\src\pa_extra.c">
       <Filter>Source Files</Filter>
     </ClCompile>
-    <ClCompile Include="..\..\..\..\src\pai.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
     <ClCompile Include="..\..\..\..\src\pac.c">
       <Filter>Source Files</Filter>
     </ClCompile>
@@ -160,12 +163,18 @@
     <ClCompile Include="..\..\..\..\src\thread_event.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\..\src\thread_event_registry.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\..\..\..\src\ticker.c">
       <Filter>Source Files</Filter>
     </ClCompile>
     <ClCompile Include="..\..\..\..\src\tsd.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\..\src\util.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\..\..\..\src\witness.c">
       <Filter>Source Files</Filter>
     </ClCompile>
@@ -194,4 +203,4 @@
       <Filter>Source Files</Filter>
     </ClCompile>
   </ItemGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
index a8004dbd..c5d1116b 100644
--- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
+++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
@@ -61,7 +61,9 @@
     <ClCompile Include="..\..\..\..\src\fxp.c" />
     <ClCompile Include="..\..\..\..\src\hook.c" />
     <ClCompile Include="..\..\..\..\src\hpa.c" />
+    <ClCompile Include="..\..\..\..\src\hpa_central.c" />
     <ClCompile Include="..\..\..\..\src\hpa_hooks.c" />
+    <ClCompile Include="..\..\..\..\src\hpa_utils.c" />
     <ClCompile Include="..\..\..\..\src\hpdata.c" />
     <ClCompile Include="..\..\..\..\src\inspect.c" />
     <ClCompile Include="..\..\..\..\src\jemalloc.c" />
@@ -72,7 +74,6 @@
     <ClCompile Include="..\..\..\..\src\nstime.c" />
     <ClCompile Include="..\..\..\..\src\pa.c" />
     <ClCompile Include="..\..\..\..\src\pa_extra.c" />
-    <ClCompile Include="..\..\..\..\src\pai.c" />
     <ClCompile Include="..\..\..\..\src\pac.c" />
     <ClCompile Include="..\..\..\..\src\pages.c" />
     <ClCompile Include="..\..\..\..\src\peak_event.c" />
@@ -94,8 +95,10 @@
     <ClCompile Include="..\..\..\..\src\tcache.c" />
     <ClCompile Include="..\..\..\..\src\test_hooks.c" />
     <ClCompile Include="..\..\..\..\src\thread_event.c" />
+    <ClCompile Include="..\..\..\..\src\thread_event_registry.c" />
     <ClCompile Include="..\..\..\..\src\ticker.c" />
     <ClCompile Include="..\..\..\..\src\tsd.c" />
+    <ClCompile Include="..\..\..\..\src\util.c" />
     <ClCompile Include="..\..\..\..\src\witness.c" />
   </ItemGroup>
   <PropertyGroup Label="Globals">
@@ -376,4 +379,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
index 1b43e9f2..443e71a5 100644
--- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
+++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
@@ -67,9 +67,15 @@
     <ClCompile Include="..\..\..\..\src\hpa.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\..\src\hpa_central.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\..\..\..\src\hpa_hooks.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\..\src\hpa_utils.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\..\..\..\src\hpdata.c">
       <Filter>Source Files</Filter>
     </ClCompile>
@@ -100,9 +106,6 @@
     <ClCompile Include="..\..\..\..\src\pa_extra.c">
       <Filter>Source Files</Filter>
     </ClCompile>
-    <ClCompile Include="..\..\..\..\src\pai.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
     <ClCompile Include="..\..\..\..\src\pac.c">
       <Filter>Source Files</Filter>
     </ClCompile>
@@ -160,12 +163,18 @@
     <ClCompile Include="..\..\..\..\src\thread_event.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\..\src\thread_event_registry.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\..\..\..\src\ticker.c">
       <Filter>Source Files</Filter>
     </ClCompile>
     <ClCompile Include="..\..\..\..\src\tsd.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\..\src\util.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\..\..\..\src\witness.c">
       <Filter>Source Files</Filter>
     </ClCompile>
@@ -194,4 +203,4 @@
       <Filter>Source Files</Filter>
     </ClCompile>
   </ItemGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj
new file mode 100644
index 00000000..4df570c8
--- /dev/null
+++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj
@@ -0,0 +1,382 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="16.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug-static|Win32">
+      <Configuration>Debug-static</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug-static|x64">
+      <Configuration>Debug-static</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release-static|Win32">
+      <Configuration>Release-static</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release-static|x64">
+      <Configuration>Release-static</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\..\src\arena.c" />
+    <ClCompile Include="..\..\..\..\src\background_thread.c" />
+    <ClCompile Include="..\..\..\..\src\base.c" />
+    <ClCompile Include="..\..\..\..\src\bin.c" />
+    <ClCompile Include="..\..\..\..\src\bin_info.c" />
+    <ClCompile Include="..\..\..\..\src\bitmap.c" />
+    <ClCompile Include="..\..\..\..\src\buf_writer.c" />
+    <ClCompile Include="..\..\..\..\src\cache_bin.c" />
+    <ClCompile Include="..\..\..\..\src\ckh.c" />
+    <ClCompile Include="..\..\..\..\src\counter.c" />
+    <ClCompile Include="..\..\..\..\src\ctl.c" />
+    <ClCompile Include="..\..\..\..\src\decay.c" />
+    <ClCompile Include="..\..\..\..\src\div.c" />
+    <ClCompile Include="..\..\..\..\src\ecache.c" />
+    <ClCompile Include="..\..\..\..\src\edata.c" />
+    <ClCompile Include="..\..\..\..\src\edata_cache.c" />
+    <ClCompile Include="..\..\..\..\src\ehooks.c" />
+    <ClCompile Include="..\..\..\..\src\emap.c" />
+    <ClCompile Include="..\..\..\..\src\eset.c" />
+    <ClCompile Include="..\..\..\..\src\exp_grow.c" />
+    <ClCompile Include="..\..\..\..\src\extent.c" />
+    <ClCompile Include="..\..\..\..\src\extent_dss.c" />
+    <ClCompile Include="..\..\..\..\src\extent_mmap.c" />
+    <ClCompile Include="..\..\..\..\src\fxp.c" />
+    <ClCompile Include="..\..\..\..\src\hook.c" />
+    <ClCompile Include="..\..\..\..\src\hpa.c" />
+    <ClCompile Include="..\..\..\..\src\hpa_central.c" />
+    <ClCompile Include="..\..\..\..\src\hpa_hooks.c" />
+    <ClCompile Include="..\..\..\..\src\hpa_utils.c" />
+    <ClCompile Include="..\..\..\..\src\hpdata.c" />
+    <ClCompile Include="..\..\..\..\src\inspect.c" />
+    <ClCompile Include="..\..\..\..\src\jemalloc.c" />
+    <ClCompile Include="..\..\..\..\src\large.c" />
+    <ClCompile Include="..\..\..\..\src\log.c" />
+    <ClCompile Include="..\..\..\..\src\malloc_io.c" />
+    <ClCompile Include="..\..\..\..\src\mutex.c" />
+    <ClCompile Include="..\..\..\..\src\nstime.c" />
+    <ClCompile Include="..\..\..\..\src\pa.c" />
+    <ClCompile Include="..\..\..\..\src\pa_extra.c" />
+    <ClCompile Include="..\..\..\..\src\pac.c" />
+    <ClCompile Include="..\..\..\..\src\pages.c" />
+    <ClCompile Include="..\..\..\..\src\peak_event.c" />
+    <ClCompile Include="..\..\..\..\src\prof.c" />
+    <ClCompile Include="..\..\..\..\src\prof_data.c" />
+    <ClCompile Include="..\..\..\..\src\prof_log.c" />
+    <ClCompile Include="..\..\..\..\src\prof_recent.c" />
+    <ClCompile Include="..\..\..\..\src\prof_stats.c" />
+    <ClCompile Include="..\..\..\..\src\prof_sys.c" />
+    <ClCompile Include="..\..\..\..\src\psset.c" />
+    <ClCompile Include="..\..\..\..\src\rtree.c" />
+    <ClCompile Include="..\..\..\..\src\safety_check.c" />
+    <ClCompile Include="..\..\..\..\src\san.c" />
+    <ClCompile Include="..\..\..\..\src\san_bump.c" />
+    <ClCompile Include="..\..\..\..\src\sc.c" />
+    <ClCompile Include="..\..\..\..\src\sec.c" />
+    <ClCompile Include="..\..\..\..\src\stats.c" />
+    <ClCompile Include="..\..\..\..\src\sz.c" />
+    <ClCompile Include="..\..\..\..\src\tcache.c" />
+    <ClCompile Include="..\..\..\..\src\test_hooks.c" />
+    <ClCompile Include="..\..\..\..\src\thread_event.c" />
+    <ClCompile Include="..\..\..\..\src\thread_event_registry.c" />
+    <ClCompile Include="..\..\..\..\src\ticker.c" />
+    <ClCompile Include="..\..\..\..\src\tsd.c" />
+    <ClCompile Include="..\..\..\..\src\util.c" />
+    <ClCompile Include="..\..\..\..\src\witness.c" />
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{8D6BB292-9E1C-413D-9F98-4864BDC1514A}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>jemalloc</RootNamespace>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)d</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)-$(PlatformToolset)-$(Configuration)</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)-$(PlatformToolset)-$(Configuration)</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)d</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration)</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration)</TargetName>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+      <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+      <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+      <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>JEMALLOC_NO_PRIVATE_NAMESPACE;JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+      <DebugInformationFormat>OldStyle</DebugInformationFormat>
+      <MinimalRebuild>false</MinimalRebuild>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>_REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+      <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>_REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+      <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+      <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+      <DebugInformationFormat>OldStyle</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters
new file mode 100644
index 00000000..443e71a5
--- /dev/null
+++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters
@@ -0,0 +1,206 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\..\src\arena.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\background_thread.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\base.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\bin.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\bitmap.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\buf_writer.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\cache_bin.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\ckh.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\counter.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\ctl.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\decay.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\div.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\emap.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\exp_grow.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\extent.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\extent_dss.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\extent_mmap.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\fxp.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\hook.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\hpa.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\hpa_central.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\hpa_hooks.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\hpa_utils.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\hpdata.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\inspect.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\jemalloc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\large.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\log.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\malloc_io.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\mutex.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\nstime.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\pa.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\pa_extra.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\pac.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\pages.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\peak_event.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\prof.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\prof_data.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\prof_log.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\prof_recent.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\prof_stats.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\prof_sys.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\psset.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\rtree.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\safety_check.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\sc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\sec.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\stats.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\sz.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\tcache.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\test_hooks.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\thread_event.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\thread_event_registry.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\ticker.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\tsd.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\util.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\witness.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\bin_info.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\ecache.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\edata.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\edata_cache.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\ehooks.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\eset.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\san.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\san_bump.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
diff --git a/msvc/projects/vc2019/test_threads/test_threads.vcxproj b/msvc/projects/vc2019/test_threads/test_threads.vcxproj
new file mode 100644
index 00000000..8471a41e
--- /dev/null
+++ b/msvc/projects/vc2019/test_threads/test_threads.vcxproj
@@ -0,0 +1,326 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="16.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug-static|Win32">
+      <Configuration>Debug-static</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug-static|x64">
+      <Configuration>Debug-static</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release-static|Win32">
+      <Configuration>Release-static</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release-static|x64">
+      <Configuration>Release-static</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{09028CFD-4EB7-491D-869C-0708DB97ED44}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>test_threads</RootNamespace>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\test_threads\test_threads.cpp" />
+    <ClCompile Include="..\..\..\test_threads\test_threads_main.cpp" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\jemalloc\jemalloc.vcxproj">
+      <Project>{8d6bb292-9e1c-413d-9f98-4864bdc1514a}</Project>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\test_threads\test_threads.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/msvc/projects/vc2019/test_threads/test_threads.vcxproj.filters b/msvc/projects/vc2019/test_threads/test_threads.vcxproj.filters
new file mode 100644
index 00000000..fa4588fd
--- /dev/null
+++ b/msvc/projects/vc2019/test_threads/test_threads.vcxproj.filters
@@ -0,0 +1,26 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\test_threads\test_threads.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\test_threads\test_threads_main.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\test_threads\test_threads.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj
new file mode 100644
index 00000000..5e256ec6
--- /dev/null
+++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj
@@ -0,0 +1,382 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="17.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug-static|Win32">
+      <Configuration>Debug-static</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug-static|x64">
+      <Configuration>Debug-static</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release-static|Win32">
+      <Configuration>Release-static</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release-static|x64">
+      <Configuration>Release-static</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\..\src\arena.c" />
+    <ClCompile Include="..\..\..\..\src\background_thread.c" />
+    <ClCompile Include="..\..\..\..\src\base.c" />
+    <ClCompile Include="..\..\..\..\src\bin.c" />
+    <ClCompile Include="..\..\..\..\src\bin_info.c" />
+    <ClCompile Include="..\..\..\..\src\bitmap.c" />
+    <ClCompile Include="..\..\..\..\src\buf_writer.c" />
+    <ClCompile Include="..\..\..\..\src\cache_bin.c" />
+    <ClCompile Include="..\..\..\..\src\ckh.c" />
+    <ClCompile Include="..\..\..\..\src\counter.c" />
+    <ClCompile Include="..\..\..\..\src\ctl.c" />
+    <ClCompile Include="..\..\..\..\src\decay.c" />
+    <ClCompile Include="..\..\..\..\src\div.c" />
+    <ClCompile Include="..\..\..\..\src\ecache.c" />
+    <ClCompile Include="..\..\..\..\src\edata.c" />
+    <ClCompile Include="..\..\..\..\src\edata_cache.c" />
+    <ClCompile Include="..\..\..\..\src\ehooks.c" />
+    <ClCompile Include="..\..\..\..\src\emap.c" />
+    <ClCompile Include="..\..\..\..\src\eset.c" />
+    <ClCompile Include="..\..\..\..\src\exp_grow.c" />
+    <ClCompile Include="..\..\..\..\src\extent.c" />
+    <ClCompile Include="..\..\..\..\src\extent_dss.c" />
+    <ClCompile Include="..\..\..\..\src\extent_mmap.c" />
+    <ClCompile Include="..\..\..\..\src\fxp.c" />
+    <ClCompile Include="..\..\..\..\src\hook.c" />
+    <ClCompile Include="..\..\..\..\src\hpa.c" />
+    <ClCompile Include="..\..\..\..\src\hpa_central.c" />
+    <ClCompile Include="..\..\..\..\src\hpa_hooks.c" />
+    <ClCompile Include="..\..\..\..\src\hpa_utils.c" />
+    <ClCompile Include="..\..\..\..\src\hpdata.c" />
+    <ClCompile Include="..\..\..\..\src\inspect.c" />
+    <ClCompile Include="..\..\..\..\src\jemalloc.c" />
+    <ClCompile Include="..\..\..\..\src\large.c" />
+    <ClCompile Include="..\..\..\..\src\log.c" />
+    <ClCompile Include="..\..\..\..\src\malloc_io.c" />
+    <ClCompile Include="..\..\..\..\src\mutex.c" />
+    <ClCompile Include="..\..\..\..\src\nstime.c" />
+    <ClCompile Include="..\..\..\..\src\pa.c" />
+    <ClCompile Include="..\..\..\..\src\pa_extra.c" />
+    <ClCompile Include="..\..\..\..\src\pac.c" />
+    <ClCompile Include="..\..\..\..\src\pages.c" />
+    <ClCompile Include="..\..\..\..\src\peak_event.c" />
+    <ClCompile Include="..\..\..\..\src\prof.c" />
+    <ClCompile Include="..\..\..\..\src\prof_data.c" />
+    <ClCompile Include="..\..\..\..\src\prof_log.c" />
+    <ClCompile Include="..\..\..\..\src\prof_recent.c" />
+    <ClCompile Include="..\..\..\..\src\prof_stats.c" />
+    <ClCompile Include="..\..\..\..\src\prof_sys.c" />
+    <ClCompile Include="..\..\..\..\src\psset.c" />
+    <ClCompile Include="..\..\..\..\src\rtree.c" />
+    <ClCompile Include="..\..\..\..\src\safety_check.c" />
+    <ClCompile Include="..\..\..\..\src\san.c" />
+    <ClCompile Include="..\..\..\..\src\san_bump.c" />
+    <ClCompile Include="..\..\..\..\src\sc.c" />
+    <ClCompile Include="..\..\..\..\src\sec.c" />
+    <ClCompile Include="..\..\..\..\src\stats.c" />
+    <ClCompile Include="..\..\..\..\src\sz.c" />
+    <ClCompile Include="..\..\..\..\src\tcache.c" />
+    <ClCompile Include="..\..\..\..\src\test_hooks.c" />
+    <ClCompile Include="..\..\..\..\src\thread_event.c" />
+    <ClCompile Include="..\..\..\..\src\thread_event_registry.c" />
+    <ClCompile Include="..\..\..\..\src\ticker.c" />
+    <ClCompile Include="..\..\..\..\src\tsd.c" />
+    <ClCompile Include="..\..\..\..\src\util.c" />
+    <ClCompile Include="..\..\..\..\src\witness.c" />
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{8D6BB292-9E1C-413D-9F98-4864BDC1514A}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>jemalloc</RootNamespace>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)d</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)-$(PlatformToolset)-$(Configuration)</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)-$(PlatformToolset)-$(Configuration)</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)d</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration)</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration)</TargetName>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+      <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+      <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+      <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>JEMALLOC_NO_PRIVATE_NAMESPACE;JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+      <DebugInformationFormat>OldStyle</DebugInformationFormat>
+      <MinimalRebuild>false</MinimalRebuild>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>_REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+      <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>_REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+      <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+      <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+      <DebugInformationFormat>OldStyle</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters
new file mode 100644
index 00000000..443e71a5
--- /dev/null
+++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters
@@ -0,0 +1,206 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\..\src\arena.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\background_thread.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\base.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\bin.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\bitmap.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\buf_writer.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\cache_bin.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\ckh.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\counter.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\ctl.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\decay.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\div.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\emap.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\exp_grow.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\extent.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\extent_dss.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\extent_mmap.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\fxp.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\hook.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\hpa.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\hpa_central.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\hpa_hooks.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\hpa_utils.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\hpdata.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\inspect.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\jemalloc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\large.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\log.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\malloc_io.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\mutex.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\nstime.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\pa.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\pa_extra.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\pac.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\pages.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\peak_event.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\prof.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\prof_data.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\prof_log.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\prof_recent.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\prof_stats.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\prof_sys.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\psset.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\rtree.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\safety_check.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\sc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\sec.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\stats.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\sz.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\tcache.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\test_hooks.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\thread_event.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\thread_event_registry.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\ticker.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\tsd.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\util.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\witness.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\bin_info.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\ecache.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\edata.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\edata_cache.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\ehooks.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\eset.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\san.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\san_bump.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
diff --git a/msvc/projects/vc2022/test_threads/test_threads.vcxproj b/msvc/projects/vc2022/test_threads/test_threads.vcxproj
new file mode 100644
index 00000000..471f693b
--- /dev/null
+++ b/msvc/projects/vc2022/test_threads/test_threads.vcxproj
@@ -0,0 +1,326 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug-static|Win32">
+      <Configuration>Debug-static</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug-static|x64">
+      <Configuration>Debug-static</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release-static|Win32">
+      <Configuration>Release-static</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release-static|x64">
+      <Configuration>Release-static</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{09028CFD-4EB7-491D-869C-0708DB97ED44}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>test_threads</RootNamespace>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\test_threads\test_threads.cpp" />
+    <ClCompile Include="..\..\..\test_threads\test_threads_main.cpp" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\jemalloc\jemalloc.vcxproj">
+      <Project>{8d6bb292-9e1c-413d-9f98-4864bdc1514a}</Project>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\test_threads\test_threads.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/msvc/projects/vc2022/test_threads/test_threads.vcxproj.filters b/msvc/projects/vc2022/test_threads/test_threads.vcxproj.filters
new file mode 100644
index 00000000..fa4588fd
--- /dev/null
+++ b/msvc/projects/vc2022/test_threads/test_threads.vcxproj.filters
@@ -0,0 +1,26 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\test_threads\test_threads.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\test_threads\test_threads_main.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\test_threads\test_threads.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/msvc/test_threads/test_threads.cpp b/msvc/test_threads/test_threads.cpp
index 6eed028d..e709c177 100644
--- a/msvc/test_threads/test_threads.cpp
+++ b/msvc/test_threads/test_threads.cpp
@@ -12,78 +12,108 @@
 #define JEMALLOC_NO_DEMANGLE
 #include <jemalloc/jemalloc.h>
 
-using std::vector;
+using std::minstd_rand;
 using std::thread;
 using std::uniform_int_distribution;
-using std::minstd_rand;
+using std::vector;
 
-int test_threads() {
-  je_malloc_conf = "narenas:3";
-  int narenas = 0;
-  size_t sz = sizeof(narenas);
-  je_mallctl("opt.narenas", (void *)&narenas, &sz, NULL, 0);
-  if (narenas != 3) {
-    printf("Error: unexpected number of arenas: %d\n", narenas);
-    return 1;
-  }
-  static const int sizes[] = { 7, 16, 32, 60, 91, 100, 120, 144, 169, 199, 255, 400, 670, 900, 917, 1025, 3333, 5190, 13131, 49192, 99999, 123123, 255265, 2333111 };
-  static const int numSizes = (int)(sizeof(sizes) / sizeof(sizes[0]));
-  vector<thread> workers;
-  static const int numThreads = narenas + 1, numAllocsMax = 25, numIter1 = 50, numIter2 = 50;
-  je_malloc_stats_print(NULL, NULL, NULL);
-  size_t allocated1;
-  size_t sz1 = sizeof(allocated1);
-  je_mallctl("stats.active", (void *)&allocated1, &sz1, NULL, 0);
-  printf("\nPress Enter to start threads...\n");
-  getchar();
-  printf("Starting %d threads x %d x %d iterations...\n", numThreads, numIter1, numIter2);
-  for (int i = 0; i < numThreads; i++) {
-    workers.emplace_back([tid=i]() {
-      uniform_int_distribution<int> sizeDist(0, numSizes - 1);
-      minstd_rand rnd(tid * 17);
-      uint8_t* ptrs[numAllocsMax];
-      int ptrsz[numAllocsMax];
-      for (int i = 0; i < numIter1; ++i) {
-        thread t([&]() {
-          for (int i = 0; i < numIter2; ++i) {
-            const int numAllocs = numAllocsMax - sizeDist(rnd);
-            for (int j = 0; j < numAllocs; j += 64) {
-              const int x = sizeDist(rnd);
-              const int sz = sizes[x];
-              ptrsz[j] = sz;
-              ptrs[j] = (uint8_t*)je_malloc(sz);
-              if (!ptrs[j]) {
-                printf("Unable to allocate %d bytes in thread %d, iter %d, alloc %d. %d\n", sz, tid, i, j, x);
-                exit(1);
-              }
-              for (int k = 0; k < sz; k++)
-                ptrs[j][k] = tid + k;
-            }
-            for (int j = 0; j < numAllocs; j += 64) {
-              for (int k = 0, sz = ptrsz[j]; k < sz; k++)
-                if (ptrs[j][k] != (uint8_t)(tid + k)) {
-                  printf("Memory error in thread %d, iter %d, alloc %d @ %d : %02X!=%02X\n", tid, i, j, k, ptrs[j][k], (uint8_t)(tid + k));
-                  exit(1);
-                }
-              je_free(ptrs[j]);
-            }
-          }
-        });
-        t.join();
-      }
-    });
-  }
-  for (thread& t : workers) {
-    t.join();
-  }
-  je_malloc_stats_print(NULL, NULL, NULL);
-  size_t allocated2;
-  je_mallctl("stats.active", (void *)&allocated2, &sz1, NULL, 0);
-  size_t leaked = allocated2 - allocated1;
-  printf("\nDone. Leaked: %zd bytes\n", leaked);
-  bool failed = leaked > 65536; // in case C++ runtime allocated something (e.g. iostream locale or facet)
-  printf("\nTest %s!\n", (failed ? "FAILED" : "successful"));
-  printf("\nPress Enter to continue...\n");
-  getchar();
-  return failed ? 1 : 0;
+int
+test_threads() {
+	je_malloc_conf = "narenas:3";
+	int    narenas = 0;
+	size_t sz = sizeof(narenas);
+	je_mallctl("opt.narenas", (void *)&narenas, &sz, NULL, 0);
+	if (narenas != 3) {
+		printf("Error: unexpected number of arenas: %d\n", narenas);
+		return 1;
+	}
+	static const int sizes[] = {7, 16, 32, 60, 91, 100, 120, 144, 169, 199,
+	    255, 400, 670, 900, 917, 1025, 3333, 5190, 13131, 49192, 99999,
+	    123123, 255265, 2333111};
+	static const int numSizes = (int)(sizeof(sizes) / sizeof(sizes[0]));
+	vector<thread>   workers;
+	static const int numThreads = narenas + 1, numAllocsMax = 25,
+	                 numIter1 = 50, numIter2 = 50;
+	je_malloc_stats_print(NULL, NULL, NULL);
+	size_t allocated1;
+	size_t sz1 = sizeof(allocated1);
+	je_mallctl("stats.active", (void *)&allocated1, &sz1, NULL, 0);
+	printf("\nPress Enter to start threads...\n");
+	getchar();
+	printf("Starting %d threads x %d x %d iterations...\n", numThreads,
+	    numIter1, numIter2);
+	for (int i = 0; i < numThreads; i++) {
+		workers.emplace_back([tid = i]() {
+			uniform_int_distribution<int> sizeDist(0, numSizes - 1);
+			minstd_rand                   rnd(tid * 17);
+			uint8_t                      *ptrs[numAllocsMax];
+			int                           ptrsz[numAllocsMax];
+			for (int i = 0; i < numIter1; ++i) {
+				thread t([&]() {
+					for (int i = 0; i < numIter2; ++i) {
+						const int numAllocs =
+						    numAllocsMax
+						    - sizeDist(rnd);
+						for (int j = 0; j < numAllocs;
+						     j += 64) {
+							const int x = sizeDist(
+							    rnd);
+							const int sz = sizes[x];
+							ptrsz[j] = sz;
+							ptrs[j] = (uint8_t *)
+							    je_malloc(sz);
+							if (!ptrs[j]) {
+								printf(
+								    "Unable to allocate %d bytes in thread %d, iter %d, alloc %d. %d\n",
+								    sz, tid, i,
+								    j, x);
+								exit(1);
+							}
+							for (int k = 0; k < sz;
+							     k++)
+								ptrs[j][k] = tid
+								    + k;
+						}
+						for (int j = 0; j < numAllocs;
+						     j += 64) {
+							for (int k = 0,
+							         sz = ptrsz[j];
+							     k < sz; k++)
+								if (ptrs[j][k]
+								    != (uint8_t)(tid
+								        + k)) {
+									printf(
+									    "Memory error in thread %d, iter %d, alloc %d @ %d : %02X!=%02X\n",
+									    tid,
+									    i,
+									    j,
+									    k,
+									    ptrs[j]
+									        [k],
+									    (uint8_t)(tid
+									        + k));
+									exit(1);
+								}
+							je_free(ptrs[j]);
+						}
+					}
+				});
+				t.join();
+			}
+		});
+	}
+	for (thread &t : workers) {
+		t.join();
+	}
+	je_malloc_stats_print(NULL, NULL, NULL);
+	size_t allocated2;
+	je_mallctl("stats.active", (void *)&allocated2, &sz1, NULL, 0);
+	size_t leaked = allocated2 - allocated1;
+	printf("\nDone. Leaked: %zd bytes\n", leaked);
+	bool failed = leaked
+	    > 65536; // in case C++ runtime allocated something (e.g. iostream locale or facet)
+	printf("\nTest %s!\n", (failed ? "FAILED" : "successful"));
+	printf("\nPress Enter to continue...\n");
+	getchar();
+	return failed ? 1 : 0;
 }
diff --git a/msvc/test_threads/test_threads_main.cpp b/msvc/test_threads/test_threads_main.cpp
index 0a022fba..3e88c286 100644
--- a/msvc/test_threads/test_threads_main.cpp
+++ b/msvc/test_threads/test_threads_main.cpp
@@ -5,7 +5,8 @@
 
 using namespace std::chrono_literals;
 
-int main(int argc, char** argv) {
-  int rc = test_threads();
-  return rc;
+int
+main(int argc, char **argv) {
+	int rc = test_threads();
+	return rc;
 }
diff --git a/scripts/README_GH_ACTIONS.md b/scripts/README_GH_ACTIONS.md
new file mode 100644
index 00000000..1cb236ad
--- /dev/null
+++ b/scripts/README_GH_ACTIONS.md
@@ -0,0 +1,181 @@
+# GitHub Actions Workflow Generator
+
+This directory contains `gen_gh_actions.py`, a script to generate GitHub Actions CI workflows from the same configuration logic used for Travis CI.
+
+## Usage
+
+The script can generate workflows for different platforms:
+
+```bash
+# Generate Linux CI workflow (default)
+./scripts/gen_gh_actions.py linux > .github/workflows/linux-ci.yml
+
+# Generate macOS CI workflow
+./scripts/gen_gh_actions.py macos > .github/workflows/macos-ci.yml
+
+# Generate Windows CI workflow
+./scripts/gen_gh_actions.py windows > .github/workflows/windows-ci.yml
+
+# Generate FreeBSD CI workflow
+./scripts/gen_gh_actions.py freebsd > .github/workflows/freebsd-ci.yml
+
+# Generate combined workflow with all platforms
+./scripts/gen_gh_actions.py all > .github/workflows/ci-all.yml
+```
+
+## Generated Workflows
+
+### Linux CI (`linux-ci.yml`)
+- **test-linux** (AMD64): `ubuntu-latest` (x86_64)
+  - ~96 configurations covering GCC, Clang, various flags
+- **test-linux-arm64** (ARM64): `ubuntu-24.04-arm` (aarch64)
+  - ~14 configurations including large hugepage tests
+  - **Note:** Free ARM64 runners (Public Preview) - may have longer queue times during peak hours
+
+**Total:** 110 configurations
+
+### macOS CI (`macos-ci.yml`)
+- **test-macos** (Intel): `macos-15-intel` (x86_64)
+  - ~10 configurations with GCC compiler
+- **test-macos-arm64** (Apple Silicon): `macos-latest` (arm64)
+  - ~11 configurations including large hugepage tests
+
+**Total:** 21 configurations
+
+### Windows CI (`windows-ci.yml`)
+- **test-windows** (AMD64): `windows-latest` (x86_64)
+  - 10 configurations covering MinGW-GCC and MSVC compilers
+  - 32-bit and 64-bit builds
+  - Uses MSYS2 for build environment
+
+**Total:** 10 configurations
+
+### FreeBSD CI (`freebsd-ci.yml`)
+- **test-freebsd** (AMD64): Runs in FreeBSD VM on `ubuntu-latest`
+  - Matrix testing: debug (on/off), prof (on/off), arch (32/64-bit), uncommon configs
+  - 16 total configuration combinations
+  - Uses FreeBSD 15.0 via `vmactions/freebsd-vm@v1`
+  - Uses `gmake` (GNU Make) instead of BSD make
+
+**Total:** 16 configurations
+
+## Architecture Verification
+
+Each workflow includes a "Show OS version" step that prints:
+
+**Linux:**
+```bash
+=== System Information ===
+uname -a              # Kernel and architecture
+=== Architecture ===
+uname -m              # x86_64, aarch64, etc.
+arch                 # Architecture type
+=== CPU Info ===
+lscpu                # Detailed CPU information
+```
+
+**macOS:**
+```bash
+=== macOS Version ===
+sw_vers              # macOS version and build
+=== Architecture ===
+uname -m             # x86_64 or arm64
+arch                # i386 or arm64
+=== CPU Info ===
+sysctl machdep.cpu.brand_string  # CPU model
+```
+
+**Windows:**
+```cmd
+=== Windows Version ===
+systeminfo           # OS name and version
+ver                 # Windows version
+=== Architecture ===
+PROCESSOR_ARCHITECTURE  # AMD64, x86, ARM64
+```
+
+## GitHub Runner Images
+
+| Platform | Runner Label | Architecture | OS Version | Strategy |
+|----------|--------------|--------------|------------|----------|
+| Linux AMD64 | ubuntu-latest | x86_64 | Ubuntu 22.04+ | Auto-update |
+| Linux ARM64 | ubuntu-24.04-arm | aarch64 | Ubuntu 24.04 | Free (Public Preview) |
+| macOS Intel | macos-15-intel | x86_64 | macOS 15 Sequoia | Pinned |
+| macOS Apple Silicon | macos-15 | arm64 | macOS 15 Sequoia | Pinned |
+| Windows | windows-latest | x86_64 | Windows Server 2022+ | Auto-update |
+| FreeBSD | ubuntu-latest (VM) | x86_64 | FreeBSD 15.0 in VM | VM-based |
+
+### Runner Strategy Explained
+
+We use a **hybrid approach** to balance stability and maintenance:
+
+**Auto-update runners (`-latest`):**
+- **Linux AMD64**: `ubuntu-latest` - Very stable, rarely breaks, auto-updates to newest Ubuntu LTS
+- **Windows**: `windows-latest` - Backward compatible, auto-updates to newest Windows Server
+
+**Pinned runners (specific versions):**
+- **Linux ARM64**: `ubuntu-24.04-arm` - **Free for public repos** (Public Preview, may have queue delays)
+- **macOS Intel**: `macos-15-intel` - Last Intel macOS runner (EOL **August 2027**)
+- **macOS Apple Silicon**: `macos-15` - Pin for control over macOS upgrades
+
+**Why this approach?**
+-  Reduces maintenance (auto-update where safe)
+-  Prevents surprise breakages (pin where needed)
+-  Balances stability and staying current
+-  Uses free ARM64 runners for public repositories
+
+### ARM64 Queue Times
+
+**If you experience long waits for ARM64 jobs:**
+
+The `ubuntu-24.04-arm` runner is **free for public repositories** but is in **Public Preview**. GitHub warns: *"you may experience longer queue times during peak usage hours"*.
+
+To reduce wait times we should upgrade to Team/Enterprise plan - then we could use `ubuntu-24.04-arm64` for faster, paid runners
+
+### Important Deprecation Timeline
+
+| Date | Event | Action Required |
+|------|-------|------------------|
+| **August 2027** | macOS Intel runners removed | Must drop Intel macOS testing or use self-hosted |
+| **TBD** | ARM64 runners leave Public Preview | May see improved queue times |
+
+**Note:** `macos-15-intel` is the **last Intel-based macOS runner** from GitHub Actions. After August 2027, only Apple Silicon runners will be available.
+
+## Platform-Specific Details
+
+### Windows Build Process
+The Windows workflow uses:
+1. **MSYS2** setup via `msys2/setup-msys2@v2` action
+2. **MinGW-GCC**: Standard autotools build process in MSYS2 shell
+3. **MSVC (cl.exe)**: Requires `ilammy/msvc-dev-cmd@v1` for environment setup
+   - Uses `MSYS2_PATH_TYPE: inherit` to inherit Windows PATH
+   - Exports `AR=lib.exe`, `NM=dumpbin.exe`, `RANLIB=:`
+4. **mingw32-make**: Used instead of `make` (standard in MSYS2)
+
+### macOS Build Process
+- Uses Homebrew to install `autoconf`
+- Tests on both Intel (x86_64) and Apple Silicon (ARM64)
+- Standard autotools build process
+- Excludes certain malloc configurations not supported on macOS
+
+### Linux Build Process
+- Ubuntu Latest for AMD64, Ubuntu 24.04 for ARM64
+- Installs 32-bit cross-compilation dependencies when needed
+- Most comprehensive test matrix (110 configurations)
+
+## Relationship to Travis CI
+
+This script mirrors the logic from `gen_travis.py` but generates GitHub Actions workflows instead of `.travis.yml`. The test matrices are designed to provide equivalent coverage to the Travis CI configuration.
+
+## Regenerating Workflows
+
+To regenerate all workflows after modifying `gen_gh_actions.py`:
+
+```bash
+./scripts/gen_gh_actions.py linux > .github/workflows/linux-ci.yml
+./scripts/gen_gh_actions.py macos > .github/workflows/macos-ci.yml
+./scripts/gen_gh_actions.py windows > .github/workflows/windows-ci.yml
+```
+
+**Note**: The generated files should not be edited by hand. All changes should be made to `gen_gh_actions.py` and then regenerated.
+
diff --git a/scripts/check_trailing_whitespace.sh b/scripts/check_trailing_whitespace.sh
new file mode 100755
index 00000000..7aafe131
--- /dev/null
+++ b/scripts/check_trailing_whitespace.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+if git grep -E '\s+$' -- ':!*.md' ':!build-aux/install-sh'
+then
+	echo 'Error: found trailing whitespace' 1>&2
+	exit 1
+fi
diff --git a/scripts/gen_gh_actions.py b/scripts/gen_gh_actions.py
new file mode 100755
index 00000000..4c5474ab
--- /dev/null
+++ b/scripts/gen_gh_actions.py
@@ -0,0 +1,686 @@
+#!/usr/bin/env python3
+
+from itertools import combinations, chain
+from enum import Enum, auto
+
+
+LINUX = 'ubuntu-24.04'
+OSX = 'macos-latest'
+WINDOWS = 'windows-latest'
+FREEBSD = 'freebsd'
+
+AMD64 = 'amd64'
+ARM64 = 'arm64'
+PPC64LE = 'ppc64le'
+
+
+GITHUB_ACTIONS_TEMPLATE = """\
+# This config file is generated by ./scripts/gen_gh_actions.py.
+# Do not edit by hand.
+
+name: {name}
+
+on:
+  push:
+    branches: [ dev, ci_travis ]
+  pull_request:
+    branches: [ dev ]
+
+jobs:
+{jobs}
+"""
+
+
+class Option(object):
+    class Type:
+        COMPILER = auto()
+        COMPILER_FLAG = auto()
+        CONFIGURE_FLAG = auto()
+        MALLOC_CONF = auto()
+        FEATURE = auto()
+
+    def __init__(self, type, value):
+        self.type = type
+        self.value = value
+
+    @staticmethod
+    def as_compiler(value):
+        return Option(Option.Type.COMPILER, value)
+
+    @staticmethod
+    def as_compiler_flag(value):
+        return Option(Option.Type.COMPILER_FLAG, value)
+
+    @staticmethod
+    def as_configure_flag(value):
+        return Option(Option.Type.CONFIGURE_FLAG, value)
+
+    @staticmethod
+    def as_malloc_conf(value):
+        return Option(Option.Type.MALLOC_CONF, value)
+
+    @staticmethod
+    def as_feature(value):
+        return Option(Option.Type.FEATURE, value)
+
+    def __eq__(self, obj):
+        return (isinstance(obj, Option) and obj.type == self.type
+                and obj.value == self.value)
+
+    def __repr__(self):
+        type_names = {
+            Option.Type.COMPILER: 'COMPILER',
+            Option.Type.COMPILER_FLAG: 'COMPILER_FLAG',
+            Option.Type.CONFIGURE_FLAG: 'CONFIGURE_FLAG',
+            Option.Type.MALLOC_CONF: 'MALLOC_CONF',
+            Option.Type.FEATURE: 'FEATURE'
+        }
+        return f"Option({type_names[self.type]}, {repr(self.value)})"
+
+
+# The 'default' configuration is gcc, on linux, with no compiler or configure
+# flags.  We also test with clang, -m32, --enable-debug, --enable-prof,
+# --disable-stats, and --with-malloc-conf=tcache:false.  To avoid abusing
+# CI resources though, we don't test all 2**7 = 128 possible combinations of these;
+# instead, we only test combinations of up to 2 'unusual' settings, under the
+# hope that bugs involving interactions of such settings are rare.
+MAX_UNUSUAL_OPTIONS = 2
+
+
+GCC = Option.as_compiler('CC=gcc CXX=g++')
+CLANG = Option.as_compiler('CC=clang CXX=clang++')
+CL = Option.as_compiler('CC=cl.exe CXX=cl.exe')
+
+
+compilers_unusual = [CLANG,]
+
+
+CROSS_COMPILE_32BIT = Option.as_feature('CROSS_COMPILE_32BIT')
+feature_unusuals = [CROSS_COMPILE_32BIT]
+
+
+configure_flag_unusuals = [Option.as_configure_flag(opt) for opt in (
+    '--enable-debug',
+    '--enable-prof',
+    '--disable-stats',
+    '--disable-libdl',
+    '--enable-opt-safety-checks',
+    '--with-lg-page=16',
+    '--with-lg-page=16 --with-lg-hugepage=29',
+)]
+LARGE_HUGEPAGE = Option.as_configure_flag("--with-lg-page=16 --with-lg-hugepage=29")
+
+
+malloc_conf_unusuals = [Option.as_malloc_conf(opt) for opt in (
+    'tcache:false',
+    'dss:primary',
+    'percpu_arena:percpu',
+    'background_thread:true',
+)]
+
+
+all_unusuals = (compilers_unusual + feature_unusuals
+    + configure_flag_unusuals + malloc_conf_unusuals)
+
+
+def get_extra_cflags(os, compiler):
+    if os == WINDOWS:
+        # For non-CL compilers under Windows (for now it's only MinGW-GCC),
+        # -fcommon needs to be specified to correctly handle multiple
+        # 'malloc_conf' symbols and such, which are declared weak under Linux.
+        # Weak symbols don't work with MinGW-GCC.
+        if compiler != CL.value:
+            return ['-fcommon']
+        else:
+            return []
+
+    # We get some spurious errors when -Warray-bounds is enabled.
+    extra_cflags = ['-Werror', '-Wno-array-bounds']
+    if compiler == CLANG.value or os == OSX:
+        extra_cflags += [
+            '-Wno-unknown-warning-option',
+            '-Wno-ignored-attributes'
+        ]
+    if os == OSX:
+        extra_cflags += [
+            '-Wno-deprecated-declarations',
+        ]
+    return extra_cflags
+
+
+def format_env_dict(os, arch, combination):
+    """Format environment variables as a dictionary for the matrix."""
+    compilers = [x.value for x in combination if x.type == Option.Type.COMPILER]
+    compiler_flags = [x.value for x in combination if x.type == Option.Type.COMPILER_FLAG]
+    configure_flags = [x.value for x in combination if x.type == Option.Type.CONFIGURE_FLAG]
+    malloc_conf = [x.value for x in combination if x.type == Option.Type.MALLOC_CONF]
+    features = [x.value for x in combination if x.type == Option.Type.FEATURE]
+
+    if len(malloc_conf) > 0:
+        configure_flags.append('--with-malloc-conf=' + ','.join(malloc_conf))
+
+    if not compilers:
+        compiler = GCC.value
+    else:
+        compiler = compilers[0]
+
+    cross_compile = CROSS_COMPILE_32BIT.value in features
+    if os == LINUX and cross_compile:
+        compiler_flags.append('-m32')
+
+    env_dict = {}
+
+    # Parse compiler
+    cc_parts = compiler.split()
+    for part in cc_parts:
+        if part.startswith('CC='):
+            env_dict['CC'] = part.split('=')[1]
+        elif part.startswith('CXX='):
+            env_dict['CXX'] = part.split('=')[1]
+
+    # Add features
+    for feature in features:
+        env_dict[feature] = 'yes'
+
+    # Add flags
+    if compiler_flags:
+        env_dict['COMPILER_FLAGS'] = ' '.join(compiler_flags)
+    if configure_flags:
+        env_dict['CONFIGURE_FLAGS'] = ' '.join(configure_flags)
+
+    extra_cflags = get_extra_cflags(os, compiler)
+    if extra_cflags:
+        env_dict['EXTRA_CFLAGS'] = ' '.join(extra_cflags)
+
+    return env_dict
+
+
+def generate_job_matrix_entries(os, arch, exclude, max_unusual_opts, unusuals=all_unusuals):
+    """Generate matrix entries for a job."""
+    entries = []
+    for combination in chain.from_iterable(
+            [combinations(unusuals, i) for i in range(max_unusual_opts + 1)]):
+        if not any(excluded in combination for excluded in exclude):
+            env_dict = format_env_dict(os, arch, combination)
+            entries.append(env_dict)
+    return entries
+
+
+def generate_linux_job(arch):
+    """Generate Linux job configuration."""
+    os = LINUX
+
+    # Only generate 2 unusual options for AMD64 to reduce matrix size
+    max_unusual_opts = MAX_UNUSUAL_OPTIONS if arch == AMD64 else 1
+
+    exclude = []
+    if arch == PPC64LE:
+        # Avoid 32 bit builds and clang on PowerPC
+        exclude = (CROSS_COMPILE_32BIT, CLANG,)
+    if arch == ARM64:
+        # Avoid 32 bit build on ARM64
+        exclude = (CROSS_COMPILE_32BIT,)
+
+    if arch != ARM64:
+        exclude += [LARGE_HUGEPAGE]
+
+    linux_configure_flags = list(configure_flag_unusuals)
+    linux_configure_flags.append(Option.as_configure_flag("--enable-prof --enable-prof-frameptr"))
+
+    linux_unusuals = (compilers_unusual + feature_unusuals
+                    + linux_configure_flags + malloc_conf_unusuals)
+
+    matrix_entries = generate_job_matrix_entries(os, arch, exclude, max_unusual_opts, linux_unusuals)
+
+    arch_suffix = f"-{arch}" if arch != AMD64 else ""
+
+    # Select appropriate runner based on architecture
+    if arch == ARM64:
+        runner = "ubuntu-24.04-arm"    # Free ARM64 runner for public repos (Public Preview)
+    elif arch == PPC64LE:
+        # GitHub doesn't provide PPC runners, would need self-hosted
+        runner = "self-hosted-ppc64le"
+    else:  # AMD64
+        runner = "ubuntu-24.04"        # Ubuntu 24.04 LTS
+
+    job = f"""  test-linux{arch_suffix}:
+    runs-on: {runner}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+"""
+
+    for entry in matrix_entries:
+        job += "          - env:\n"
+        for key, value in entry.items():
+            # Properly escape values with special characters
+            if ' ' in str(value) or any(c in str(value) for c in [':', ',', '#']):
+                job += f'              {key}: "{value}"\n'
+            else:
+                job += f"              {key}: {value}\n"
+
+    # Add manual job entries
+    manual_entries = [
+        {
+            'CC': 'gcc',
+            'CXX': 'g++',
+            'CONFIGURE_FLAGS': '--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof',
+            'EXTRA_CFLAGS': '-Werror -Wno-array-bounds'
+        },
+        {
+            'CC': 'gcc',
+            'CXX': 'g++',
+            'CONFIGURE_FLAGS': '--enable-debug --enable-experimental-smallocx --enable-stats --enable-prof',
+            'EXTRA_CFLAGS': '-Werror -Wno-array-bounds'
+        }
+    ]
+
+    if arch == AMD64:
+        for entry in manual_entries:
+            job += "          - env:\n"
+            for key, value in entry.items():
+                if ' ' in str(value):
+                    job += f'              {key}: "{value}"\n'
+                else:
+                    job += f"              {key}: {value}\n"
+
+    job += f"""
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Show OS version
+      run: |
+        echo "=== System Information ==="
+        uname -a
+        echo ""
+        echo "=== Architecture ==="
+        uname -m
+        arch
+        echo ""
+        echo "=== OS Release ==="
+        cat /etc/os-release || true
+        echo ""
+        echo "=== CPU Info ==="
+        lscpu | grep -E "Architecture|CPU op-mode|Byte Order|CPU\(s\):" || true
+
+    - name: Install dependencies (32-bit)
+      if: matrix.env.CROSS_COMPILE_32BIT == 'yes'
+      run: |
+        sudo dpkg --add-architecture i386
+        sudo apt-get update
+        sudo apt-get install -y gcc-multilib g++-multilib libc6-dev-i386
+
+    - name: Build and test
+      env:
+        CC: ${{{{ matrix.env.CC }}}}
+        CXX: ${{{{ matrix.env.CXX }}}}
+        COMPILER_FLAGS: ${{{{ matrix.env.COMPILER_FLAGS }}}}
+        CONFIGURE_FLAGS: ${{{{ matrix.env.CONFIGURE_FLAGS }}}}
+        EXTRA_CFLAGS: ${{{{ matrix.env.EXTRA_CFLAGS }}}}
+      run: |
+        # Verify the script generates the same output
+        ./scripts/gen_gh_actions.py > gh_actions_script.yml
+
+        # Run autoconf
+        autoconf
+
+        # Configure with flags
+        if [ -n "$COMPILER_FLAGS" ]; then
+          ./configure CC="${{CC}} ${{COMPILER_FLAGS}}" CXX="${{CXX}} ${{COMPILER_FLAGS}}" $CONFIGURE_FLAGS
+        else
+          ./configure $CONFIGURE_FLAGS
+        fi
+
+        # Build
+        make -j3
+        make -j3 tests
+
+        # Run tests
+        make check
+
+"""
+
+    return job
+
+
+def generate_macos_job(arch):
+    """Generate macOS job configuration."""
+    os = OSX
+    max_unusual_opts = 1
+
+    exclude = ([Option.as_malloc_conf(opt) for opt in (
+            'dss:primary',
+            'background_thread:true')] +
+        [Option.as_configure_flag('--enable-prof')] +
+        [CLANG,])
+
+    if arch != ARM64:
+        exclude += [LARGE_HUGEPAGE]
+
+    matrix_entries = generate_job_matrix_entries(os, arch, exclude, max_unusual_opts)
+
+    arch_suffix = f"-{arch}" if arch != AMD64 else ""
+
+    # Select appropriate runner based on architecture
+    # Pin both for more control over OS upgrades
+    if arch == ARM64:
+        runner = "macos-15"          # Pinned macOS 15 on Apple Silicon
+    else:  # AMD64
+        runner = "macos-15-intel"    # Pinned macOS 15 on Intel (last Intel runner, EOL Aug 2027)
+
+    job = f"""  test-macos{arch_suffix}:
+    runs-on: {runner}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+"""
+
+    for entry in matrix_entries:
+        job += "          - env:\n"
+        for key, value in entry.items():
+            if ' ' in str(value) or any(c in str(value) for c in [':', ',', '#']):
+                job += f'              {key}: "{value}"\n'
+            else:
+                job += f"              {key}: {value}\n"
+
+    job += f"""
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Show OS version
+      run: |
+        echo "=== macOS Version ==="
+        sw_vers
+        echo ""
+        echo "=== Architecture ==="
+        uname -m
+        arch
+        echo ""
+        echo "=== CPU Info ==="
+        sysctl -n machdep.cpu.brand_string
+        sysctl -n hw.machine
+
+    - name: Install dependencies
+      run: |
+        brew install autoconf
+
+    - name: Build and test
+      env:
+        CC: ${{{{ matrix.env.CC || 'gcc' }}}}
+        CXX: ${{{{ matrix.env.CXX || 'g++' }}}}
+        COMPILER_FLAGS: ${{{{ matrix.env.COMPILER_FLAGS }}}}
+        CONFIGURE_FLAGS: ${{{{ matrix.env.CONFIGURE_FLAGS }}}}
+        EXTRA_CFLAGS: ${{{{ matrix.env.EXTRA_CFLAGS }}}}
+      run: |
+        # Run autoconf
+        autoconf
+
+        # Configure with flags
+        if [ -n "$COMPILER_FLAGS" ]; then
+          ./configure CC="${{CC}} ${{COMPILER_FLAGS}}" CXX="${{CXX}} ${{COMPILER_FLAGS}}" $CONFIGURE_FLAGS
+        else
+          ./configure $CONFIGURE_FLAGS
+        fi
+
+        # Build
+        make -j3
+        make -j3 tests
+
+        # Run tests
+        make check
+
+"""
+
+    return job
+
+
+def generate_windows_job(arch):
+    """Generate Windows job configuration."""
+    os = WINDOWS
+    max_unusual_opts = 3
+    unusuals = (
+        Option.as_configure_flag('--enable-debug'),
+        CL,
+        CROSS_COMPILE_32BIT,
+    )
+
+    matrix_entries = generate_job_matrix_entries(os, arch, (), max_unusual_opts, unusuals)
+
+    arch_suffix = f"-{arch}" if arch != AMD64 else ""
+
+    # Use latest for Windows - tends to be backward compatible and stable
+    job = f"""  test-windows{arch_suffix}:
+    runs-on: windows-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+"""
+
+    for entry in matrix_entries:
+        job += "          - env:\n"
+        for key, value in entry.items():
+            if ' ' in str(value) or any(c in str(value) for c in [':', ',', '#']):
+                job += f'              {key}: "{value}"\n'
+            else:
+                job += f"              {key}: {value}\n"
+
+    job += f"""
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Show OS version
+      shell: cmd
+      run: |
+        echo === Windows Version ===
+        systeminfo | findstr /B /C:"OS Name" /C:"OS Version"
+        ver
+        echo.
+        echo === Architecture ===
+        echo PROCESSOR_ARCHITECTURE=%PROCESSOR_ARCHITECTURE%
+        echo.
+
+    - name: Setup MSYS2
+      uses: msys2/setup-msys2@v2
+      with:
+        msystem: ${{{{ matrix.env.CROSS_COMPILE_32BIT == 'yes' && 'MINGW32' || 'MINGW64' }}}}
+        update: true
+        install: >-
+          autotools
+          git
+        pacboy: >-
+          make:p
+          gcc:p
+          binutils:p
+
+    - name: Build and test (MinGW-GCC)
+      if: matrix.env.CC != 'cl.exe'
+      shell: msys2 {{0}}
+      env:
+        CC: ${{{{ matrix.env.CC || 'gcc' }}}}
+        CXX: ${{{{ matrix.env.CXX || 'g++' }}}}
+        COMPILER_FLAGS: ${{{{ matrix.env.COMPILER_FLAGS }}}}
+        CONFIGURE_FLAGS: ${{{{ matrix.env.CONFIGURE_FLAGS }}}}
+        EXTRA_CFLAGS: ${{{{ matrix.env.EXTRA_CFLAGS }}}}
+      run: |
+        # Run autoconf
+        autoconf
+
+        # Configure with flags
+        if [ -n "$COMPILER_FLAGS" ]; then
+          ./configure CC="${{CC}} ${{COMPILER_FLAGS}}" CXX="${{CXX}} ${{COMPILER_FLAGS}}" $CONFIGURE_FLAGS
+        else
+          ./configure $CONFIGURE_FLAGS
+        fi
+
+        # Build (mingw32-make is the "make" command in MSYS2)
+        mingw32-make -j3
+        mingw32-make tests
+
+        # Run tests
+        mingw32-make -k check
+
+    - name: Setup MSVC environment
+      if: matrix.env.CC == 'cl.exe'
+      uses: ilammy/msvc-dev-cmd@v1
+      with:
+        arch: ${{{{ matrix.env.CROSS_COMPILE_32BIT == 'yes' && 'x86' || 'x64' }}}}
+
+    - name: Build and test (MSVC)
+      if: matrix.env.CC == 'cl.exe'
+      shell: msys2 {{0}}
+      env:
+        CONFIGURE_FLAGS: ${{{{ matrix.env.CONFIGURE_FLAGS }}}}
+        MSYS2_PATH_TYPE: inherit
+      run: |
+        # Export MSVC environment variables for configure
+        export CC=cl.exe
+        export CXX=cl.exe
+        export AR=lib.exe
+        export NM=dumpbin.exe
+        export RANLIB=:
+
+        # Verify cl.exe is accessible (should be in PATH via inherit)
+        if ! which cl.exe > /dev/null 2>&1; then
+          echo "cl.exe not found, trying to locate MSVC..."
+          # Find and add MSVC bin directory to PATH
+          MSVC_BIN=$(cmd.exe /c "echo %VCToolsInstallDir%" | tr -d '\\\\r' | sed 's/\\\\\\\\\\\\\\\\/\\//g' | sed 's/C:/\\\\/c/g')
+          if [ -n "$MSVC_BIN" ]; then
+            export PATH="$PATH:$MSVC_BIN/bin/Hostx64/x64:$MSVC_BIN/bin/Hostx86/x86"
+          fi
+        fi
+
+        # Run autoconf
+        autoconf
+
+        # Configure with MSVC
+        ./configure CC=cl.exe CXX=cl.exe AR=lib.exe $CONFIGURE_FLAGS
+
+        # Build (mingw32-make is the "make" command in MSYS2)
+        mingw32-make -j3
+        # Build tests sequentially due to PDB file issues
+        mingw32-make tests
+
+        # Run tests
+        mingw32-make -k check
+
+"""
+
+    return job
+
+
+def generate_freebsd_job(arch):
+    """Generate FreeBSD job configuration."""
+    # FreeBSD runs in a VM on ubuntu-latest, not native
+
+    job = f"""  test-freebsd:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        debug: ['--enable-debug', '--disable-debug']
+        prof: ['--enable-prof', '--disable-prof']
+        arch: ['64-bit', '32-bit']
+        uncommon:
+          - ''
+          - '--with-lg-page=16 --with-malloc-conf=tcache:false'
+
+    name: FreeBSD (${{{{ matrix.arch }}}}, debug=${{{{ matrix.debug }}}}, prof=${{{{ matrix.prof }}}}${{{{ matrix.uncommon && ', uncommon' || '' }}}})
+
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        fetch-depth: 1
+
+    - name: Test on FreeBSD
+      uses: vmactions/freebsd-vm@v1
+      with:
+        release: '15.0'
+        usesh: true
+        prepare: |
+          pkg install -y autoconf gmake
+        run: |
+          # Verify we're running in FreeBSD
+          echo "==== System Information ===="
+          uname -a
+          freebsd-version
+          echo "============================"
+
+          # Set compiler flags for 32-bit if needed
+          if [ "${{{{ matrix.arch }}}}" = "32-bit" ]; then
+            export CC="cc -m32"
+            export CXX="c++ -m32"
+          fi
+
+          # Generate configure script
+          autoconf
+
+          # Configure with matrix options
+          ./configure --with-jemalloc-prefix=ci_ ${{{{ matrix.debug }}}} ${{{{ matrix.prof }}}} ${{{{ matrix.uncommon }}}}
+
+          # Get CPU count for parallel builds
+          export JFLAG=$(sysctl -n kern.smp.cpus)
+
+          gmake -j${{JFLAG}}
+          gmake -j${{JFLAG}} tests
+          gmake check
+
+"""
+
+    return job
+
+
+def main():
+    import sys
+
+    # Determine which workflow to generate based on command-line argument
+    workflow_type = sys.argv[1] if len(sys.argv) > 1 else 'linux'
+
+    if workflow_type == 'linux':
+        jobs = '\n'.join((
+            generate_linux_job(AMD64),
+            generate_linux_job(ARM64),
+        ))
+        print(GITHUB_ACTIONS_TEMPLATE.format(name='Linux CI', jobs=jobs))
+
+    elif workflow_type == 'macos':
+        jobs = '\n'.join((
+            generate_macos_job(AMD64),   # Intel x86_64
+            generate_macos_job(ARM64),   # Apple Silicon
+        ))
+        print(GITHUB_ACTIONS_TEMPLATE.format(name='macOS CI', jobs=jobs))
+
+    elif workflow_type == 'windows':
+        jobs = generate_windows_job(AMD64)
+        print(GITHUB_ACTIONS_TEMPLATE.format(name='Windows CI', jobs=jobs))
+
+    elif workflow_type == 'freebsd':
+        jobs = generate_freebsd_job(AMD64)
+        print(GITHUB_ACTIONS_TEMPLATE.format(name='FreeBSD CI', jobs=jobs))
+
+    elif workflow_type == 'all':
+        # Generate all workflow files
+        linux_jobs = '\n'.join((
+            generate_linux_job(AMD64),
+            generate_linux_job(ARM64),
+        ))
+        macos_jobs = '\n'.join((
+            generate_macos_job(AMD64),   # Intel
+            generate_macos_job(ARM64),   # Apple Silicon
+        ))
+        windows_jobs = generate_windows_job(AMD64)
+        freebsd_jobs = generate_freebsd_job(AMD64)
+
+        all_jobs = '\n'.join((linux_jobs, macos_jobs, windows_jobs, freebsd_jobs))
+        print(GITHUB_ACTIONS_TEMPLATE.format(name='CI', jobs=all_jobs))
+
+    else:
+        print(f"Unknown workflow type: {workflow_type}", file=sys.stderr)
+        print("Usage: gen_gh_actions.py [linux|macos|windows|freebsd|all]", file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py
index 4366a066..3f7aeab0 100755
--- a/scripts/gen_travis.py
+++ b/scripts/gen_travis.py
@@ -24,7 +24,7 @@ TRAVIS_TEMPLATE = """\
 # Differences are explained here:
 # https://docs.travis-ci.com/user/languages/minimal-and-generic/
 language: minimal
-dist: focal
+dist: jammy
 
 jobs:
   include:
@@ -96,6 +96,15 @@ class Option(object):
         return (isinstance(obj, Option) and obj.type == self.type
                 and obj.value == self.value)
 
+    def __repr__(self):
+        type_names = {
+            Option.Type.COMPILER: 'COMPILER',
+            Option.Type.COMPILER_FLAG: 'COMPILER_FLAG',
+            Option.Type.CONFIGURE_FLAG: 'CONFIGURE_FLAG',
+            Option.Type.MALLOC_CONF: 'MALLOC_CONF',
+            Option.Type.FEATURE: 'FEATURE'
+        }
+        return f"Option({type_names[self.type]}, {repr(self.value)})"
 
 # The 'default' configuration is gcc, on linux, with no compiler or configure
 # flags.  We also test with clang, -m32, --enable-debug, --enable-prof,
@@ -125,7 +134,9 @@ configure_flag_unusuals = [Option.as_configure_flag(opt) for opt in (
     '--disable-libdl',
     '--enable-opt-safety-checks',
     '--with-lg-page=16',
+    '--with-lg-page=16 --with-lg-hugepage=29',
 )]
+LARGE_HUGEPAGE = Option.as_configure_flag("--with-lg-page=16 --with-lg-hugepage=29")
 
 
 malloc_conf_unusuals = [Option.as_malloc_conf(opt) for opt in (
@@ -246,8 +257,20 @@ def generate_linux(arch):
     if arch == PPC64LE:
         # Avoid 32 bit builds and clang on PowerPC
         exclude = (CROSS_COMPILE_32BIT, CLANG,)
+    if arch == ARM64:
+        # Avoid 32 bit build on ARM64
+        exclude = (CROSS_COMPILE_32BIT,)
 
-    return generate_jobs(os, arch, exclude, max_unusual_opts)
+    if arch != ARM64:
+        exclude += [LARGE_HUGEPAGE]
+
+    linux_configure_flags = list(configure_flag_unusuals)
+    linux_configure_flags.append(Option.as_configure_flag("--enable-prof --enable-prof-frameptr"))
+
+    linux_unusuals = (compilers_unusual + feature_unusuals
+                    + linux_configure_flags + malloc_conf_unusuals)
+
+    return generate_jobs(os, arch, exclude, max_unusual_opts, linux_unusuals)
 
 
 def generate_macos(arch):
@@ -257,11 +280,13 @@ def generate_macos(arch):
 
     exclude = ([Option.as_malloc_conf(opt) for opt in (
             'dss:primary',
-            'percpu_arena:percpu',
             'background_thread:true')] +
         [Option.as_configure_flag('--enable-prof')] +
         [CLANG,])
 
+    if arch != ARM64:
+        exclude += [LARGE_HUGEPAGE]
+
     return generate_jobs(os, arch, exclude, max_unusual_opts)
 
 
@@ -308,14 +333,24 @@ EXTRA_CFLAGS="-Werror -Wno-array-bounds"
 
 def main():
     jobs = '\n'.join((
-        generate_windows(AMD64),
+        # Travis is failing on Windows due to infra failures, comment it out for
+        # now.  Should resume once it is fixed.
 
-        generate_freebsd(AMD64),
+        # generate_windows(AMD64),
+
+        # Travis currently provides only FreeBSD 12.1 which is EOL.  Builds are
+        # not working as of Jan 2024.  Disable the tests for now to avoid the
+        # noise / confusion.
+
+        # generate_freebsd(AMD64),
 
         generate_linux(AMD64),
-        generate_linux(PPC64LE),
+        # PPC tests on travis has been down for a while, disable it for now.
+        # generate_linux(PPC64LE),
+        generate_linux(ARM64),
 
-        generate_macos(AMD64),
+        # Starting April 1st, 2025, Travis no longer supports OSx/macOS builds
+        # generate_macos(AMD64),
 
         get_manual_jobs(),
     ))
diff --git a/scripts/run_static_analysis.sh b/scripts/run_static_analysis.sh
new file mode 100755
index 00000000..1662f7eb
--- /dev/null
+++ b/scripts/run_static_analysis.sh
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+git clean -Xfd
+
+export CC='clang'
+export CXX='clang++'
+compile_time_malloc_conf='background_thread:true,'\
+'metadata_thp:auto,'\
+'abort_conf:true,'\
+'muzzy_decay_ms:0,'\
+'zero_realloc:free,'\
+'prof_unbias:false,'\
+'prof_time_resolution:high'
+extra_flags=(
+	-Wmissing-prototypes
+	-Wmissing-variable-declarations
+	-Wstrict-prototypes
+	-Wunreachable-code
+	-Wunreachable-code-aggressive
+	-Wunused-macros
+)
+
+EXTRA_CFLAGS="${extra_flags[*]}" EXTRA_CXXFLAGS="${extra_flags[*]}" ./autogen.sh \
+	--with-private-namespace=jemalloc_ \
+	--disable-cache-oblivious \
+	--enable-prof \
+	--enable-prof-libunwind \
+	--with-malloc-conf="$compile_time_malloc_conf" \
+	--enable-readlinkat \
+	--enable-opt-safety-checks \
+	--enable-uaf-detection \
+	--enable-force-getenv \
+	--enable-debug # Enabling debug for static analysis is important,
+	               # otherwise you'll get tons of warnings for things
+	               # that are already covered by `assert`s.
+
+bear -- make -s -j "$(nproc)"
+# We end up with lots of duplicate entries in the compilation database, one for
+# each output file type (e.g. .o, .d, .sym, etc.). There must be exactly one
+# entry for each file in the compilation database in order for
+# cross-translation-unit analysis to work, so we deduplicate the database here.
+jq '[.[] | select(.output | test("/[^./]*\\.o$"))]' compile_commands.json > compile_commands.json.tmp
+mv compile_commands.json.tmp compile_commands.json
+
+# CodeChecker has a bug where it freaks out if you supply the skipfile via process substitution,
+# so we resort to manually creating a temporary file
+skipfile=$(mktemp)
+# The single-quotes are deliberate here, you want `$skipfile` to be evaluated upon exit
+trap 'rm -f $skipfile' EXIT
+echo '-**/stdlib.h' > "$skipfile"
+CC_ANALYZERS_FROM_PATH=1 CodeChecker analyze compile_commands.json --jobs "$(nproc)" \
+	--ctu --compile-uniqueing strict --output static_analysis_raw_results \
+	--analyzers clangsa clang-tidy --skip "$skipfile" \
+	--enable readability-inconsistent-declaration-parameter-name \
+	--enable performance-no-int-to-ptr \
+	--disable clang-diagnostic-reserved-macro-identifier
+	# `--enable` is additive, the vast majority of the checks we want are
+	# enabled by default.
+
+html_output_dir="${1:-static_analysis_results}"
+result=${2:-/dev/null}
+# We're echoing a value because we want to indicate whether or not any errors
+# were found, but we always want the script to have a successful exit code so
+# that we actually reach the step in the GitHub action where we upload the results.
+if CodeChecker parse --export html --output "$html_output_dir" static_analysis_raw_results
+then
+	echo "HAS_STATIC_ANALYSIS_RESULTS=0" >> "$result"
+else
+	echo "HAS_STATIC_ANALYSIS_RESULTS=1" >> "$result"
+fi
diff --git a/src/arena.c b/src/arena.c
index 857b27c5..d7c8cd1f 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -21,13 +21,8 @@ JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS
  * Define names for both unininitialized and initialized phases, so that
  * options and mallctl processing are straightforward.
  */
-const char *percpu_arena_mode_names[] = {
-	"percpu",
-	"phycpu",
-	"disabled",
-	"percpu",
-	"phycpu"
-};
+const char *const percpu_arena_mode_names[] = {
+    "percpu", "phycpu", "disabled", "percpu", "phycpu"};
 percpu_arena_mode_t opt_percpu_arena = PERCPU_ARENA_DEFAULT;
 
 ssize_t opt_dirty_decay_ms = DIRTY_DECAY_MS_DEFAULT;
@@ -36,22 +31,31 @@ ssize_t opt_muzzy_decay_ms = MUZZY_DECAY_MS_DEFAULT;
 static atomic_zd_t dirty_decay_ms_default;
 static atomic_zd_t muzzy_decay_ms_default;
 
-emap_t arena_emap_global;
-pa_central_t arena_pa_central_global;
+emap_t              arena_emap_global;
+static pa_central_t arena_pa_central_global;
 
 div_info_t arena_binind_div_info[SC_NBINS];
 
 size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT;
 size_t oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT;
 
-uint32_t arena_bin_offsets[SC_NBINS];
+uint32_t        arena_bin_offsets[SC_NBINS];
 static unsigned nbins_total;
 
-static unsigned huge_arena_ind;
+/*
+ * a0 is used to handle huge requests before malloc init completes. After
+ * that,the huge_arena_ind is updated to point to the actual huge arena,
+ * which is the last one of the auto arenas.
+ */
+unsigned  huge_arena_ind = 0;
+bool      opt_huge_arena_pac_thp = false;
+pac_thp_t huge_arena_pac_thp = {.thp_madvise = false,
+    .auto_thp_switched = false,
+    .n_thp_lazy = ATOMIC_INIT(0)};
 
 const arena_config_t arena_config_default = {
-	/* .extent_hooks = */ (extent_hooks_t *)&ehooks_default_extent_hooks,
-	/* .metadata_use_hooks = */ true,
+    /* .extent_hooks = */ (extent_hooks_t *)&ehooks_default_extent_hooks,
+    /* .metadata_use_hooks = */ true,
 };
 
 /******************************************************************************/
@@ -60,13 +64,10 @@ const arena_config_t arena_config_default = {
  * definition.
  */
 
-static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena,
-    bool is_background_thread, bool all);
-static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab,
-    bin_t *bin);
-static void
-arena_maybe_do_deferred_work(tsdn_t *tsdn, arena_t *arena, decay_t *decay,
-    size_t npages_new);
+static bool arena_decay_dirty(
+    tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all);
+static void arena_maybe_do_deferred_work(
+    tsdn_t *tsdn, arena_t *arena, decay_t *decay, size_t npages_new);
 
 /******************************************************************************/
 
@@ -85,15 +86,17 @@ void
 arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
     const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms,
     size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats,
-    bin_stats_data_t *bstats, arena_stats_large_t *lstats,
-    pac_estats_t *estats, hpa_shard_stats_t *hpastats, sec_stats_t *secstats) {
+    bin_stats_data_t *bstats, arena_stats_large_t *lstats, pac_estats_t *estats,
+    hpa_shard_stats_t *hpastats) {
 	cassert(config_stats);
 
 	arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms,
 	    muzzy_decay_ms, nactive, ndirty, nmuzzy);
 
-	size_t base_allocated, base_resident, base_mapped, metadata_thp;
-	base_stats_get(tsdn, arena->base, &base_allocated, &base_resident,
+	size_t base_allocated, base_edata_allocated, base_rtree_allocated,
+	    base_resident, base_mapped, metadata_thp;
+	base_stats_get(tsdn, arena->base, &base_allocated,
+	    &base_edata_allocated, &base_rtree_allocated, &base_resident,
 	    &base_mapped, &metadata_thp);
 	size_t pac_mapped_sz = pac_mapped(&arena->pa_shard.pac);
 	astats->mapped += base_mapped + pac_mapped_sz;
@@ -102,27 +105,32 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 	LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx);
 
 	astats->base += base_allocated;
+	astats->metadata_edata += base_edata_allocated;
+	astats->metadata_rtree += base_rtree_allocated;
 	atomic_load_add_store_zu(&astats->internal, arena_internal_get(arena));
 	astats->metadata_thp += metadata_thp;
 
 	for (szind_t i = 0; i < SC_NSIZES - SC_NBINS; i++) {
-		uint64_t nmalloc = locked_read_u64(tsdn,
-		    LOCKEDINT_MTX(arena->stats.mtx),
-		    &arena->stats.lstats[i].nmalloc);
-		locked_inc_u64_unsynchronized(&lstats[i].nmalloc, nmalloc);
-		astats->nmalloc_large += nmalloc;
-
+		/* ndalloc should be read before nmalloc,
+		 * since otherwise it is possible for ndalloc to be incremented,
+		 * and the following can become true: ndalloc > nmalloc */
 		uint64_t ndalloc = locked_read_u64(tsdn,
 		    LOCKEDINT_MTX(arena->stats.mtx),
 		    &arena->stats.lstats[i].ndalloc);
 		locked_inc_u64_unsynchronized(&lstats[i].ndalloc, ndalloc);
 		astats->ndalloc_large += ndalloc;
 
+		uint64_t nmalloc = locked_read_u64(tsdn,
+		    LOCKEDINT_MTX(arena->stats.mtx),
+		    &arena->stats.lstats[i].nmalloc);
+		locked_inc_u64_unsynchronized(&lstats[i].nmalloc, nmalloc);
+		astats->nmalloc_large += nmalloc;
+
 		uint64_t nrequests = locked_read_u64(tsdn,
 		    LOCKEDINT_MTX(arena->stats.mtx),
 		    &arena->stats.lstats[i].nrequests);
-		locked_inc_u64_unsynchronized(&lstats[i].nrequests,
-		    nmalloc + nrequests);
+		locked_inc_u64_unsynchronized(
+		    &lstats[i].nrequests, nmalloc + nrequests);
 		astats->nrequests_large += nmalloc + nrequests;
 
 		/* nfill == nmalloc for large currently. */
@@ -139,12 +147,17 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 		assert(nmalloc - ndalloc <= SIZE_T_MAX);
 		size_t curlextents = (size_t)(nmalloc - ndalloc);
 		lstats[i].curlextents += curlextents;
-		astats->allocated_large +=
-		    curlextents * sz_index2size(SC_NBINS + i);
+
+		uint64_t active_bytes = locked_read_u64(tsdn,
+		    LOCKEDINT_MTX(arena->stats.mtx),
+		    &arena->stats.lstats[i].active_bytes);
+		locked_inc_u64_unsynchronized(
+		    &lstats[i].active_bytes, active_bytes);
+		astats->allocated_large += active_bytes;
 	}
 
 	pa_shard_stats_merge(tsdn, &arena->pa_shard, &astats->pa_shard_stats,
-	    estats, hpastats, secstats, &astats->resident);
+	    estats, hpastats, &astats->resident);
 
 	LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx);
 
@@ -153,16 +166,19 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 	astats->tcache_stashed_bytes = 0;
 	malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
 	cache_bin_array_descriptor_t *descriptor;
-	ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) {
-		for (szind_t i = 0; i < nhbins; i++) {
+	ql_foreach (descriptor, &arena->cache_bin_array_descriptor_ql, link) {
+		for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) {
 			cache_bin_t *cache_bin = &descriptor->bins[i];
-			cache_bin_sz_t ncached, nstashed;
-			cache_bin_nitems_get_remote(cache_bin,
-			    &tcache_bin_info[i], &ncached, &nstashed);
+			if (cache_bin_disabled(cache_bin)) {
+				continue;
+			}
 
+			cache_bin_sz_t ncached, nstashed;
+			cache_bin_nitems_get_remote(
+			    cache_bin, &ncached, &nstashed);
 			astats->tcache_bytes += ncached * sz_index2size(i);
-			astats->tcache_stashed_bytes += nstashed *
-			    sz_index2size(i);
+			astats->tcache_stashed_bytes += nstashed
+			    * sz_index2size(i);
 		}
 	}
 	malloc_mutex_prof_read(tsdn,
@@ -170,19 +186,18 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 	    &arena->tcache_ql_mtx);
 	malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx);
 
-#define READ_ARENA_MUTEX_PROF_DATA(mtx, ind)				\
-    malloc_mutex_lock(tsdn, &arena->mtx);				\
-    malloc_mutex_prof_read(tsdn, &astats->mutex_prof_data[ind],		\
-        &arena->mtx);							\
-    malloc_mutex_unlock(tsdn, &arena->mtx);
+#define READ_ARENA_MUTEX_PROF_DATA(mtx, ind)                                   \
+	malloc_mutex_lock(tsdn, &arena->mtx);                                  \
+	malloc_mutex_prof_read(                                                \
+	    tsdn, &astats->mutex_prof_data[ind], &arena->mtx);                 \
+	malloc_mutex_unlock(tsdn, &arena->mtx);
 
 	/* Gather per arena mutex profiling data. */
 	READ_ARENA_MUTEX_PROF_DATA(large_mtx, arena_prof_mutex_large);
-	READ_ARENA_MUTEX_PROF_DATA(base->mtx,
-	    arena_prof_mutex_base);
+	READ_ARENA_MUTEX_PROF_DATA(base->mtx, arena_prof_mutex_base);
 #undef READ_ARENA_MUTEX_PROF_DATA
-	pa_shard_mtx_stats_read(tsdn, &arena->pa_shard,
-	    astats->mutex_prof_data);
+	pa_shard_mtx_stats_read(
+	    tsdn, &arena->pa_shard, astats->mutex_prof_data);
 
 	nstime_copy(&astats->uptime, &arena->create_time);
 	nstime_update(&astats->uptime);
@@ -190,32 +205,33 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 
 	for (szind_t i = 0; i < SC_NBINS; i++) {
 		for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
-			bin_stats_merge(tsdn, &bstats[i],
-			    arena_get_bin(arena, i, j));
+			bin_stats_merge(
+			    tsdn, &bstats[i], arena_get_bin(arena, i, j));
 		}
 	}
 }
 
 static void
-arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena,
-    bool is_background_thread) {
+arena_background_thread_inactivity_check(
+    tsdn_t *tsdn, arena_t *arena, bool is_background_thread) {
 	if (!background_thread_enabled() || is_background_thread) {
 		return;
 	}
-	background_thread_info_t *info =
-	    arena_background_thread_info_get(arena);
+	background_thread_info_t *info = arena_background_thread_info_get(
+	    arena);
 	if (background_thread_indefinite_sleep(info)) {
-		arena_maybe_do_deferred_work(tsdn, arena,
-		    &arena->pa_shard.pac.decay_dirty, 0);
+		arena_maybe_do_deferred_work(
+		    tsdn, arena, &arena->pa_shard.pac.decay_dirty, 0);
 	}
 }
 
 /*
  * React to deferred work generated by a PAI function.
  */
-void arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena) {
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
+void
+arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena) {
+	witness_assert_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0);
 
 	if (decay_immediately(&arena->pa_shard.pac.decay_dirty)) {
 		arena_decay_dirty(tsdn, arena, false, true);
@@ -223,133 +239,105 @@ void arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena) {
 	arena_background_thread_inactivity_check(tsdn, arena, false);
 }
 
-static void *
-arena_slab_reg_alloc(edata_t *slab, const bin_info_t *bin_info) {
-	void *ret;
-	slab_data_t *slab_data = edata_slab_data_get(slab);
-	size_t regind;
-
-	assert(edata_nfree_get(slab) > 0);
-	assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info));
-
-	regind = bitmap_sfu(slab_data->bitmap, &bin_info->bitmap_info);
-	ret = (void *)((uintptr_t)edata_addr_get(slab) +
-	    (uintptr_t)(bin_info->reg_size * regind));
-	edata_nfree_dec(slab);
-	return ret;
-}
-
-static void
-arena_slab_reg_alloc_batch(edata_t *slab, const bin_info_t *bin_info,
-			   unsigned cnt, void** ptrs) {
-	slab_data_t *slab_data = edata_slab_data_get(slab);
-
-	assert(edata_nfree_get(slab) >= cnt);
-	assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info));
-
-#if (! defined JEMALLOC_INTERNAL_POPCOUNTL) || (defined BITMAP_USE_TREE)
-	for (unsigned i = 0; i < cnt; i++) {
-		size_t regind = bitmap_sfu(slab_data->bitmap,
-					   &bin_info->bitmap_info);
-		*(ptrs + i) = (void *)((uintptr_t)edata_addr_get(slab) +
-		    (uintptr_t)(bin_info->reg_size * regind));
-	}
-#else
-	unsigned group = 0;
-	bitmap_t g = slab_data->bitmap[group];
-	unsigned i = 0;
-	while (i < cnt) {
-		while (g == 0) {
-			g = slab_data->bitmap[++group];
-		}
-		size_t shift = group << LG_BITMAP_GROUP_NBITS;
-		size_t pop = popcount_lu(g);
-		if (pop > (cnt - i)) {
-			pop = cnt - i;
-		}
-
-		/*
-		 * Load from memory locations only once, outside the
-		 * hot loop below.
-		 */
-		uintptr_t base = (uintptr_t)edata_addr_get(slab);
-		uintptr_t regsize = (uintptr_t)bin_info->reg_size;
-		while (pop--) {
-			size_t bit = cfs_lu(&g);
-			size_t regind = shift + bit;
-			*(ptrs + i) = (void *)(base + regsize * regind);
-
-			i++;
-		}
-		slab_data->bitmap[group] = g;
-	}
-#endif
-	edata_nfree_sub(slab, cnt);
-}
-
 static void
 arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) {
-	szind_t index, hindex;
-
 	cassert(config_stats);
 
+	szind_t index = sz_size2index(usize);
+	/* This only occurs when we have a sampled small allocation */
 	if (usize < SC_LARGE_MINCLASS) {
-		usize = SC_LARGE_MINCLASS;
+		assert(index < SC_NBINS);
+		assert(usize >= PAGE && usize % PAGE == 0);
+		bin_t *bin = arena_get_bin(arena, index, /* binshard */ 0);
+		malloc_mutex_lock(tsdn, &bin->lock);
+		bin->stats.nmalloc++;
+		malloc_mutex_unlock(tsdn, &bin->lock);
+	} else {
+		assert(index >= SC_NBINS);
+		szind_t hindex = index - SC_NBINS;
+		LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx);
+		locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx),
+		    &arena->stats.lstats[hindex].nmalloc, 1);
+		locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx),
+		    &arena->stats.lstats[hindex].active_bytes, usize);
+		LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx);
 	}
-	index = sz_size2index(usize);
-	hindex = (index >= SC_NBINS) ? index - SC_NBINS : 0;
-
-	locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx),
-	    &arena->stats.lstats[hindex].nmalloc, 1);
 }
 
 static void
 arena_large_dalloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) {
-	szind_t index, hindex;
-
 	cassert(config_stats);
 
+	szind_t index = sz_size2index(usize);
+	/* This only occurs when we have a sampled small allocation */
 	if (usize < SC_LARGE_MINCLASS) {
-		usize = SC_LARGE_MINCLASS;
+		assert(index < SC_NBINS);
+		assert(usize >= PAGE && usize % PAGE == 0);
+		bin_t *bin = arena_get_bin(arena, index, /* binshard */ 0);
+		malloc_mutex_lock(tsdn, &bin->lock);
+		bin->stats.ndalloc++;
+		malloc_mutex_unlock(tsdn, &bin->lock);
+	} else {
+		assert(index >= SC_NBINS);
+		szind_t hindex = index - SC_NBINS;
+		LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx);
+		locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx),
+		    &arena->stats.lstats[hindex].ndalloc, 1);
+		locked_dec_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx),
+		    &arena->stats.lstats[hindex].active_bytes, usize);
+		LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx);
 	}
-	index = sz_size2index(usize);
-	hindex = (index >= SC_NBINS) ? index - SC_NBINS : 0;
-
-	locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx),
-	    &arena->stats.lstats[hindex].ndalloc, 1);
 }
 
 static void
-arena_large_ralloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t oldusize,
-    size_t usize) {
+arena_large_ralloc_stats_update(
+    tsdn_t *tsdn, arena_t *arena, size_t oldusize, size_t usize) {
 	arena_large_malloc_stats_update(tsdn, arena, usize);
 	arena_large_dalloc_stats_update(tsdn, arena, oldusize);
 }
 
 edata_t *
-arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize,
-    size_t alignment, bool zero) {
-	bool deferred_work_generated = false;
+arena_extent_alloc_large(
+    tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) {
+	bool    deferred_work_generated = false;
 	szind_t szind = sz_size2index(usize);
-	size_t esize = usize + sz_large_pad;
+	size_t  esize = usize + sz_large_pad;
 
-	bool guarded = san_large_extent_decide_guard(tsdn,
-	    arena_get_ehooks(arena), esize, alignment);
+	bool guarded = san_large_extent_decide_guard(
+	    tsdn, arena_get_ehooks(arena), esize, alignment);
+
+	/*
+	 * - if usize >= opt_calloc_madvise_threshold,
+	 *     - pa_alloc(..., zero_override = zero, ...)
+	 * - otherwise,
+	 *     - pa_alloc(..., zero_override = false, ...)
+	 *     - use memset() to zero out memory if zero == true.
+	 */
+	bool zero_override = zero && (usize >= opt_calloc_madvise_threshold);
 	edata_t *edata = pa_alloc(tsdn, &arena->pa_shard, esize, alignment,
-	    /* slab */ false, szind, zero, guarded, &deferred_work_generated);
-	assert(deferred_work_generated == false);
+	    /* slab */ false, szind, zero_override, guarded,
+	    &deferred_work_generated);
 
-	if (edata != NULL) {
-		if (config_stats) {
-			LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx);
-			arena_large_malloc_stats_update(tsdn, arena, usize);
-			LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx);
-		}
+	if (edata == NULL) {
+		return NULL;
 	}
 
-	if (edata != NULL && sz_large_pad != 0) {
+	if (config_stats) {
+		arena_large_malloc_stats_update(tsdn, arena, usize);
+	}
+	if (sz_large_pad != 0) {
 		arena_cache_oblivious_randomize(tsdn, arena, edata, alignment);
 	}
+	/*
+	 * This branch should be put after the randomization so that the addr
+	 * returned by edata_addr_get() has already be randomized,
+	 * if cache_oblivious is enabled.
+	 */
+	if (zero && !zero_override && !edata_zeroed_get(edata)) {
+		void  *addr = edata_addr_get(edata);
+		size_t usize = edata_usize_get(edata);
+		memset(addr, 0, usize);
+	}
 
 	return edata;
 }
@@ -357,34 +345,28 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize,
 void
 arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, edata_t *edata) {
 	if (config_stats) {
-		LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx);
-		arena_large_dalloc_stats_update(tsdn, arena,
-		    edata_usize_get(edata));
-		LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx);
+		arena_large_dalloc_stats_update(
+		    tsdn, arena, edata_usize_get(edata));
 	}
 }
 
 void
-arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, edata_t *edata,
-    size_t oldusize) {
+arena_extent_ralloc_large_shrink(
+    tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldusize) {
 	size_t usize = edata_usize_get(edata);
 
 	if (config_stats) {
-		LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx);
 		arena_large_ralloc_stats_update(tsdn, arena, oldusize, usize);
-		LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx);
 	}
 }
 
 void
-arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata,
-    size_t oldusize) {
+arena_extent_ralloc_large_expand(
+    tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldusize) {
 	size_t usize = edata_usize_get(edata);
 
 	if (config_stats) {
-		LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx);
 		arena_large_ralloc_stats_update(tsdn, arena, oldusize, usize);
-		LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx);
 	}
 }
 
@@ -405,12 +387,12 @@ arena_decide_unforced_purge_eagerness(bool is_background_thread) {
 }
 
 bool
-arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, extent_state_t state,
-    ssize_t decay_ms) {
+arena_decay_ms_set(
+    tsdn_t *tsdn, arena_t *arena, extent_state_t state, ssize_t decay_ms) {
 	pac_purge_eagerness_t eagerness = arena_decide_unforced_purge_eagerness(
 	    /* is_background_thread */ false);
-	return pa_decay_ms_set(tsdn, &arena->pa_shard, state, decay_ms,
-	    eagerness);
+	return pa_decay_ms_set(
+	    tsdn, &arena->pa_shard, state, decay_ms, eagerness);
 }
 
 ssize_t
@@ -420,8 +402,8 @@ arena_decay_ms_get(arena_t *arena, extent_state_t state) {
 
 static bool
 arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay,
-    pac_decay_stats_t *decay_stats, ecache_t *ecache,
-    bool is_background_thread, bool all) {
+    pac_decay_stats_t *decay_stats, ecache_t *ecache, bool is_background_thread,
+    bool all) {
 	if (all) {
 		malloc_mutex_lock(tsdn, &decay->mtx);
 		pac_decay_all(tsdn, &arena->pa_shard.pac, decay, decay_stats,
@@ -434,19 +416,19 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay,
 		/* No need to wait if another thread is in progress. */
 		return true;
 	}
-	pac_purge_eagerness_t eagerness =
-	    arena_decide_unforced_purge_eagerness(is_background_thread);
-	bool epoch_advanced = pac_maybe_decay_purge(tsdn, &arena->pa_shard.pac,
-	    decay, decay_stats, ecache, eagerness);
-	size_t npages_new;
+	pac_purge_eagerness_t eagerness = arena_decide_unforced_purge_eagerness(
+	    is_background_thread);
+	bool epoch_advanced = pac_maybe_decay_purge(
+	    tsdn, &arena->pa_shard.pac, decay, decay_stats, ecache, eagerness);
+	size_t npages_new JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(0);
 	if (epoch_advanced) {
 		/* Backlog is updated on epoch advance. */
 		npages_new = decay_epoch_npages_delta(decay);
 	}
 	malloc_mutex_unlock(tsdn, &decay->mtx);
 
-	if (have_background_thread && background_thread_enabled() &&
-	    epoch_advanced && !is_background_thread) {
+	if (have_background_thread && background_thread_enabled()
+	    && epoch_advanced && !is_background_thread) {
 		arena_maybe_do_deferred_work(tsdn, arena, decay, npages_new);
 	}
 
@@ -454,16 +436,16 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay,
 }
 
 static bool
-arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread,
-    bool all) {
+arena_decay_dirty(
+    tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) {
 	return arena_decay_impl(tsdn, arena, &arena->pa_shard.pac.decay_dirty,
 	    &arena->pa_shard.pac.stats->decay_dirty,
 	    &arena->pa_shard.pac.ecache_dirty, is_background_thread, all);
 }
 
 static bool
-arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread,
-    bool all) {
+arena_decay_muzzy(
+    tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) {
 	if (pa_shard_dont_decay_muzzy(&arena->pa_shard)) {
 		return false;
 	}
@@ -480,7 +462,7 @@ arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) {
 		 * as possible", including flushing any caches (for situations
 		 * like thread death, or manual purge calls).
 		 */
-		sec_flush(tsdn, &arena->pa_shard.hpa_sec);
+		pa_shard_flush(tsdn, &arena->pa_shard);
 	}
 	if (arena_decay_dirty(tsdn, arena, is_background_thread, all)) {
 		return;
@@ -510,13 +492,13 @@ arena_should_decay_early(tsdn_t *tsdn, arena_t *arena, decay_t *decay,
 	}
 	nstime_subtract(remaining_sleep, &decay->epoch);
 	if (npages_new > 0) {
-		uint64_t npurge_new = decay_npages_purge_in(decay,
-		    remaining_sleep, npages_new);
+		uint64_t npurge_new = decay_npages_purge_in(
+		    decay, remaining_sleep, npages_new);
 		info->npages_to_purge_new += npurge_new;
 	}
 	malloc_mutex_unlock(tsdn, &decay->mtx);
-	return info->npages_to_purge_new >
-	    ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD;
+	return info->npages_to_purge_new
+	    > ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD;
 }
 
 /*
@@ -528,8 +510,8 @@ arena_should_decay_early(tsdn_t *tsdn, arena_t *arena, decay_t *decay,
  * deferred work has been generated.
  */
 static void
-arena_maybe_do_deferred_work(tsdn_t *tsdn, arena_t *arena, decay_t *decay,
-    size_t npages_new) {
+arena_maybe_do_deferred_work(
+    tsdn_t *tsdn, arena_t *arena, decay_t *decay, size_t npages_new) {
 	background_thread_info_t *info = arena_background_thread_info_get(
 	    arena);
 	if (malloc_mutex_trylock(tsdn, &info->mtx)) {
@@ -549,7 +531,7 @@ arena_maybe_do_deferred_work(tsdn_t *tsdn, arena_t *arena, decay_t *decay,
 	if (background_thread_indefinite_sleep(info)) {
 		background_thread_wakeup_early(info, NULL);
 	} else if (arena_should_decay_early(tsdn, arena, decay, info,
-	    &remaining_sleep, npages_new)) {
+	               &remaining_sleep, npages_new)) {
 		info->npages_to_purge_new = 0;
 		background_thread_wakeup_early(info, &remaining_sleep);
 	}
@@ -573,63 +555,12 @@ arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab) {
 	}
 }
 
-static void
-arena_bin_slabs_nonfull_insert(bin_t *bin, edata_t *slab) {
-	assert(edata_nfree_get(slab) > 0);
-	edata_heap_insert(&bin->slabs_nonfull, slab);
-	if (config_stats) {
-		bin->stats.nonfull_slabs++;
-	}
-}
-
-static void
-arena_bin_slabs_nonfull_remove(bin_t *bin, edata_t *slab) {
-	edata_heap_remove(&bin->slabs_nonfull, slab);
-	if (config_stats) {
-		bin->stats.nonfull_slabs--;
-	}
-}
-
-static edata_t *
-arena_bin_slabs_nonfull_tryget(bin_t *bin) {
-	edata_t *slab = edata_heap_remove_first(&bin->slabs_nonfull);
-	if (slab == NULL) {
-		return NULL;
-	}
-	if (config_stats) {
-		bin->stats.reslabs++;
-		bin->stats.nonfull_slabs--;
-	}
-	return slab;
-}
-
-static void
-arena_bin_slabs_full_insert(arena_t *arena, bin_t *bin, edata_t *slab) {
-	assert(edata_nfree_get(slab) == 0);
-	/*
-	 *  Tracking extents is required by arena_reset, which is not allowed
-	 *  for auto arenas.  Bypass this step to avoid touching the edata
-	 *  linkage (often results in cache misses) for auto arenas.
-	 */
-	if (arena_is_auto(arena)) {
-		return;
-	}
-	edata_list_active_append(&bin->slabs_full, slab);
-}
-
-static void
-arena_bin_slabs_full_remove(arena_t *arena, bin_t *bin, edata_t *slab) {
-	if (arena_is_auto(arena)) {
-		return;
-	}
-	edata_list_active_remove(&bin->slabs_full, slab);
-}
-
 static void
 arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) {
 	edata_t *slab;
 
 	malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
+
 	if (bin->slabcur != NULL) {
 		slab = bin->slabcur;
 		bin->slabcur = NULL;
@@ -643,8 +574,8 @@ arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) {
 		malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
 	}
 	for (slab = edata_list_active_first(&bin->slabs_full); slab != NULL;
-	     slab = edata_list_active_first(&bin->slabs_full)) {
-		arena_bin_slabs_full_remove(arena, bin, slab);
+	    slab = edata_list_active_first(&bin->slabs_full)) {
+		bin_slabs_full_remove(false, bin, slab);
 		malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
 		arena_slab_dalloc(tsd_tsdn(tsd), arena, slab);
 		malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
@@ -656,6 +587,79 @@ arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) {
 	malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
 }
 
+void
+arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize, size_t bumped_usize) {
+	cassert(config_prof);
+	assert(ptr != NULL);
+	assert(isalloc(tsdn, ptr) == bumped_usize);
+	assert(sz_can_use_slab(usize));
+
+	if (config_opt_safety_checks) {
+		safety_check_set_redzone(ptr, usize, bumped_usize);
+	}
+
+	edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr);
+
+	szind_t szind = sz_size2index(usize);
+	edata_szind_set(edata, szind);
+	emap_remap(tsdn, &arena_emap_global, edata, szind, /* slab */ false);
+
+	assert(isalloc(tsdn, ptr) == usize);
+}
+
+static size_t
+arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) {
+	cassert(config_prof);
+	assert(ptr != NULL);
+	size_t usize = isalloc(tsdn, ptr);
+	size_t bumped_usize = sz_sa2u(usize, PROF_SAMPLE_ALIGNMENT);
+	assert(bumped_usize <= SC_LARGE_MINCLASS
+	    && PAGE_CEILING(bumped_usize) == bumped_usize);
+	assert(edata_size_get(edata) - bumped_usize <= sz_large_pad);
+	szind_t szind = sz_size2index(bumped_usize);
+
+	edata_szind_set(edata, szind);
+	emap_remap(tsdn, &arena_emap_global, edata, szind, /* slab */ false);
+
+	assert(isalloc(tsdn, ptr) == bumped_usize);
+
+	return bumped_usize;
+}
+
+static void
+arena_dalloc_promoted_impl(
+    tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path, edata_t *edata) {
+	cassert(config_prof);
+	assert(opt_prof);
+
+	size_t usize = edata_usize_get(edata);
+	size_t bumped_usize = arena_prof_demote(tsdn, edata, ptr);
+	if (config_opt_safety_checks && usize < SC_LARGE_MINCLASS) {
+		/*
+		 * Currently, we only do redzoning for small sampled
+		 * allocations.
+		 */
+		safety_check_verify_redzone(ptr, usize, bumped_usize);
+	}
+	szind_t bumped_ind = sz_size2index(bumped_usize);
+	if (bumped_usize >= SC_LARGE_MINCLASS && tcache != NULL
+	    && bumped_ind < TCACHE_NBINS_MAX
+	    && !tcache_bin_disabled(
+	        bumped_ind, &tcache->bins[bumped_ind], tcache->tcache_slow)) {
+		tcache_dalloc_large(
+		    tsdn_tsd(tsdn), tcache, ptr, bumped_ind, slow_path);
+	} else {
+		large_dalloc(tsdn, edata);
+	}
+}
+
+void
+arena_dalloc_promoted(
+    tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) {
+	edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr);
+	arena_dalloc_promoted_impl(tsdn, ptr, tcache, slow_path, edata);
+}
+
 void
 arena_reset(tsd_t *tsd, arena_t *arena) {
 	/*
@@ -677,24 +681,29 @@ arena_reset(tsd_t *tsd, arena_t *arena) {
 
 	for (edata_t *edata = edata_list_active_first(&arena->large);
 	    edata != NULL; edata = edata_list_active_first(&arena->large)) {
-		void *ptr = edata_base_get(edata);
+		void  *ptr = edata_base_get(edata);
 		size_t usize;
 
 		malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx);
 		emap_alloc_ctx_t alloc_ctx;
-		emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr,
-		    &alloc_ctx);
+		emap_alloc_ctx_lookup(
+		    tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx);
 		assert(alloc_ctx.szind != SC_NSIZES);
 
 		if (config_stats || (config_prof && opt_prof)) {
-			usize = sz_index2size(alloc_ctx.szind);
+			usize = emap_alloc_ctx_usize_get(&alloc_ctx);
 			assert(usize == isalloc(tsd_tsdn(tsd), ptr));
 		}
 		/* Remove large allocation from prof sample set. */
 		if (config_prof && opt_prof) {
 			prof_free(tsd, ptr, usize, &alloc_ctx);
 		}
-		large_dalloc(tsd_tsdn(tsd), edata);
+		if (config_prof && opt_prof && alloc_ctx.szind < SC_NBINS) {
+			arena_dalloc_promoted_impl(tsd_tsdn(tsd), ptr,
+			    /* tcache */ NULL, /* slow_path */ true, edata);
+		} else {
+			large_dalloc(tsd_tsdn(tsd), edata);
+		}
 		malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx);
 	}
 	malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx);
@@ -709,8 +718,8 @@ arena_reset(tsd_t *tsd, arena_t *arena) {
 }
 
 static void
-arena_prepare_base_deletion_sync_finish(tsd_t *tsd, malloc_mutex_t **mutexes,
-    unsigned n_mtx) {
+arena_prepare_base_deletion_sync_finish(
+    tsd_t *tsd, malloc_mutex_t **mutexes, unsigned n_mtx) {
 	for (unsigned i = 0; i < n_mtx; i++) {
 		malloc_mutex_lock(tsd_tsdn(tsd), mutexes[i]);
 		malloc_mutex_unlock(tsd_tsdn(tsd), mutexes[i]);
@@ -769,9 +778,9 @@ arena_prepare_base_deletion(tsd_t *tsd, base_t *base_to_destroy) {
 	unsigned destroy_ind = base_ind_get(base_to_destroy);
 	assert(destroy_ind >= manual_arena_base);
 
-	tsdn_t *tsdn = tsd_tsdn(tsd);
+	tsdn_t         *tsdn = tsd_tsdn(tsd);
 	malloc_mutex_t *delayed_mtx[ARENA_DESTROY_MAX_DELAYED_MTX];
-	unsigned n_delayed = 0, total = narenas_total_get();
+	unsigned        n_delayed = 0, total = narenas_total_get();
 	for (unsigned i = 0; i < total; i++) {
 		if (i == destroy_ind) {
 			continue;
@@ -781,12 +790,12 @@ arena_prepare_base_deletion(tsd_t *tsd, base_t *base_to_destroy) {
 			continue;
 		}
 		pac_t *pac = &arena->pa_shard.pac;
-		arena_prepare_base_deletion_sync(tsd, &pac->ecache_dirty.mtx,
-		    delayed_mtx, &n_delayed);
-		arena_prepare_base_deletion_sync(tsd, &pac->ecache_muzzy.mtx,
-		    delayed_mtx, &n_delayed);
-		arena_prepare_base_deletion_sync(tsd, &pac->ecache_retained.mtx,
-		    delayed_mtx, &n_delayed);
+		arena_prepare_base_deletion_sync(
+		    tsd, &pac->ecache_dirty.mtx, delayed_mtx, &n_delayed);
+		arena_prepare_base_deletion_sync(
+		    tsd, &pac->ecache_muzzy.mtx, delayed_mtx, &n_delayed);
+		arena_prepare_base_deletion_sync(
+		    tsd, &pac->ecache_retained.mtx, delayed_mtx, &n_delayed);
 	}
 	arena_prepare_base_deletion_sync_finish(tsd, delayed_mtx, n_delayed);
 }
@@ -828,17 +837,17 @@ arena_destroy(tsd_t *tsd, arena_t *arena) {
 }
 
 static edata_t *
-arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard,
-    const bin_info_t *bin_info) {
+arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind,
+    unsigned binshard, const bin_info_t *bin_info) {
 	bool deferred_work_generated = false;
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
+	witness_assert_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0);
 
-	bool guarded = san_slab_extent_decide_guard(tsdn,
-	    arena_get_ehooks(arena));
+	bool guarded = san_slab_extent_decide_guard(
+	    tsdn, arena_get_ehooks(arena));
 	edata_t *slab = pa_alloc(tsdn, &arena->pa_shard, bin_info->slab_size,
 	    /* alignment */ PAGE, /* slab */ true, /* szind */ binind,
-	     /* zero */ false, guarded, &deferred_work_generated);
+	    /* zero */ false, guarded, &deferred_work_generated);
 
 	if (deferred_work_generated) {
 		arena_handle_deferred_work(tsdn, arena);
@@ -857,84 +866,13 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard
 	return slab;
 }
 
-/*
- * Before attempting the _with_fresh_slab approaches below, the _no_fresh_slab
- * variants (i.e. through slabcur and nonfull) must be tried first.
- */
-static void
-arena_bin_refill_slabcur_with_fresh_slab(tsdn_t *tsdn, arena_t *arena,
-    bin_t *bin, szind_t binind, edata_t *fresh_slab) {
-	malloc_mutex_assert_owner(tsdn, &bin->lock);
-	/* Only called after slabcur and nonfull both failed. */
-	assert(bin->slabcur == NULL);
-	assert(edata_heap_first(&bin->slabs_nonfull) == NULL);
-	assert(fresh_slab != NULL);
-
-	/* A new slab from arena_slab_alloc() */
-	assert(edata_nfree_get(fresh_slab) == bin_infos[binind].nregs);
-	if (config_stats) {
-		bin->stats.nslabs++;
-		bin->stats.curslabs++;
-	}
-	bin->slabcur = fresh_slab;
-}
-
-/* Refill slabcur and then alloc using the fresh slab */
-static void *
-arena_bin_malloc_with_fresh_slab(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
-    szind_t binind, edata_t *fresh_slab) {
-	malloc_mutex_assert_owner(tsdn, &bin->lock);
-	arena_bin_refill_slabcur_with_fresh_slab(tsdn, arena, bin, binind,
-	    fresh_slab);
-
-	return arena_slab_reg_alloc(bin->slabcur, &bin_infos[binind]);
-}
-
-static bool
-arena_bin_refill_slabcur_no_fresh_slab(tsdn_t *tsdn, arena_t *arena,
-    bin_t *bin) {
-	malloc_mutex_assert_owner(tsdn, &bin->lock);
-	/* Only called after arena_slab_reg_alloc[_batch] failed. */
-	assert(bin->slabcur == NULL || edata_nfree_get(bin->slabcur) == 0);
-
-	if (bin->slabcur != NULL) {
-		arena_bin_slabs_full_insert(arena, bin, bin->slabcur);
-	}
-
-	/* Look for a usable slab. */
-	bin->slabcur = arena_bin_slabs_nonfull_tryget(bin);
-	assert(bin->slabcur == NULL || edata_nfree_get(bin->slabcur) > 0);
-
-	return (bin->slabcur == NULL);
-}
-
-bin_t *
-arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind,
-    unsigned *binshard_p) {
-	unsigned binshard;
-	if (tsdn_null(tsdn) || tsd_arena_get(tsdn_tsd(tsdn)) == NULL) {
-		binshard = 0;
-	} else {
-		binshard = tsd_binshardsp_get(tsdn_tsd(tsdn))->binshard[binind];
-	}
-	assert(binshard < bin_infos[binind].n_shards);
-	if (binshard_p != NULL) {
-		*binshard_p = binshard;
-	}
-	return arena_get_bin(arena, binind, binshard);
-}
-
-void
-arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena,
-    cache_bin_t *cache_bin, cache_bin_info_t *cache_bin_info, szind_t binind,
-    const unsigned nfill) {
-	assert(cache_bin_ncached_get_local(cache_bin, cache_bin_info) == 0);
+cache_bin_sz_t
+arena_ptr_array_fill_small(tsdn_t *tsdn, arena_t *arena, szind_t binind,
+    cache_bin_ptr_array_t *arr, const cache_bin_sz_t nfill_min,
+    const cache_bin_sz_t nfill_max, cache_bin_stats_t merge_stats) {
+	assert(nfill_min > 0 && nfill_min <= nfill_max);
 
 	const bin_info_t *bin_info = &bin_infos[binind];
-
-	CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nfill);
-	cache_bin_init_ptr_array_for_fill(cache_bin, cache_bin_info, &ptrs,
-	    nfill);
 	/*
 	 * Bin-local resources are used first: 1) bin->slabcur, and 2) nonfull
 	 * slabs.  After both are exhausted, new slabs will be allocated through
@@ -961,40 +899,47 @@ arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena,
 	 * local exhausted, b) unlock and slab_alloc returns null, c) re-lock
 	 * and bin local fails again.
 	 */
-	bool made_progress = true;
-	edata_t *fresh_slab = NULL;
-	bool alloc_and_retry = false;
-	unsigned filled = 0;
-	unsigned binshard;
-	bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard);
+	bool           made_progress = true;
+	edata_t       *fresh_slab = NULL;
+	bool           alloc_and_retry = false;
+	bool           is_auto = arena_is_auto(arena);
+	cache_bin_sz_t filled = 0;
+	unsigned       binshard;
+	bin_t         *bin = bin_choose(tsdn, arena, binind, &binshard);
 
 label_refill:
 	malloc_mutex_lock(tsdn, &bin->lock);
 
-	while (filled < nfill) {
+	while (filled < nfill_min) {
 		/* Try batch-fill from slabcur first. */
 		edata_t *slabcur = bin->slabcur;
 		if (slabcur != NULL && edata_nfree_get(slabcur) > 0) {
-			unsigned tofill = nfill - filled;
-			unsigned nfree = edata_nfree_get(slabcur);
-			unsigned cnt = tofill < nfree ? tofill : nfree;
+			/*
+			 * Use up the free slots if the total filled <= nfill_max.
+			 * Otherwise, fallback to nfill_min for a more conservative
+			 * memory usage.
+			 */
+			unsigned cnt = edata_nfree_get(slabcur);
+			if (cnt + filled > nfill_max) {
+				cnt = nfill_min - filled;
+			}
 
-			arena_slab_reg_alloc_batch(slabcur, bin_info, cnt,
-			    &ptrs.ptr[filled]);
+			bin_slab_reg_alloc_batch(
+			    slabcur, bin_info, cnt, &arr->ptr[filled]);
 			made_progress = true;
 			filled += cnt;
 			continue;
 		}
 		/* Next try refilling slabcur from nonfull slabs. */
-		if (!arena_bin_refill_slabcur_no_fresh_slab(tsdn, arena, bin)) {
+		if (!bin_refill_slabcur_no_fresh_slab(tsdn, is_auto, bin)) {
 			assert(bin->slabcur != NULL);
 			continue;
 		}
 
 		/* Then see if a new slab was reserved already. */
 		if (fresh_slab != NULL) {
-			arena_bin_refill_slabcur_with_fresh_slab(tsdn, arena,
-			    bin, binind, fresh_slab);
+			bin_refill_slabcur_with_fresh_slab(
+			    tsdn, bin, binind, fresh_slab);
 			assert(bin->slabcur != NULL);
 			fresh_slab = NULL;
 			continue;
@@ -1014,32 +959,32 @@ label_refill:
 		assert(fresh_slab == NULL);
 		assert(!alloc_and_retry);
 		break;
-	} /* while (filled < nfill) loop. */
+	} /* while (filled < nfill_min) loop. */
 
 	if (config_stats && !alloc_and_retry) {
 		bin->stats.nmalloc += filled;
-		bin->stats.nrequests += cache_bin->tstats.nrequests;
+		bin->stats.nrequests += merge_stats.nrequests;
 		bin->stats.curregs += filled;
 		bin->stats.nfills++;
-		cache_bin->tstats.nrequests = 0;
 	}
 
 	malloc_mutex_unlock(tsdn, &bin->lock);
 
 	if (alloc_and_retry) {
 		assert(fresh_slab == NULL);
-		assert(filled < nfill);
+		assert(filled < nfill_min);
 		assert(made_progress);
 
-		fresh_slab = arena_slab_alloc(tsdn, arena, binind, binshard,
-		    bin_info);
+		fresh_slab = arena_slab_alloc(
+		    tsdn, arena, binind, binshard, bin_info);
 		/* fresh_slab NULL case handled in the for loop. */
 
 		alloc_and_retry = false;
 		made_progress = false;
 		goto label_refill;
 	}
-	assert(filled == nfill || (fresh_slab == NULL && !made_progress));
+	assert((filled >= nfill_min && filled <= nfill_max)
+	    || (fresh_slab == NULL && !made_progress));
 
 	/* Release if allocated but not used. */
 	if (fresh_slab != NULL) {
@@ -1048,8 +993,8 @@ label_refill:
 		fresh_slab = NULL;
 	}
 
-	cache_bin_finish_fill(cache_bin, cache_bin_info, &ptrs, filled);
 	arena_decay_tick(tsdn, arena);
+	return filled;
 }
 
 size_t
@@ -1057,22 +1002,24 @@ arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind,
     void **ptrs, size_t nfill, bool zero) {
 	assert(binind < SC_NBINS);
 	const bin_info_t *bin_info = &bin_infos[binind];
-	const size_t nregs = bin_info->nregs;
+	const size_t      nregs = bin_info->nregs;
 	assert(nregs > 0);
 	const size_t usize = bin_info->reg_size;
 
 	const bool manual_arena = !arena_is_auto(arena);
-	unsigned binshard;
-	bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard);
+	unsigned   binshard;
+	bin_t     *bin = bin_choose(tsdn, arena, binind, &binshard);
 
-	size_t nslab = 0;
-	size_t filled = 0;
-	edata_t *slab = NULL;
+	size_t              nslab = 0;
+	size_t              filled = 0;
+	edata_t            *slab = NULL;
 	edata_list_active_t fulls;
 	edata_list_active_init(&fulls);
 
-	while (filled < nfill && (slab = arena_slab_alloc(tsdn, arena, binind,
-	    binshard, bin_info)) != NULL) {
+	while (filled < nfill
+	    && (slab = arena_slab_alloc(
+	            tsdn, arena, binind, binshard, bin_info))
+	        != NULL) {
 		assert((size_t)edata_nfree_get(slab) == nregs);
 		++nslab;
 		size_t batch = nfill - filled;
@@ -1080,8 +1027,8 @@ arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind,
 			batch = nregs;
 		}
 		assert(batch > 0);
-		arena_slab_reg_alloc_batch(slab, bin_info, (unsigned)batch,
-		    &ptrs[filled]);
+		bin_slab_reg_alloc_batch(
+		    slab, bin_info, (unsigned)batch, &ptrs[filled]);
 		assert(edata_addr_get(slab) == ptrs[filled]);
 		if (zero) {
 			memset(ptrs[filled], 0, batch * usize);
@@ -1101,7 +1048,7 @@ arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind,
 	 * iff slab != NULL.
 	 */
 	if (slab != NULL) {
-		arena_bin_lower_slab(tsdn, arena, slab, bin);
+		bin_lower_slab(tsdn, !manual_arena, slab, bin);
 	}
 	if (manual_arena) {
 		edata_list_active_concat(&bin->slabs_full, &fulls);
@@ -1120,52 +1067,35 @@ arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind,
 	return filled;
 }
 
-/*
- * Without allocating a new slab, try arena_slab_reg_alloc() and re-fill
- * bin->slabcur if necessary.
- */
-static void *
-arena_bin_malloc_no_fresh_slab(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
-    szind_t binind) {
-	malloc_mutex_assert_owner(tsdn, &bin->lock);
-	if (bin->slabcur == NULL || edata_nfree_get(bin->slabcur) == 0) {
-		if (arena_bin_refill_slabcur_no_fresh_slab(tsdn, arena, bin)) {
-			return NULL;
-		}
-	}
-
-	assert(bin->slabcur != NULL && edata_nfree_get(bin->slabcur) > 0);
-	return arena_slab_reg_alloc(bin->slabcur, &bin_infos[binind]);
-}
-
 static void *
 arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) {
 	assert(binind < SC_NBINS);
 	const bin_info_t *bin_info = &bin_infos[binind];
-	size_t usize = sz_index2size(binind);
-	unsigned binshard;
-	bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard);
+	size_t            usize = sz_index2size(binind);
+	bool              is_auto = arena_is_auto(arena);
+	unsigned          binshard;
+	bin_t *bin = bin_choose(tsdn, arena, binind, &binshard);
 
 	malloc_mutex_lock(tsdn, &bin->lock);
 	edata_t *fresh_slab = NULL;
-	void *ret = arena_bin_malloc_no_fresh_slab(tsdn, arena, bin, binind);
+	void    *ret = bin_malloc_no_fresh_slab(tsdn, is_auto, bin, binind);
 	if (ret == NULL) {
 		malloc_mutex_unlock(tsdn, &bin->lock);
 		/******************************/
-		fresh_slab = arena_slab_alloc(tsdn, arena, binind, binshard,
-		    bin_info);
+		fresh_slab = arena_slab_alloc(
+		    tsdn, arena, binind, binshard, bin_info);
 		/********************************/
 		malloc_mutex_lock(tsdn, &bin->lock);
 		/* Retry since the lock was dropped. */
-		ret = arena_bin_malloc_no_fresh_slab(tsdn, arena, bin, binind);
+		ret = bin_malloc_no_fresh_slab(tsdn, is_auto, bin, binind);
 		if (ret == NULL) {
 			if (fresh_slab == NULL) {
 				/* OOM */
 				malloc_mutex_unlock(tsdn, &bin->lock);
 				return NULL;
 			}
-			ret = arena_bin_malloc_with_fresh_slab(tsdn, arena, bin,
-			    binind, fresh_slab);
+			ret = bin_malloc_with_fresh_slab(
+			    tsdn, bin, binind, fresh_slab);
 			fresh_slab = NULL;
 		}
 	}
@@ -1189,7 +1119,7 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) {
 
 void *
 arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind,
-    bool zero) {
+    bool zero, bool slab) {
 	assert(!tsdn_null(tsdn) || arena != NULL);
 
 	if (likely(!tsdn_null(tsdn))) {
@@ -1199,18 +1129,19 @@ arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind,
 		return NULL;
 	}
 
-	if (likely(size <= SC_SMALL_MAXCLASS)) {
+	if (likely(slab)) {
+		assert(sz_can_use_slab(size));
 		return arena_malloc_small(tsdn, arena, ind, zero);
+	} else {
+		return large_malloc(tsdn, arena, sz_s2u(size), zero);
 	}
-	return large_malloc(tsdn, arena, sz_index2size(ind), zero);
 }
 
 void *
 arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
-    bool zero, tcache_t *tcache) {
-	void *ret;
-
-	if (usize <= SC_SMALL_MAXCLASS) {
+    bool zero, bool slab, tcache_t *tcache) {
+	if (slab) {
+		assert(sz_can_use_slab(usize));
 		/* Small; alignment doesn't require special slab placement. */
 
 		/* usize should be a result of sz_sa2u() */
@@ -1221,161 +1152,30 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
 		 */
 		assert(alignment <= PAGE);
 
-		ret = arena_malloc(tsdn, arena, usize, sz_size2index(usize),
-		    zero, tcache, true);
+		return arena_malloc(tsdn, arena, usize, sz_size2index(usize),
+		    zero, slab, tcache, true);
 	} else {
 		if (likely(alignment <= CACHELINE)) {
-			ret = large_malloc(tsdn, arena, usize, zero);
+			return large_malloc(tsdn, arena, usize, zero);
 		} else {
-			ret = large_palloc(tsdn, arena, usize, alignment, zero);
+			return large_palloc(
+			    tsdn, arena, usize, alignment, zero);
 		}
 	}
-	return ret;
-}
-
-void
-arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) {
-	cassert(config_prof);
-	assert(ptr != NULL);
-	assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS);
-	assert(usize <= SC_SMALL_MAXCLASS);
-
-	if (config_opt_safety_checks) {
-		safety_check_set_redzone(ptr, usize, SC_LARGE_MINCLASS);
-	}
-
-	edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr);
-
-	szind_t szind = sz_size2index(usize);
-	edata_szind_set(edata, szind);
-	emap_remap(tsdn, &arena_emap_global, edata, szind, /* slab */ false);
-
-	assert(isalloc(tsdn, ptr) == usize);
-}
-
-static size_t
-arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) {
-	cassert(config_prof);
-	assert(ptr != NULL);
-
-	edata_szind_set(edata, SC_NBINS);
-	emap_remap(tsdn, &arena_emap_global, edata, SC_NBINS, /* slab */ false);
-
-	assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS);
-
-	return SC_LARGE_MINCLASS;
-}
-
-void
-arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
-    bool slow_path) {
-	cassert(config_prof);
-	assert(opt_prof);
-
-	edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr);
-	size_t usize = edata_usize_get(edata);
-	size_t bumped_usize = arena_prof_demote(tsdn, edata, ptr);
-	if (config_opt_safety_checks && usize < SC_LARGE_MINCLASS) {
-		/*
-		 * Currently, we only do redzoning for small sampled
-		 * allocations.
-		 */
-		assert(bumped_usize == SC_LARGE_MINCLASS);
-		safety_check_verify_redzone(ptr, usize, bumped_usize);
-	}
-	if (bumped_usize <= tcache_maxclass && tcache != NULL) {
-		tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr,
-		    sz_size2index(bumped_usize), slow_path);
-	} else {
-		large_dalloc(tsdn, edata);
-	}
-}
-
-static void
-arena_dissociate_bin_slab(arena_t *arena, edata_t *slab, bin_t *bin) {
-	/* Dissociate slab from bin. */
-	if (slab == bin->slabcur) {
-		bin->slabcur = NULL;
-	} else {
-		szind_t binind = edata_szind_get(slab);
-		const bin_info_t *bin_info = &bin_infos[binind];
-
-		/*
-		 * The following block's conditional is necessary because if the
-		 * slab only contains one region, then it never gets inserted
-		 * into the non-full slabs heap.
-		 */
-		if (bin_info->nregs == 1) {
-			arena_bin_slabs_full_remove(arena, bin, slab);
-		} else {
-			arena_bin_slabs_nonfull_remove(bin, slab);
-		}
-	}
-}
-
-static void
-arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab,
-    bin_t *bin) {
-	assert(edata_nfree_get(slab) > 0);
-
-	/*
-	 * Make sure that if bin->slabcur is non-NULL, it refers to the
-	 * oldest/lowest non-full slab.  It is okay to NULL slabcur out rather
-	 * than proactively keeping it pointing at the oldest/lowest non-full
-	 * slab.
-	 */
-	if (bin->slabcur != NULL && edata_snad_comp(bin->slabcur, slab) > 0) {
-		/* Switch slabcur. */
-		if (edata_nfree_get(bin->slabcur) > 0) {
-			arena_bin_slabs_nonfull_insert(bin, bin->slabcur);
-		} else {
-			arena_bin_slabs_full_insert(arena, bin, bin->slabcur);
-		}
-		bin->slabcur = slab;
-		if (config_stats) {
-			bin->stats.reslabs++;
-		}
-	} else {
-		arena_bin_slabs_nonfull_insert(bin, slab);
-	}
-}
-
-static void
-arena_dalloc_bin_slab_prepare(tsdn_t *tsdn, edata_t *slab, bin_t *bin) {
-	malloc_mutex_assert_owner(tsdn, &bin->lock);
-
-	assert(slab != bin->slabcur);
-	if (config_stats) {
-		bin->stats.curslabs--;
-	}
-}
-
-void
-arena_dalloc_bin_locked_handle_newly_empty(tsdn_t *tsdn, arena_t *arena,
-    edata_t *slab, bin_t *bin) {
-	arena_dissociate_bin_slab(arena, slab, bin);
-	arena_dalloc_bin_slab_prepare(tsdn, slab, bin);
-}
-
-void
-arena_dalloc_bin_locked_handle_newly_nonempty(tsdn_t *tsdn, arena_t *arena,
-    edata_t *slab, bin_t *bin) {
-	arena_bin_slabs_full_remove(arena, bin, slab);
-	arena_bin_lower_slab(tsdn, arena, slab, bin);
 }
 
 static void
 arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) {
-	szind_t binind = edata_szind_get(edata);
+	szind_t  binind = edata_szind_get(edata);
 	unsigned binshard = edata_binshard_get(edata);
-	bin_t *bin = arena_get_bin(arena, binind, binshard);
+	bin_t   *bin = arena_get_bin(arena, binind, binshard);
 
 	malloc_mutex_lock(tsdn, &bin->lock);
-	arena_dalloc_bin_locked_info_t info;
-	arena_dalloc_bin_locked_begin(&info, binind);
-	bool ret = arena_dalloc_bin_locked_step(tsdn, arena, bin,
-	    &info, binind, edata, ptr);
-	arena_dalloc_bin_locked_finish(tsdn, arena, bin, &info);
+	bin_dalloc_locked_info_t info;
+	bin_dalloc_locked_begin(&info, binind);
+	bool ret = bin_dalloc_locked_step(
+	    tsdn, arena_is_auto(arena), bin, &info, binind, edata, ptr);
+	bin_dalloc_locked_finish(tsdn, bin, &info);
 	malloc_mutex_unlock(tsdn, &bin->lock);
 
 	if (ret) {
@@ -1392,6 +1192,358 @@ arena_dalloc_small(tsdn_t *tsdn, void *ptr) {
 	arena_decay_tick(tsdn, arena);
 }
 
+static const void *
+arena_ptr_array_flush_ptr_getter(void *arr_ctx, size_t ind) {
+	cache_bin_ptr_array_t *arr = (cache_bin_ptr_array_t *)arr_ctx;
+	return arr->ptr[ind];
+}
+
+static void
+arena_ptr_array_flush_metadata_visitor(
+    void *szind_sum_ctx, emap_full_alloc_ctx_t *alloc_ctx) {
+	size_t *szind_sum = (size_t *)szind_sum_ctx;
+	*szind_sum -= alloc_ctx->szind;
+	util_prefetch_write_range(alloc_ctx->edata, sizeof(edata_t));
+}
+
+JEMALLOC_NOINLINE static void
+arena_ptr_array_flush_size_check_fail(cache_bin_ptr_array_t *arr, szind_t szind,
+    size_t nptrs, emap_batch_lookup_result_t *edatas) {
+	bool found_mismatch = false;
+	for (size_t i = 0; i < nptrs; i++) {
+		szind_t true_szind = edata_szind_get(edatas[i].edata);
+		if (true_szind != szind) {
+			found_mismatch = true;
+			safety_check_fail_sized_dealloc(
+			    /* current_dealloc */ false,
+			    /* ptr */ arena_ptr_array_flush_ptr_getter(arr, i),
+			    /* true_size */ sz_index2size(true_szind),
+			    /* input_size */ sz_index2size(szind));
+		}
+	}
+	assert(found_mismatch);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+arena_ptr_array_flush_impl_small(tsdn_t *tsdn, szind_t binind,
+    cache_bin_ptr_array_t *arr, emap_batch_lookup_result_t *item_edata,
+    cache_bin_sz_t nflush, arena_t *stats_arena,
+    cache_bin_stats_t **merge_stats) {
+	/*
+	 * The slabs where we freed the last remaining object in the slab (and
+	 * so need to free the slab itself).
+	 * Used only if small == true.
+	 */
+	unsigned dalloc_count = 0;
+	VARIABLE_ARRAY(edata_t *, dalloc_slabs, nflush + 1);
+	/*
+	 * We're about to grab a bunch of locks.  If one of them happens to be
+	 * the one guarding the arena-level stats counters we flush our
+	 * thread-local ones to, we do so under one critical section.
+	 */
+	/*
+	 * We maintain the invariant that all edatas yet to be flushed are
+	 * contained in the half-open range [flush_start, flush_end).  We'll
+	 * repeatedly partition the array so that the unflushed items are at the
+	 * end.
+	 */
+	unsigned flush_start = 0;
+
+	while (flush_start < nflush) {
+		/*
+		 * After our partitioning step, all objects to flush will be in
+		 * the half-open range [prev_flush_start, flush_start), and
+		 * flush_start will be updated to correspond to the next loop
+		 * iteration.
+		 */
+		unsigned prev_flush_start = flush_start;
+
+		edata_t *cur_edata = item_edata[flush_start].edata;
+		unsigned cur_arena_ind = edata_arena_ind_get(cur_edata);
+		arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false);
+
+		unsigned cur_binshard = edata_binshard_get(cur_edata);
+		bin_t *cur_bin = arena_get_bin(cur_arena, binind, cur_binshard);
+		assert(cur_binshard < bin_infos[binind].n_shards);
+		/*
+		 * Start off the partition; item_edata[i] always matches itself
+		 * of course.
+		 */
+		flush_start++;
+		for (unsigned i = flush_start; i < nflush; i++) {
+			void    *ptr = arr->ptr[i];
+			edata_t *edata = item_edata[i].edata;
+			assert(ptr != NULL && edata != NULL);
+			assert(
+			    (uintptr_t)ptr >= (uintptr_t)edata_addr_get(edata));
+			assert(
+			    (uintptr_t)ptr < (uintptr_t)edata_past_get(edata));
+			if (edata_arena_ind_get(edata) == cur_arena_ind
+			    && edata_binshard_get(edata) == cur_binshard) {
+				/* Swap the edatas. */
+				emap_batch_lookup_result_t temp_edata =
+				    item_edata[flush_start];
+				item_edata[flush_start] = item_edata[i];
+				item_edata[i] = temp_edata;
+				/* Swap the pointers */
+				void *temp_ptr = arr->ptr[flush_start];
+				arr->ptr[flush_start] = arr->ptr[i];
+				arr->ptr[i] = temp_ptr;
+				flush_start++;
+			}
+		}
+		/* Make sure we implemented partitioning correctly. */
+		if (config_debug) {
+			for (unsigned i = prev_flush_start; i < flush_start;
+			    i++) {
+				edata_t *edata = item_edata[i].edata;
+				unsigned arena_ind = edata_arena_ind_get(edata);
+				assert(arena_ind == cur_arena_ind);
+				unsigned binshard = edata_binshard_get(edata);
+				assert(binshard == cur_binshard);
+			}
+			for (unsigned i = flush_start; i < nflush; i++) {
+				edata_t *edata = item_edata[i].edata;
+				assert(
+				    edata_arena_ind_get(edata) != cur_arena_ind
+				    || edata_binshard_get(edata)
+				        != cur_binshard);
+			}
+		}
+
+		/* Actually do the flushing. */
+		malloc_mutex_lock(tsdn, &cur_bin->lock);
+
+		/*
+		 * Flush stats first, if that was the right lock.  Note that we
+		 * don't actually have to flush stats into the current thread's
+		 * binshard. Flushing into any binshard in the same arena is
+		 * enough; we don't expose stats on per-binshard basis (just
+		 * per-bin).
+		 */
+		if (config_stats && stats_arena == cur_arena
+		    && *merge_stats != NULL) {
+			cur_bin->stats.nflushes++;
+			cur_bin->stats.nrequests += (*merge_stats)->nrequests;
+			*merge_stats = NULL;
+		}
+
+		/* Next flush objects. */
+		/* Init only to avoid used-uninitialized warning. */
+		bin_dalloc_locked_info_t dalloc_bin_info = {0};
+		bin_dalloc_locked_begin(&dalloc_bin_info, binind);
+		for (unsigned i = prev_flush_start; i < flush_start; i++) {
+			void    *ptr = arr->ptr[i];
+			edata_t *edata = item_edata[i].edata;
+			if (bin_dalloc_locked_step(tsdn,
+			        arena_is_auto(cur_arena),
+			        cur_bin, &dalloc_bin_info, binind, edata,
+			        ptr)) {
+				dalloc_slabs[dalloc_count] = edata;
+				dalloc_count++;
+			}
+		}
+
+		bin_dalloc_locked_finish(
+		    tsdn, cur_bin, &dalloc_bin_info);
+		malloc_mutex_unlock(tsdn, &cur_bin->lock);
+
+		arena_decay_ticks(
+		    tsdn, cur_arena, flush_start - prev_flush_start);
+	}
+
+	/* Handle all deferred slab dalloc. */
+	for (unsigned i = 0; i < dalloc_count; i++) {
+		edata_t *slab = dalloc_slabs[i];
+		arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab);
+	}
+
+	if (config_stats && *merge_stats != NULL) {
+		/*
+		 * The flush loop didn't happen to flush to this
+		 * thread's arena, so the stats didn't get merged.
+		 * Manually do so now.
+		 */
+		bin_t *bin = bin_choose(tsdn, stats_arena, binind, NULL);
+		malloc_mutex_lock(tsdn, &bin->lock);
+		bin->stats.nflushes++;
+		bin->stats.nrequests += (*merge_stats)->nrequests;
+		*merge_stats = NULL;
+		malloc_mutex_unlock(tsdn, &bin->lock);
+	}
+}
+
+JEMALLOC_ALWAYS_INLINE void
+arena_ptr_array_flush_impl_large(tsdn_t *tsdn, szind_t binind,
+    cache_bin_ptr_array_t *arr, emap_batch_lookup_result_t *item_edata,
+    cache_bin_sz_t nflush, arena_t *stats_arena,
+    cache_bin_stats_t **merge_stats) {
+	/*
+	 * We're about to grab a bunch of locks.  If one of them happens to be
+	 * the one guarding the arena-level stats counters we flush our
+	 * thread-local ones to, we do so under one critical section.
+	 */
+	while (nflush > 0) {
+		/* Lock the arena, or bin, associated with the first object. */
+		edata_t *edata = item_edata[0].edata;
+		unsigned cur_arena_ind = edata_arena_ind_get(edata);
+		arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false);
+
+		if (!arena_is_auto(cur_arena)) {
+			malloc_mutex_lock(tsdn, &cur_arena->large_mtx);
+		}
+
+		/*
+		 * If we acquired the right lock and have some stats to flush,
+		 * flush them.
+		 */
+		if (config_stats && stats_arena == cur_arena
+		    && *merge_stats != NULL) {
+			arena_stats_large_flush_nrequests_add(tsdn,
+			    &stats_arena->stats, binind,
+			    (*merge_stats)->nrequests);
+			*merge_stats = NULL;
+		}
+
+		/*
+		 * Large allocations need special prep done.  Afterwards, we can
+		 * drop the large lock.
+		 */
+		for (unsigned i = 0; i < nflush; i++) {
+			void *ptr = arr->ptr[i];
+			edata = item_edata[i].edata;
+			assert(ptr != NULL && edata != NULL);
+
+			if (edata_arena_ind_get(edata) == cur_arena_ind) {
+				large_dalloc_prep_locked(tsdn, edata);
+			}
+		}
+		if (!arena_is_auto(cur_arena)) {
+			malloc_mutex_unlock(tsdn, &cur_arena->large_mtx);
+		}
+
+		/* Deallocate whatever we can. */
+		unsigned ndeferred = 0;
+		for (unsigned i = 0; i < nflush; i++) {
+			void *ptr = arr->ptr[i];
+			edata = item_edata[i].edata;
+			assert(ptr != NULL && edata != NULL);
+			if (edata_arena_ind_get(edata) != cur_arena_ind) {
+				/*
+				 * The object was allocated either via a
+				 * different arena, or a different bin in this
+				 * arena.  Either way, stash the object so that
+				 * it can be handled in a future pass.
+				 */
+				arr->ptr[ndeferred] = ptr;
+				item_edata[ndeferred].edata = edata;
+				ndeferred++;
+				continue;
+			}
+			if (large_dalloc_safety_checks(
+			        edata, ptr, sz_index2size(binind))) {
+				/* See the comment in isfree. */
+				continue;
+			}
+			large_dalloc_finish(tsdn, edata);
+		}
+		arena_decay_ticks(tsdn, cur_arena, nflush - ndeferred);
+		nflush = ndeferred;
+	}
+
+	if (config_stats && *merge_stats != NULL) {
+		arena_stats_large_flush_nrequests_add(tsdn, &stats_arena->stats,
+		    binind, (*merge_stats)->nrequests);
+		*merge_stats = NULL;
+	}
+}
+
+JEMALLOC_ALWAYS_INLINE void
+arena_ptr_array_flush_impl(tsd_t *tsd, szind_t binind,
+    cache_bin_ptr_array_t *arr, unsigned nflush, bool small,
+    arena_t *stats_arena, cache_bin_stats_t **merge_stats) {
+	/*
+	 * A couple lookup calls take tsdn; declare it once for convenience
+	 * instead of calling tsd_tsdn(tsd) all the time.
+	 */
+	tsdn_t *tsdn = tsd_tsdn(tsd);
+	/*
+	 * Variable length array must have > 0 length; the last element is never
+	 * touched (it's just included to satisfy the no-zero-length rule).
+	 */
+	VARIABLE_ARRAY(emap_batch_lookup_result_t, item_edata, nflush + 1);
+	/*
+	 * This gets compiled away when config_opt_safety_checks is false.
+	 * Checks for sized deallocation bugs, failing early rather than
+	 * corrupting metadata.
+	 */
+	size_t szind_sum = binind * nflush;
+	emap_edata_lookup_batch(tsd, &arena_emap_global, nflush,
+	    &arena_ptr_array_flush_ptr_getter, (void *)arr,
+	    &arena_ptr_array_flush_metadata_visitor, (void *)&szind_sum,
+	    item_edata);
+	if (config_opt_safety_checks && unlikely(szind_sum != 0)) {
+		arena_ptr_array_flush_size_check_fail(
+		    arr, binind, nflush, item_edata);
+	}
+
+	/*
+	 * The small/large flush logic is very similar; you might conclude that
+	 * it's a good opportunity to share code.  We've tried this, and by and
+	 * large found this to obscure more than it helps; there are so many
+	 * fiddly bits around things like stats handling, precisely when and
+	 * which mutexes are acquired, etc., that almost all code ends up being
+	 * gated behind 'if (small) { ... } else { ... }'.  Even though the
+	 * '...' is morally equivalent, the code itself needs slight tweaks.
+	 */
+	if (small) {
+		return arena_ptr_array_flush_impl_small(tsdn, binind, arr,
+		    item_edata, nflush, stats_arena, merge_stats);
+	} else {
+		return arena_ptr_array_flush_impl_large(tsdn, binind, arr,
+		    item_edata, nflush, stats_arena, merge_stats);
+	}
+}
+
+/*
+ * In practice, pointers are flushed back to their original allocation arenas,
+ * so multiple arenas may be involved here. The input stats_arena simply
+ * indicates where the cache stats should be merged into.
+ */
+void
+arena_ptr_array_flush(tsd_t *tsd, szind_t binind, cache_bin_ptr_array_t *arr,
+    unsigned nflush, bool small, arena_t *stats_arena,
+    cache_bin_stats_t merge_stats) {
+	assert(arr != NULL && arr->ptr != NULL);
+	/*
+     * The input cache bin stats represent a snapshot taken when the pointer
+	 * array is set up, and will be merged into the next-level bin stats.
+     * The original bin stats will be reset by the caller itself.
+     * This separation ensures that each layer operates independently and
+     * does not modify another layer's data directly.
+     */
+	cache_bin_stats_t    *stats = &merge_stats;
+	unsigned              nflush_batch, nflushed = 0;
+	cache_bin_ptr_array_t ptrs_batch;
+	do {
+		nflush_batch = nflush - nflushed;
+		if (nflush_batch > CACHE_BIN_NFLUSH_BATCH_MAX) {
+			nflush_batch = CACHE_BIN_NFLUSH_BATCH_MAX;
+		}
+		assert(nflush_batch <= CACHE_BIN_NFLUSH_BATCH_MAX);
+		(&ptrs_batch)->n = (cache_bin_sz_t)nflush_batch;
+		(&ptrs_batch)->ptr = arr->ptr + nflushed;
+		arena_ptr_array_flush_impl(tsd, binind, &ptrs_batch,
+		    nflush_batch, small, stats_arena, &stats);
+		nflushed += nflush_batch;
+	} while (nflushed < nflush);
+	assert(nflush == nflushed);
+	assert((arr->ptr + nflush) == ((&ptrs_batch)->ptr + nflush_batch));
+	if (config_stats) {
+		assert(stats == NULL);
+	}
+}
+
 bool
 arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
     size_t extra, bool zero, size_t *newsize) {
@@ -1407,16 +1559,15 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
 
 	size_t usize_min = sz_s2u(size);
 	size_t usize_max = sz_s2u(size + extra);
-	if (likely(oldsize <= SC_SMALL_MAXCLASS && usize_min
-	    <= SC_SMALL_MAXCLASS)) {
+	if (likely(oldsize <= SC_SMALL_MAXCLASS
+	        && usize_min <= SC_SMALL_MAXCLASS)) {
 		/*
 		 * Avoid moving the allocation if the size class can be left the
 		 * same.
 		 */
-		assert(bin_infos[sz_size2index(oldsize)].reg_size ==
-		    oldsize);
+		assert(bin_infos[sz_size2index(oldsize)].reg_size == oldsize);
 		if ((usize_max > SC_SMALL_MAXCLASS
-		    || sz_size2index(usize_max) != sz_size2index(oldsize))
+		        || sz_size2index(usize_max) != sz_size2index(oldsize))
 		    && (size > oldsize || usize_max < oldsize)) {
 			ret = true;
 			goto done;
@@ -1427,8 +1578,8 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
 		ret = false;
 	} else if (oldsize >= SC_LARGE_MINCLASS
 	    && usize_max >= SC_LARGE_MINCLASS) {
-		ret = large_ralloc_no_move(tsdn, edata, usize_min, usize_max,
-		    zero);
+		ret = large_ralloc_no_move(
+		    tsdn, edata, usize_min, usize_max, zero);
 	} else {
 		ret = true;
 	}
@@ -1441,61 +1592,64 @@ done:
 
 static void *
 arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize,
-    size_t alignment, bool zero, tcache_t *tcache) {
+    size_t alignment, bool zero, bool slab, tcache_t *tcache) {
 	if (alignment == 0) {
 		return arena_malloc(tsdn, arena, usize, sz_size2index(usize),
-		    zero, tcache, true);
+		    zero, slab, tcache, true);
 	}
 	usize = sz_sa2u(usize, alignment);
 	if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) {
 		return NULL;
 	}
-	return ipalloct(tsdn, usize, alignment, zero, tcache, arena);
+	return ipalloct_explicit_slab(
+	    tsdn, usize, alignment, zero, slab, tcache, arena);
 }
 
 void *
 arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize,
-    size_t size, size_t alignment, bool zero, tcache_t *tcache,
+    size_t size, size_t alignment, bool zero, bool slab, tcache_t *tcache,
     hook_ralloc_args_t *hook_args) {
 	size_t usize = alignment == 0 ? sz_s2u(size) : sz_sa2u(size, alignment);
 	if (unlikely(usize == 0 || size > SC_LARGE_MAXCLASS)) {
 		return NULL;
 	}
 
-	if (likely(usize <= SC_SMALL_MAXCLASS)) {
+	if (likely(slab)) {
+		assert(sz_can_use_slab(usize));
 		/* Try to avoid moving the allocation. */
 		UNUSED size_t newsize;
-		if (!arena_ralloc_no_move(tsdn, ptr, oldsize, usize, 0, zero,
-		    &newsize)) {
+		if (!arena_ralloc_no_move(
+		        tsdn, ptr, oldsize, usize, 0, zero, &newsize)) {
 			hook_invoke_expand(hook_args->is_realloc
-			    ? hook_expand_realloc : hook_expand_rallocx,
+			        ? hook_expand_realloc
+			        : hook_expand_rallocx,
 			    ptr, oldsize, usize, (uintptr_t)ptr,
 			    hook_args->args);
 			return ptr;
 		}
 	}
 
-	if (oldsize >= SC_LARGE_MINCLASS
-	    && usize >= SC_LARGE_MINCLASS) {
-		return large_ralloc(tsdn, arena, ptr, usize,
-		    alignment, zero, tcache, hook_args);
+	if (oldsize >= SC_LARGE_MINCLASS && usize >= SC_LARGE_MINCLASS) {
+		return large_ralloc(tsdn, arena, ptr, usize, alignment, zero,
+		    tcache, hook_args);
 	}
 
 	/*
 	 * size and oldsize are different enough that we need to move the
 	 * object.  In that case, fall back to allocating new space and copying.
 	 */
-	void *ret = arena_ralloc_move_helper(tsdn, arena, usize, alignment,
-	    zero, tcache);
+	void *ret = arena_ralloc_move_helper(
+	    tsdn, arena, usize, alignment, zero, slab, tcache);
 	if (ret == NULL) {
 		return NULL;
 	}
 
-	hook_invoke_alloc(hook_args->is_realloc
-	    ? hook_alloc_realloc : hook_alloc_rallocx, ret, (uintptr_t)ret,
-	    hook_args->args);
-	hook_invoke_dalloc(hook_args->is_realloc
-	    ? hook_dalloc_realloc : hook_dalloc_rallocx, ptr, hook_args->args);
+	hook_invoke_alloc(
+	    hook_args->is_realloc ? hook_alloc_realloc : hook_alloc_rallocx,
+	    ret, (uintptr_t)ret, hook_args->args);
+	hook_invoke_dalloc(
+	    hook_args->is_realloc ? hook_dalloc_realloc : hook_dalloc_rallocx,
+	    ptr, hook_args->args);
 
 	/*
 	 * Junk/zero-filling were already done by
@@ -1513,8 +1667,8 @@ arena_get_ehooks(arena_t *arena) {
 }
 
 extent_hooks_t *
-arena_set_extent_hooks(tsd_t *tsd, arena_t *arena,
-    extent_hooks_t *extent_hooks) {
+arena_set_extent_hooks(
+    tsd_t *tsd, arena_t *arena, extent_hooks_t *extent_hooks) {
 	background_thread_info_t *info;
 	if (have_background_thread) {
 		info = arena_background_thread_info_get(arena);
@@ -1544,6 +1698,22 @@ arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec) {
 	return false;
 }
 
+void
+arena_name_get(arena_t *arena, char *name) {
+	char *end = (char *)memchr((void *)arena->name, '\0', ARENA_NAME_LEN);
+	assert(end != NULL);
+	size_t len = (uintptr_t)end - (uintptr_t)arena->name + 1;
+	assert(len > 0 && len <= ARENA_NAME_LEN);
+
+	strncpy(name, arena->name, len);
+}
+
+void
+arena_name_set(arena_t *arena, const char *name) {
+	strncpy(arena->name, name, ARENA_NAME_LEN);
+	arena->name[ARENA_NAME_LEN - 1] = '\0';
+}
+
 ssize_t
 arena_dirty_decay_ms_default_get(void) {
 	return atomic_load_zd(&dirty_decay_ms_default, ATOMIC_RELAXED);
@@ -1573,11 +1743,11 @@ arena_muzzy_decay_ms_default_set(ssize_t decay_ms) {
 }
 
 bool
-arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, size_t *old_limit,
-    size_t *new_limit) {
+arena_retain_grow_limit_get_set(
+    tsd_t *tsd, arena_t *arena, size_t *old_limit, size_t *new_limit) {
 	assert(opt_retain);
-	return pac_retain_grow_limit_get_set(tsd_tsdn(tsd),
-	    &arena->pa_shard.pac, old_limit, new_limit);
+	return pac_retain_grow_limit_get_set(
+	    tsd_tsdn(tsd), &arena->pa_shard.pac, old_limit, new_limit);
 }
 
 unsigned
@@ -1598,7 +1768,7 @@ arena_nthreads_dec(arena_t *arena, bool internal) {
 arena_t *
 arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) {
 	arena_t *arena;
-	base_t *base;
+	base_t  *base;
 	unsigned i;
 
 	if (ind == 0) {
@@ -1611,12 +1781,15 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) {
 		}
 	}
 
-	size_t arena_size = sizeof(arena_t) + sizeof(bin_t) * nbins_total;
+	size_t arena_size = ALIGNMENT_CEILING(sizeof(arena_t), CACHELINE)
+	    + sizeof(bin_t) * nbins_total;
 	arena = (arena_t *)base_alloc(tsdn, base, arena_size, CACHELINE);
 	if (arena == NULL) {
 		goto label_error;
 	}
-
+	JEMALLOC_SUPPRESS_WARN_ON_USAGE(
+	    assert((uintptr_t)&arena->all_bins[nbins_total - 1] + sizeof(bin_t)
+	        <= (uintptr_t)arena + arena_size);)
 	atomic_store_u(&arena->nthreads[0], 0, ATOMIC_RELAXED);
 	atomic_store_u(&arena->nthreads[1], 0, ATOMIC_RELAXED);
 	arena->last_thd = NULL;
@@ -1629,34 +1802,35 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) {
 		ql_new(&arena->tcache_ql);
 		ql_new(&arena->cache_bin_array_descriptor_ql);
 		if (malloc_mutex_init(&arena->tcache_ql_mtx, "tcache_ql",
-		    WITNESS_RANK_TCACHE_QL, malloc_mutex_rank_exclusive)) {
+		        WITNESS_RANK_TCACHE_QL, malloc_mutex_rank_exclusive)) {
 			goto label_error;
 		}
 	}
 
-	atomic_store_u(&arena->dss_prec, (unsigned)extent_dss_prec_get(),
-	    ATOMIC_RELAXED);
+	atomic_store_u(
+	    &arena->dss_prec, (unsigned)extent_dss_prec_get(), ATOMIC_RELAXED);
 
 	edata_list_active_init(&arena->large);
 	if (malloc_mutex_init(&arena->large_mtx, "arena_large",
-	    WITNESS_RANK_ARENA_LARGE, malloc_mutex_rank_exclusive)) {
+	        WITNESS_RANK_ARENA_LARGE, malloc_mutex_rank_exclusive)) {
 		goto label_error;
 	}
 
 	nstime_t cur_time;
 	nstime_init_update(&cur_time);
 	if (pa_shard_init(tsdn, &arena->pa_shard, &arena_pa_central_global,
-	    &arena_emap_global, base, ind, &arena->stats.pa_shard_stats,
-	    LOCKEDINT_MTX(arena->stats.mtx), &cur_time, oversize_threshold,
-	    arena_dirty_decay_ms_default_get(),
-	    arena_muzzy_decay_ms_default_get())) {
+	        &arena_emap_global, base, ind, &arena->stats.pa_shard_stats,
+	        LOCKEDINT_MTX(arena->stats.mtx), &cur_time, oversize_threshold,
+	        arena_dirty_decay_ms_default_get(),
+	        arena_muzzy_decay_ms_default_get())) {
 		goto label_error;
 	}
 
 	/* Initialize bins. */
 	atomic_store_u(&arena->binshard_next, 0, ATOMIC_RELEASE);
 	for (i = 0; i < nbins_total; i++) {
-		bool err = bin_init(&arena->bins[i]);
+		JEMALLOC_SUPPRESS_WARN_ON_USAGE(
+		    bool err = bin_init(&arena->all_bins[i]);)
 		if (err) {
 			goto label_error;
 		}
@@ -1667,20 +1841,25 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) {
 	arena_set(ind, arena);
 	arena->ind = ind;
 
+	/* Init the name. */
+	malloc_snprintf(arena->name, sizeof(arena->name), "%s_%u",
+	    arena_is_auto(arena) ? "auto" : "manual", arena->ind);
+	arena->name[ARENA_NAME_LEN - 1] = '\0';
+
 	nstime_init_update(&arena->create_time);
 
 	/*
 	 * We turn on the HPA if set to.  There are two exceptions:
 	 * - Custom extent hooks (we should only return memory allocated from
 	 *   them in that case).
-	 * - Arena 0 initialization.  In this case, we're mid-bootstrapping, and
-	 *   so arena_hpa_global is not yet initialized.
+	 * - Arena 0 initialization.  In this case, we're mid-bootstrapping,
+	 *   and so background_thread_enabled is not yet initialized.
 	 */
 	if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) {
 		hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts;
 		hpa_shard_opts.deferral_allowed = background_thread_enabled();
-		if (pa_shard_enable_hpa(tsdn, &arena->pa_shard,
-		    &hpa_shard_opts, &opt_hpa_sec_opts)) {
+		if (pa_shard_enable_hpa(tsdn, &arena->pa_shard, &hpa_shard_opts,
+		        &opt_hpa_sec_opts)) {
 			goto label_error;
 		}
 	}
@@ -1707,6 +1886,42 @@ label_error:
 	return NULL;
 }
 
+static arena_t *
+arena_create_huge_arena(tsd_t *tsd, unsigned ind) {
+	assert(ind != 0);
+
+	arena_t *huge_arena = arena_get(tsd_tsdn(tsd), ind, true);
+	if (huge_arena == NULL) {
+		return NULL;
+	}
+
+	char *huge_arena_name = "auto_oversize";
+	strncpy(huge_arena->name, huge_arena_name, ARENA_NAME_LEN);
+	huge_arena->name[ARENA_NAME_LEN - 1] = '\0';
+
+	/*
+	 * Purge eagerly for huge allocations, because: 1) number of huge
+	 * allocations is usually small, which means ticker based decay is not
+	 * reliable; and 2) less immediate reuse is expected for huge
+	 * allocations.
+	 *
+	 * However, with background threads enabled, keep normal purging since
+	 * the purging delay is bounded.
+	 */
+	if (!background_thread_enabled()
+	    && arena_dirty_decay_ms_default_get() > 0) {
+		arena_decay_ms_set(
+		    tsd_tsdn(tsd), huge_arena, extent_state_dirty, 0);
+	}
+	if (!background_thread_enabled()
+	    && arena_muzzy_decay_ms_default_get() > 0) {
+		arena_decay_ms_set(
+		    tsd_tsdn(tsd), huge_arena, extent_state_muzzy, 0);
+	}
+
+	return huge_arena;
+}
+
 arena_t *
 arena_choose_huge(tsd_t *tsd) {
 	/* huge_arena_ind can be 0 during init (will use a0). */
@@ -1717,58 +1932,51 @@ arena_choose_huge(tsd_t *tsd) {
 	arena_t *huge_arena = arena_get(tsd_tsdn(tsd), huge_arena_ind, false);
 	if (huge_arena == NULL) {
 		/* Create the huge arena on demand. */
-		assert(huge_arena_ind != 0);
-		huge_arena = arena_get(tsd_tsdn(tsd), huge_arena_ind, true);
-		if (huge_arena == NULL) {
-			return NULL;
-		}
-		/*
-		 * Purge eagerly for huge allocations, because: 1) number of
-		 * huge allocations is usually small, which means ticker based
-		 * decay is not reliable; and 2) less immediate reuse is
-		 * expected for huge allocations.
-		 */
-		if (arena_dirty_decay_ms_default_get() > 0) {
-			arena_decay_ms_set(tsd_tsdn(tsd), huge_arena,
-			    extent_state_dirty, 0);
-		}
-		if (arena_muzzy_decay_ms_default_get() > 0) {
-			arena_decay_ms_set(tsd_tsdn(tsd), huge_arena,
-			    extent_state_muzzy, 0);
-		}
+		huge_arena = arena_create_huge_arena(tsd, huge_arena_ind);
 	}
 
 	return huge_arena;
 }
 
 bool
-arena_init_huge(void) {
+arena_init_huge(tsdn_t *tsdn, arena_t *a0) {
 	bool huge_enabled;
+	assert(huge_arena_ind == 0);
 
 	/* The threshold should be large size class. */
-	if (opt_oversize_threshold > SC_LARGE_MAXCLASS ||
-	    opt_oversize_threshold < SC_LARGE_MINCLASS) {
+	if (opt_oversize_threshold > SC_LARGE_MAXCLASS
+	    || opt_oversize_threshold < SC_LARGE_MINCLASS) {
 		opt_oversize_threshold = 0;
 		oversize_threshold = SC_LARGE_MAXCLASS + PAGE;
 		huge_enabled = false;
 	} else {
 		/* Reserve the index for the huge arena. */
 		huge_arena_ind = narenas_total_get();
+		assert(huge_arena_ind != 0);
 		oversize_threshold = opt_oversize_threshold;
+		/* a0 init happened before malloc_conf_init. */
+		atomic_store_zu(&a0->pa_shard.pac.oversize_threshold,
+		    oversize_threshold, ATOMIC_RELAXED);
+		/* Initialize huge_arena_pac_thp fields. */
+		base_t *b0 = a0->base;
+		/* Make sure that b0 thp auto-switch won't happen concurrently here. */
+		malloc_mutex_lock(tsdn, &b0->mtx);
+		(&huge_arena_pac_thp)->thp_madvise = opt_huge_arena_pac_thp
+		    && metadata_thp_enabled()
+		    && (opt_thp == thp_mode_do_nothing)
+		    && (init_system_thp_mode == system_thp_mode_madvise);
+		(&huge_arena_pac_thp)->auto_thp_switched =
+		    b0->auto_thp_switched;
+		malloc_mutex_init(&(&huge_arena_pac_thp)->lock, "pac_thp",
+		    WITNESS_RANK_LEAF, malloc_mutex_rank_exclusive);
+		edata_list_active_init(&(&huge_arena_pac_thp)->thp_lazy_list);
+		malloc_mutex_unlock(tsdn, &b0->mtx);
 		huge_enabled = true;
 	}
 
 	return huge_enabled;
 }
 
-bool
-arena_is_huge(unsigned arena_ind) {
-	if (huge_arena_ind == 0) {
-		return false;
-	}
-	return (arena_ind == huge_arena_ind);
-}
-
 bool
 arena_boot(sc_data_t *sc_data, base_t *base, bool hpa) {
 	arena_dirty_decay_ms_default_set(opt_dirty_decay_ms);
@@ -1779,14 +1987,15 @@ arena_boot(sc_data_t *sc_data, base_t *base, bool hpa) {
 		    (1U << sc->lg_base) + (sc->ndelta << sc->lg_delta));
 	}
 
-	uint32_t cur_offset = (uint32_t)offsetof(arena_t, bins);
+	JEMALLOC_SUPPRESS_WARN_ON_USAGE(
+	    uint32_t cur_offset = (uint32_t)offsetof(arena_t, all_bins);)
 	for (szind_t i = 0; i < SC_NBINS; i++) {
 		arena_bin_offsets[i] = cur_offset;
 		nbins_total += bin_infos[i].n_shards;
 		cur_offset += (uint32_t)(bin_infos[i].n_shards * sizeof(bin_t));
 	}
-	return pa_central_init(&arena_pa_central_global, base, hpa,
-	    &hpa_hooks_default);
+	return pa_central_init(
+	    &arena_pa_central_global, base, hpa, &hpa_hooks_default);
 }
 
 void
@@ -1834,14 +2043,16 @@ arena_prefork7(tsdn_t *tsdn, arena_t *arena) {
 void
 arena_prefork8(tsdn_t *tsdn, arena_t *arena) {
 	for (unsigned i = 0; i < nbins_total; i++) {
-		bin_prefork(tsdn, &arena->bins[i]);
+		JEMALLOC_SUPPRESS_WARN_ON_USAGE(
+		    bin_prefork(tsdn, &arena->all_bins[i]);)
 	}
 }
 
 void
 arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) {
 	for (unsigned i = 0; i < nbins_total; i++) {
-		bin_postfork_parent(tsdn, &arena->bins[i]);
+		JEMALLOC_SUPPRESS_WARN_ON_USAGE(
+		    bin_postfork_parent(tsdn, &arena->all_bins[i]);)
 	}
 
 	malloc_mutex_postfork_parent(tsdn, &arena->large_mtx);
@@ -1879,7 +2090,8 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) {
 	}
 
 	for (unsigned i = 0; i < nbins_total; i++) {
-		bin_postfork_child(tsdn, &arena->bins[i]);
+		JEMALLOC_SUPPRESS_WARN_ON_USAGE(
+		    bin_postfork_child(tsdn, &arena->all_bins[i]);)
 	}
 
 	malloc_mutex_postfork_child(tsdn, &arena->large_mtx);
diff --git a/src/background_thread.c b/src/background_thread.c
index 3bb8d26c..4901856a 100644
--- a/src/background_thread.c
+++ b/src/background_thread.c
@@ -11,15 +11,15 @@ JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS
 /* This option should be opt-in only. */
 #define BACKGROUND_THREAD_DEFAULT false
 /* Read-only after initialization. */
-bool opt_background_thread = BACKGROUND_THREAD_DEFAULT;
+bool   opt_background_thread = BACKGROUND_THREAD_DEFAULT;
 size_t opt_max_background_threads = MAX_BACKGROUND_THREAD_LIMIT + 1;
 
 /* Used for thread creation, termination and stats. */
 malloc_mutex_t background_thread_lock;
 /* Indicates global state.  Atomic because decay reads this w/o locking. */
 atomic_b_t background_thread_enabled_state;
-size_t n_background_threads;
-size_t max_background_threads;
+size_t     n_background_threads;
+size_t     max_background_threads;
 /* Thread info per-index. */
 background_thread_info_t *background_thread_info;
 
@@ -32,11 +32,11 @@ static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
 
 static void
 pthread_create_wrapper_init(void) {
-#ifdef JEMALLOC_LAZY_LOCK
+#	ifdef JEMALLOC_LAZY_LOCK
 	if (!isthreaded) {
 		isthreaded = true;
 	}
-#endif
+#	endif
 }
 
 int
@@ -46,24 +46,64 @@ pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr,
 
 	return pthread_create_fptr(thread, attr, start_routine, arg);
 }
+
+#	ifdef JEMALLOC_HAVE_DLSYM
+#		include <dlfcn.h>
+#	endif
+
+static bool
+pthread_create_fptr_init(void) {
+	if (pthread_create_fptr != NULL) {
+		return false;
+	}
+	/*
+	 * Try the next symbol first, because 1) when use lazy_lock we have a
+	 * wrapper for pthread_create; and 2) application may define its own
+	 * wrapper as well (and can call malloc within the wrapper).
+	 */
+#	ifdef JEMALLOC_HAVE_DLSYM
+	pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
+	if (pthread_create_fptr == NULL) {
+		pthread_create_fptr = dlsym(RTLD_DEFAULT, "pthread_create");
+	}
+#	else
+	pthread_create_fptr = NULL;
+#	endif
+	if (pthread_create_fptr == NULL) {
+		if (config_lazy_lock) {
+			malloc_write(
+			    "<jemalloc>: Error in dlsym(RTLD_NEXT, "
+			    "\"pthread_create\")\n");
+			abort();
+		} else {
+			/* Fall back to the default symbol. */
+			pthread_create_fptr = pthread_create;
+		}
+	}
+
+	return false;
+}
 #endif /* JEMALLOC_PTHREAD_CREATE_WRAPPER */
 
 #ifndef JEMALLOC_BACKGROUND_THREAD
-#define NOT_REACHED { not_reached(); }
-bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED
-bool background_threads_enable(tsd_t *tsd) NOT_REACHED
-bool background_threads_disable(tsd_t *tsd) NOT_REACHED
-bool background_thread_is_started(background_thread_info_t *info) NOT_REACHED
-void background_thread_wakeup_early(background_thread_info_t *info,
-    nstime_t *remaining_sleep) NOT_REACHED
-void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED
-void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED
-void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED
-void background_thread_postfork_child(tsdn_t *tsdn) NOT_REACHED
-bool background_thread_stats_read(tsdn_t *tsdn,
-    background_thread_stats_t *stats) NOT_REACHED
-void background_thread_ctl_init(tsdn_t *tsdn) NOT_REACHED
-#undef NOT_REACHED
+#	define NOT_REACHED                                                    \
+		{ not_reached(); }
+bool
+background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED
+    bool background_threads_enable(tsd_t *tsd) NOT_REACHED
+    bool background_threads_disable(tsd_t *tsd) NOT_REACHED
+    bool background_thread_is_started(
+        background_thread_info_t *info) NOT_REACHED
+    void background_thread_wakeup_early(
+        background_thread_info_t *info, nstime_t *remaining_sleep) NOT_REACHED
+    void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED
+    void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED
+    void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED
+    void background_thread_postfork_child(tsdn_t *tsdn) NOT_REACHED
+    bool background_thread_stats_read(
+        tsdn_t *tsdn, background_thread_stats_t *stats) NOT_REACHED
+    void background_thread_ctl_init(tsdn_t *tsdn) NOT_REACHED
+#	undef NOT_REACHED
 #else
 
 static bool background_thread_enabled_at_fork;
@@ -80,45 +120,70 @@ background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) {
 
 static inline bool
 set_current_thread_affinity(int cpu) {
-#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
+#	if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)                           \
+	    || defined(JEMALLOC_HAVE_PTHREAD_SETAFFINITY_NP)
+#		if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
 	cpu_set_t cpuset;
-#else
-#  ifndef __NetBSD__
+#		else
+#			ifndef __NetBSD__
 	cpuset_t cpuset;
-#  else
+#			else
 	cpuset_t *cpuset;
-#  endif
-#endif
+#			endif
+#		endif
 
-#ifndef __NetBSD__
+#		ifndef __NetBSD__
 	CPU_ZERO(&cpuset);
 	CPU_SET(cpu, &cpuset);
-#else
+#		else
 	cpuset = cpuset_create();
-#endif
+#		endif
 
-#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
+#		if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
 	return (sched_setaffinity(0, sizeof(cpu_set_t), &cpuset) != 0);
-#else
-#  ifndef __NetBSD__
-	int ret = pthread_setaffinity_np(pthread_self(), sizeof(cpuset_t),
-	    &cpuset);
-#  else
-	int ret = pthread_setaffinity_np(pthread_self(), cpuset_size(cpuset),
-	    cpuset);
+#		else
+#			ifndef __NetBSD__
+	int ret = pthread_setaffinity_np(
+	    pthread_self(), sizeof(cpuset_t), &cpuset);
+#			else
+	int ret = pthread_setaffinity_np(
+	    pthread_self(), cpuset_size(cpuset), cpuset);
 	cpuset_destroy(cpuset);
-#  endif
+#			endif
 	return ret != 0;
-#endif
+#		endif
+#	else
+	return false;
+#	endif
 }
 
-#define BILLION UINT64_C(1000000000)
+#	define BILLION UINT64_C(1000000000)
 /* Minimal sleep interval 100 ms. */
-#define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10)
+#	define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10)
+
+static int
+background_thread_cond_wait(
+    background_thread_info_t *info, struct timespec *ts) {
+	int ret;
+
+	/*
+	 * pthread_cond_wait drops and re-acquires the mutex internally, w/o
+	 * going through our wrapper.  Update the locked state explicitly.
+	 */
+	atomic_store_b(&info->mtx.locked, false, ATOMIC_RELAXED);
+	if (ts == NULL) {
+		ret = pthread_cond_wait(&info->cond, &info->mtx.lock);
+	} else {
+		ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, ts);
+	}
+	atomic_store_b(&info->mtx.locked, true, ATOMIC_RELAXED);
+
+	return ret;
+}
 
 static void
-background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info,
-    uint64_t interval) {
+background_thread_sleep(
+    tsdn_t *tsdn, background_thread_info_t *info, uint64_t interval) {
 	if (config_stats) {
 		info->tot_n_runs++;
 	}
@@ -132,21 +197,21 @@ background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info,
 
 	int ret;
 	if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) {
-		background_thread_wakeup_time_set(tsdn, info,
-		    BACKGROUND_THREAD_INDEFINITE_SLEEP);
-		ret = pthread_cond_wait(&info->cond, &info->mtx.lock);
+		background_thread_wakeup_time_set(
+		    tsdn, info, BACKGROUND_THREAD_INDEFINITE_SLEEP);
+		ret = background_thread_cond_wait(info, NULL);
 		assert(ret == 0);
 	} else {
-		assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS &&
-		    interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP);
+		assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS
+		    && interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP);
 		/* We need malloc clock (can be different from tv). */
 		nstime_t next_wakeup;
 		nstime_init_update(&next_wakeup);
 		nstime_iadd(&next_wakeup, interval);
-		assert(nstime_ns(&next_wakeup) <
-		    BACKGROUND_THREAD_INDEFINITE_SLEEP);
-		background_thread_wakeup_time_set(tsdn, info,
-		    nstime_ns(&next_wakeup));
+		assert(nstime_ns(&next_wakeup)
+		    < BACKGROUND_THREAD_INDEFINITE_SLEEP);
+		background_thread_wakeup_time_set(
+		    tsdn, info, nstime_ns(&next_wakeup));
 
 		nstime_t ts_wakeup;
 		nstime_copy(&ts_wakeup, &before_sleep);
@@ -156,7 +221,7 @@ background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info,
 		ts.tv_nsec = (size_t)nstime_nsec(&ts_wakeup);
 
 		assert(!background_thread_indefinite_sleep(info));
-		ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, &ts);
+		ret = background_thread_cond_wait(info, &ts);
 		assert(ret == ETIMEDOUT || ret == 0);
 	}
 	if (config_stats) {
@@ -185,11 +250,11 @@ background_thread_pause_check(tsdn_t *tsdn, background_thread_info_t *info) {
 }
 
 static inline void
-background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info,
-    unsigned ind) {
+background_work_sleep_once(
+    tsdn_t *tsdn, background_thread_info_t *info, unsigned ind) {
 	uint64_t ns_until_deferred = BACKGROUND_THREAD_DEFERRED_MAX;
 	unsigned narenas = narenas_total_get();
-	bool slept_indefinitely = background_thread_indefinite_sleep(info);
+	bool     slept_indefinitely = background_thread_indefinite_sleep(info);
 
 	for (unsigned i = ind; i < narenas; i += max_background_threads) {
 		arena_t *arena = arena_get(tsdn, i, false);
@@ -219,11 +284,10 @@ background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info,
 	if (ns_until_deferred == BACKGROUND_THREAD_DEFERRED_MAX) {
 		sleep_ns = BACKGROUND_THREAD_INDEFINITE_SLEEP;
 	} else {
-		sleep_ns =
-		    (ns_until_deferred < BACKGROUND_THREAD_MIN_INTERVAL_NS)
+		sleep_ns = (ns_until_deferred
+		               < BACKGROUND_THREAD_MIN_INTERVAL_NS)
 		    ? BACKGROUND_THREAD_MIN_INTERVAL_NS
 		    : ns_until_deferred;
-
 	}
 
 	background_thread_sleep(tsdn, info, sleep_ns);
@@ -232,11 +296,11 @@ background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info,
 static bool
 background_threads_disable_single(tsd_t *tsd, background_thread_info_t *info) {
 	if (info == &background_thread_info[0]) {
-		malloc_mutex_assert_owner(tsd_tsdn(tsd),
-		    &background_thread_lock);
+		malloc_mutex_assert_owner(
+		    tsd_tsdn(tsd), &background_thread_lock);
 	} else {
-		malloc_mutex_assert_not_owner(tsd_tsdn(tsd),
-		    &background_thread_lock);
+		malloc_mutex_assert_not_owner(
+		    tsd_tsdn(tsd), &background_thread_lock);
 	}
 
 	pre_reentrancy(tsd, NULL);
@@ -280,21 +344,23 @@ background_thread_create_signals_masked(pthread_t *thread,
 	sigset_t set;
 	sigfillset(&set);
 	sigset_t oldset;
-	int mask_err = pthread_sigmask(SIG_SETMASK, &set, &oldset);
+	int      mask_err = pthread_sigmask(SIG_SETMASK, &set, &oldset);
 	if (mask_err != 0) {
 		return mask_err;
 	}
-	int create_err = pthread_create_wrapper(thread, attr, start_routine,
-	    arg);
+	int create_err = pthread_create_wrapper(
+	    thread, attr, start_routine, arg);
 	/*
 	 * Restore the signal mask.  Failure to restore the signal mask here
 	 * changes program behavior.
 	 */
 	int restore_err = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
 	if (restore_err != 0) {
-		malloc_printf("<jemalloc>: background thread creation "
+		malloc_printf(
+		    "<jemalloc>: background thread creation "
 		    "failed (%d), and signal mask restoration failed "
-		    "(%d)\n", create_err, restore_err);
+		    "(%d)\n",
+		    create_err, restore_err);
 		if (opt_abort) {
 			abort();
 		}
@@ -303,7 +369,8 @@ background_thread_create_signals_masked(pthread_t *thread,
 }
 
 static bool
-check_background_thread_creation(tsd_t *tsd, unsigned *n_created,
+check_background_thread_creation(tsd_t *tsd,
+    const size_t const_max_background_threads, unsigned *n_created,
     bool *created_threads) {
 	bool ret = false;
 	if (likely(*n_created == n_background_threads)) {
@@ -312,7 +379,7 @@ check_background_thread_creation(tsd_t *tsd, unsigned *n_created,
 
 	tsdn_t *tsdn = tsd_tsdn(tsd);
 	malloc_mutex_unlock(tsdn, &background_thread_info[0].mtx);
-	for (unsigned i = 1; i < max_background_threads; i++) {
+	for (unsigned i = 1; i < const_max_background_threads; i++) {
 		if (created_threads[i]) {
 			continue;
 		}
@@ -330,6 +397,7 @@ check_background_thread_creation(tsd_t *tsd, unsigned *n_created,
 
 		pre_reentrancy(tsd, NULL);
 		int err = background_thread_create_signals_masked(&info->thread,
+		    /* NOLINTNEXTLINE(performance-no-int-to-ptr) */
 		    NULL, background_thread_entry, (void *)(uintptr_t)i);
 		post_reentrancy(tsd);
 
@@ -337,8 +405,10 @@ check_background_thread_creation(tsd_t *tsd, unsigned *n_created,
 			(*n_created)++;
 			created_threads[i] = true;
 		} else {
-			malloc_printf("<jemalloc>: background thread "
-			    "creation failed (%d)\n", err);
+			malloc_printf(
+			    "<jemalloc>: background thread "
+			    "creation failed (%d)\n",
+			    err);
 			if (opt_abort) {
 				abort();
 			}
@@ -354,25 +424,35 @@ check_background_thread_creation(tsd_t *tsd, unsigned *n_created,
 
 static void
 background_thread0_work(tsd_t *tsd) {
-	/* Thread0 is also responsible for launching / terminating threads. */
-	VARIABLE_ARRAY(bool, created_threads, max_background_threads);
+	/*
+	 * Thread0 is also responsible for launching / terminating threads.
+	 * We are guaranteed that `max_background_threads` will not change
+	 * underneath us. Unfortunately static analysis tools do not understand
+	 * this, so we are extracting `max_background_threads` into a local
+	 * variable solely for the sake of exposing this information to such
+	 * tools.
+	 */
+	const size_t const_max_background_threads = max_background_threads;
+	assert(const_max_background_threads > 0);
+	VARIABLE_ARRAY(bool, created_threads, const_max_background_threads);
 	unsigned i;
-	for (i = 1; i < max_background_threads; i++) {
+	for (i = 1; i < const_max_background_threads; i++) {
 		created_threads[i] = false;
 	}
 	/* Start working, and create more threads when asked. */
 	unsigned n_created = 1;
 	while (background_thread_info[0].state != background_thread_stopped) {
-		if (background_thread_pause_check(tsd_tsdn(tsd),
-		    &background_thread_info[0])) {
+		if (background_thread_pause_check(
+		        tsd_tsdn(tsd), &background_thread_info[0])) {
 			continue;
 		}
-		if (check_background_thread_creation(tsd, &n_created,
-		    (bool *)&created_threads)) {
+		if (check_background_thread_creation(tsd,
+		        const_max_background_threads, &n_created,
+		        created_threads)) {
 			continue;
 		}
-		background_work_sleep_once(tsd_tsdn(tsd),
-		    &background_thread_info[0], 0);
+		background_work_sleep_once(
+		    tsd_tsdn(tsd), &background_thread_info[0], 0);
 	}
 
 	/*
@@ -380,7 +460,7 @@ background_thread0_work(tsd_t *tsd) {
 	 * the global background_thread mutex (and is waiting) for us.
 	 */
 	assert(!background_thread_enabled());
-	for (i = 1; i < max_background_threads; i++) {
+	for (i = 1; i < const_max_background_threads; i++) {
 		background_thread_info_t *info = &background_thread_info[i];
 		assert(info->state != background_thread_paused);
 		if (created_threads[i]) {
@@ -389,8 +469,8 @@ background_thread0_work(tsd_t *tsd) {
 			malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
 			if (info->state != background_thread_stopped) {
 				/* The thread was not created. */
-				assert(info->state ==
-				    background_thread_started);
+				assert(
+				    info->state == background_thread_started);
 				n_background_threads--;
 				info->state = background_thread_stopped;
 			}
@@ -406,14 +486,14 @@ background_work(tsd_t *tsd, unsigned ind) {
 	background_thread_info_t *info = &background_thread_info[ind];
 
 	malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
-	background_thread_wakeup_time_set(tsd_tsdn(tsd), info,
-	    BACKGROUND_THREAD_INDEFINITE_SLEEP);
+	background_thread_wakeup_time_set(
+	    tsd_tsdn(tsd), info, BACKGROUND_THREAD_INDEFINITE_SLEEP);
 	if (ind == 0) {
 		background_thread0_work(tsd);
 	} else {
 		while (info->state != background_thread_stopped) {
-			if (background_thread_pause_check(tsd_tsdn(tsd),
-			    info)) {
+			if (background_thread_pause_check(
+			        tsd_tsdn(tsd), info)) {
 				continue;
 			}
 			background_work_sleep_once(tsd_tsdn(tsd), info, ind);
@@ -428,11 +508,11 @@ static void *
 background_thread_entry(void *ind_arg) {
 	unsigned thread_ind = (unsigned)(uintptr_t)ind_arg;
 	assert(thread_ind < max_background_threads);
-#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP
+#	ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP
 	pthread_setname_np(pthread_self(), "jemalloc_bg_thd");
-#elif defined(__FreeBSD__) || defined(__DragonFly__)
+#	elif defined(JEMALLOC_HAVE_PTHREAD_SET_NAME_NP)
 	pthread_set_name_np(pthread_self(), "jemalloc_bg_thd");
-#endif
+#	endif
 	if (opt_percpu_arena != percpu_arena_disabled) {
 		set_current_thread_affinity((int)thread_ind);
 	}
@@ -442,8 +522,8 @@ background_thread_entry(void *ind_arg) {
 	 * turn triggers another background thread creation).
 	 */
 	background_work(tsd_internal_fetch(), thread_ind);
-	assert(pthread_equal(pthread_self(),
-	    background_thread_info[thread_ind].thread));
+	assert(pthread_equal(
+	    pthread_self(), background_thread_info[thread_ind].thread));
 
 	return NULL;
 }
@@ -467,8 +547,13 @@ background_thread_create_locked(tsd_t *tsd, unsigned arena_ind) {
 
 	bool need_new_thread;
 	malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
-	need_new_thread = background_thread_enabled() &&
-	    (info->state == background_thread_stopped);
+	/*
+	 * The last check is there to leave Thread 0 creation entirely
+	 * to the initializing thread (arena 0).
+	 */
+	need_new_thread = background_thread_enabled()
+	    && (info->state == background_thread_stopped)
+	    && (thread_ind != 0 || arena_ind == 0);
 	if (need_new_thread) {
 		background_thread_init(tsd, info);
 	}
@@ -480,7 +565,6 @@ background_thread_create_locked(tsd_t *tsd, unsigned arena_ind) {
 		/* Threads are created asynchronously by Thread 0. */
 		background_thread_info_t *t0 = &background_thread_info[0];
 		malloc_mutex_lock(tsd_tsdn(tsd), &t0->mtx);
-		assert(t0->state == background_thread_started);
 		pthread_cond_signal(&t0->cond);
 		malloc_mutex_unlock(tsd_tsdn(tsd), &t0->mtx);
 
@@ -493,12 +577,15 @@ background_thread_create_locked(tsd_t *tsd, unsigned arena_ind) {
 	 * background threads with the underlying pthread_create.
 	 */
 	int err = background_thread_create_signals_masked(&info->thread, NULL,
+	    /* NOLINTNEXTLINE(performance-no-int-to-ptr) */
 	    background_thread_entry, (void *)thread_ind);
 	post_reentrancy(tsd);
 
 	if (err != 0) {
-		malloc_printf("<jemalloc>: arena 0 background thread creation "
-		    "failed (%d)\n", err);
+		malloc_printf(
+		    "<jemalloc>: arena 0 background thread creation "
+		    "failed (%d)\n",
+		    err);
 		malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
 		info->state = background_thread_stopped;
 		n_background_threads--;
@@ -531,7 +618,7 @@ background_threads_enable(tsd_t *tsd) {
 
 	VARIABLE_ARRAY(bool, marked, max_background_threads);
 	unsigned nmarked;
-	for (unsigned i = 0; i < max_background_threads; i++) {
+	for (size_t i = 0; i < max_background_threads; i++) {
 		marked[i] = false;
 	}
 	nmarked = 0;
@@ -540,12 +627,12 @@ background_threads_enable(tsd_t *tsd) {
 	/* Mark the threads we need to create for thread 0. */
 	unsigned narenas = narenas_total_get();
 	for (unsigned i = 1; i < narenas; i++) {
-		if (marked[i % max_background_threads] ||
-		    arena_get(tsd_tsdn(tsd), i, false) == NULL) {
+		if (marked[i % max_background_threads]
+		    || arena_get(tsd_tsdn(tsd), i, false) == NULL) {
 			continue;
 		}
-		background_thread_info_t *info = &background_thread_info[
-		    i % max_background_threads];
+		background_thread_info_t *info =
+		    &background_thread_info[i % max_background_threads];
 		malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
 		assert(info->state == background_thread_stopped);
 		background_thread_init(tsd, info);
@@ -563,8 +650,8 @@ background_threads_enable(tsd_t *tsd) {
 	for (unsigned i = 0; i < narenas; i++) {
 		arena_t *arena = arena_get(tsd_tsdn(tsd), i, false);
 		if (arena != NULL) {
-			pa_shard_set_deferral_allowed(tsd_tsdn(tsd),
-			    &arena->pa_shard, true);
+			pa_shard_set_deferral_allowed(
+			    tsd_tsdn(tsd), &arena->pa_shard, true);
 		}
 	}
 	return false;
@@ -576,8 +663,8 @@ background_threads_disable(tsd_t *tsd) {
 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
 
 	/* Thread 0 will be responsible for terminating other threads. */
-	if (background_threads_disable_single(tsd,
-	    &background_thread_info[0])) {
+	if (background_threads_disable_single(
+	        tsd, &background_thread_info[0])) {
 		return true;
 	}
 	assert(n_background_threads == 0);
@@ -585,8 +672,8 @@ background_threads_disable(tsd_t *tsd) {
 	for (unsigned i = 0; i < narenas; i++) {
 		arena_t *arena = arena_get(tsd_tsdn(tsd), i, false);
 		if (arena != NULL) {
-			pa_shard_set_deferral_allowed(tsd_tsdn(tsd),
-			    &arena->pa_shard, false);
+			pa_shard_set_deferral_allowed(
+			    tsd_tsdn(tsd), &arena->pa_shard, false);
 		}
 	}
 
@@ -599,15 +686,15 @@ background_thread_is_started(background_thread_info_t *info) {
 }
 
 void
-background_thread_wakeup_early(background_thread_info_t *info,
-    nstime_t *remaining_sleep) {
+background_thread_wakeup_early(
+    background_thread_info_t *info, nstime_t *remaining_sleep) {
 	/*
 	 * This is an optimization to increase batching. At this point
 	 * we know that background thread wakes up soon, so the time to cache
 	 * the just freed memory is bounded and low.
 	 */
-	if (remaining_sleep != NULL && nstime_ns(remaining_sleep) <
-	    BACKGROUND_THREAD_MIN_INTERVAL_NS) {
+	if (remaining_sleep != NULL
+	    && nstime_ns(remaining_sleep) < BACKGROUND_THREAD_MIN_INTERVAL_NS) {
 		return;
 	}
 	pthread_cond_signal(&info->cond);
@@ -629,8 +716,8 @@ background_thread_prefork1(tsdn_t *tsdn) {
 void
 background_thread_postfork_parent(tsdn_t *tsdn) {
 	for (unsigned i = 0; i < max_background_threads; i++) {
-		malloc_mutex_postfork_parent(tsdn,
-		    &background_thread_info[i].mtx);
+		malloc_mutex_postfork_parent(
+		    tsdn, &background_thread_info[i].mtx);
 	}
 	malloc_mutex_postfork_parent(tsdn, &background_thread_lock);
 }
@@ -638,8 +725,8 @@ background_thread_postfork_parent(tsdn_t *tsdn) {
 void
 background_thread_postfork_child(tsdn_t *tsdn) {
 	for (unsigned i = 0; i < max_background_threads; i++) {
-		malloc_mutex_postfork_child(tsdn,
-		    &background_thread_info[i].mtx);
+		malloc_mutex_postfork_child(
+		    tsdn, &background_thread_info[i].mtx);
 	}
 	malloc_mutex_postfork_child(tsdn, &background_thread_lock);
 	if (!background_thread_enabled_at_fork) {
@@ -688,8 +775,8 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) {
 		if (info->state != background_thread_stopped) {
 			num_runs += info->tot_n_runs;
 			nstime_add(&stats->run_interval, &info->tot_sleep_time);
-			malloc_mutex_prof_max_update(tsdn,
-			    &stats->max_counter_per_bg_thd, &info->mtx);
+			malloc_mutex_prof_max_update(
+			    tsdn, &stats->max_counter_per_bg_thd, &info->mtx);
 		}
 		malloc_mutex_unlock(tsdn, &info->mtx);
 	}
@@ -702,42 +789,9 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) {
 	return false;
 }
 
-#undef BACKGROUND_THREAD_NPAGES_THRESHOLD
-#undef BILLION
-#undef BACKGROUND_THREAD_MIN_INTERVAL_NS
-
-#ifdef JEMALLOC_HAVE_DLSYM
-#include <dlfcn.h>
-#endif
-
-static bool
-pthread_create_fptr_init(void) {
-	if (pthread_create_fptr != NULL) {
-		return false;
-	}
-	/*
-	 * Try the next symbol first, because 1) when use lazy_lock we have a
-	 * wrapper for pthread_create; and 2) application may define its own
-	 * wrapper as well (and can call malloc within the wrapper).
-	 */
-#ifdef JEMALLOC_HAVE_DLSYM
-	pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
-#else
-	pthread_create_fptr = NULL;
-#endif
-	if (pthread_create_fptr == NULL) {
-		if (config_lazy_lock) {
-			malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
-			    "\"pthread_create\")\n");
-			abort();
-		} else {
-			/* Fall back to the default symbol. */
-			pthread_create_fptr = pthread_create;
-		}
-	}
-
-	return false;
-}
+#	undef BACKGROUND_THREAD_NPAGES_THRESHOLD
+#	undef BILLION
+#	undef BACKGROUND_THREAD_MIN_INTERVAL_NS
 
 /*
  * When lazy lock is enabled, we need to make sure setting isthreaded before
@@ -748,24 +802,24 @@ pthread_create_fptr_init(void) {
 void
 background_thread_ctl_init(tsdn_t *tsdn) {
 	malloc_mutex_assert_not_owner(tsdn, &background_thread_lock);
-#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
+#	ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
 	pthread_create_fptr_init();
 	pthread_create_wrapper_init();
-#endif
+#	endif
 }
 
 #endif /* defined(JEMALLOC_BACKGROUND_THREAD) */
 
-bool
-background_thread_boot0(void) {
+    bool background_thread_boot0(void) {
 	if (!have_background_thread && opt_background_thread) {
-		malloc_printf("<jemalloc>: option background_thread currently "
+		malloc_printf(
+		    "<jemalloc>: option background_thread currently "
 		    "supports pthread only\n");
 		return true;
 	}
 #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
-	if ((config_lazy_lock || opt_background_thread) &&
-	    pthread_create_fptr_init()) {
+	if ((config_lazy_lock || opt_background_thread)
+	    && pthread_create_fptr_init()) {
 		return true;
 	}
 #endif
@@ -783,17 +837,16 @@ background_thread_boot1(tsdn_t *tsdn, base_t *base) {
 	}
 	max_background_threads = opt_max_background_threads;
 
-	background_thread_enabled_set(tsdn, opt_background_thread);
 	if (malloc_mutex_init(&background_thread_lock,
-	    "background_thread_global",
-	    WITNESS_RANK_BACKGROUND_THREAD_GLOBAL,
-	    malloc_mutex_rank_exclusive)) {
+	        "background_thread_global",
+	        WITNESS_RANK_BACKGROUND_THREAD_GLOBAL,
+	        malloc_mutex_rank_exclusive)) {
 		return true;
 	}
 
 	background_thread_info = (background_thread_info_t *)base_alloc(tsdn,
-	    base, opt_max_background_threads *
-	    sizeof(background_thread_info_t), CACHELINE);
+	    base, opt_max_background_threads * sizeof(background_thread_info_t),
+	    CACHELINE);
 	if (background_thread_info == NULL) {
 		return true;
 	}
@@ -802,8 +855,8 @@ background_thread_boot1(tsdn_t *tsdn, base_t *base) {
 		background_thread_info_t *info = &background_thread_info[i];
 		/* Thread mutex is rank_inclusive because of thread0. */
 		if (malloc_mutex_init(&info->mtx, "background_thread",
-		    WITNESS_RANK_BACKGROUND_THREAD,
-		    malloc_mutex_address_ordered)) {
+		        WITNESS_RANK_BACKGROUND_THREAD,
+		        malloc_mutex_address_ordered)) {
 			return true;
 		}
 		if (pthread_cond_init(&info->cond, NULL)) {
@@ -814,7 +867,8 @@ background_thread_boot1(tsdn_t *tsdn, base_t *base) {
 		background_thread_info_init(tsdn, info);
 		malloc_mutex_unlock(tsdn, &info->mtx);
 	}
+	/* Using _impl to bypass the locking check during init. */
+	background_thread_enabled_set_impl(opt_background_thread);
 #endif
-
 	return false;
 }
diff --git a/src/base.c b/src/base.c
index 7f4d6756..ef7f0dd4 100644
--- a/src/base.c
+++ b/src/base.c
@@ -12,7 +12,7 @@
  * of metadata), since more metadata (e.g. rtree nodes) come from a0's base.
  */
 
-#define BASE_AUTO_THP_THRESHOLD    2
+#define BASE_AUTO_THP_THRESHOLD 2
 #define BASE_AUTO_THP_THRESHOLD_A0 5
 
 /******************************************************************************/
@@ -22,45 +22,46 @@ static base_t *b0;
 
 metadata_thp_mode_t opt_metadata_thp = METADATA_THP_DEFAULT;
 
-const char *metadata_thp_mode_names[] = {
-	"disabled",
-	"auto",
-	"always"
-};
+const char *const metadata_thp_mode_names[] = {"disabled", "auto", "always"};
 
 /******************************************************************************/
 
 static inline bool
 metadata_thp_madvise(void) {
 	return (metadata_thp_enabled() &&
-	    (init_system_thp_mode == thp_mode_default));
+	    (init_system_thp_mode == system_thp_mode_madvise));
 }
 
 static void *
 base_map(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, size_t size) {
 	void *addr;
-	bool zero = true;
-	bool commit = true;
+	bool  zero = true;
+	bool  commit = true;
 
-	/* Use huge page sizes and alignment regardless of opt_metadata_thp. */
-	assert(size == HUGEPAGE_CEILING(size));
-	size_t alignment = HUGEPAGE;
+	/*
+	 * Use huge page sizes and alignment when opt_metadata_thp is enabled
+	 * or auto.
+	 */
+	size_t alignment;
+	if (opt_metadata_thp == metadata_thp_disabled) {
+		alignment = BASE_BLOCK_MIN_ALIGN;
+	} else {
+		assert(size == HUGEPAGE_CEILING(size));
+		alignment = HUGEPAGE;
+	}
 	if (ehooks_are_default(ehooks)) {
 		addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit);
-		if (have_madvise_huge && addr) {
-			pages_set_thp_state(addr, size);
-		}
 	} else {
-		addr = ehooks_alloc(tsdn, ehooks, NULL, size, alignment, &zero,
-		    &commit);
+		addr = ehooks_alloc(
+		    tsdn, ehooks, NULL, size, alignment, &zero, &commit);
 	}
 
 	return addr;
 }
 
 static void
-base_unmap(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void *addr,
-    size_t size) {
+base_unmap(
+    tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void *addr, size_t size) {
 	/*
 	 * Cascade through dalloc, decommit, purge_forced, and purge_lazy,
 	 * stopping at first success.  This cascade is performed for consistency
@@ -104,21 +105,31 @@ base_unmap(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void *addr,
 label_done:
 	if (metadata_thp_madvise()) {
 		/* Set NOHUGEPAGE after unmap to avoid kernel defrag. */
-		assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
-		    (size & HUGEPAGE_MASK) == 0);
+		assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0
+		    && (size & HUGEPAGE_MASK) == 0);
 		pages_nohuge(addr, size);
 	}
 }
 
+static inline bool
+base_edata_is_reused(edata_t *edata) {
+	/*
+	 * Borrow the guarded bit to indicate if the extent is a recycled one,
+	 * i.e. the ones returned to base for reuse; currently only tcache bin
+	 * stacks.  Skips stats updating if so (needed for this purpose only).
+	 */
+	return edata_guarded_get(edata);
+}
+
 static void
-base_edata_init(size_t *extent_sn_next, edata_t *edata, void *addr,
-    size_t size) {
+base_edata_init(
+    size_t *extent_sn_next, edata_t *edata, void *addr, size_t size) {
 	size_t sn;
 
 	sn = *extent_sn_next;
 	(*extent_sn_next)++;
 
-	edata_binit(edata, addr, size, sn);
+	edata_binit(edata, addr, size, sn, false /* is_reused */);
 }
 
 static size_t
@@ -135,6 +146,42 @@ base_get_num_blocks(base_t *base, bool with_new_block) {
 	return n_blocks;
 }
 
+static void
+huge_arena_auto_thp_switch(tsdn_t *tsdn, pac_thp_t *pac_thp) {
+	assert(opt_huge_arena_pac_thp);
+#ifdef JEMALLOC_JET
+	if (pac_thp->auto_thp_switched) {
+		return;
+	}
+#else
+	/*
+	 * The switch should be turned on only once when the b0 auto thp switch is
+	 * turned on, unless it's a unit test where b0 gets deleted and then
+	 * recreated.
+	 */
+	assert(!pac_thp->auto_thp_switched);
+#endif
+
+	edata_list_active_t *pending_list;
+	malloc_mutex_lock(tsdn, &pac_thp->lock);
+	pending_list = &pac_thp->thp_lazy_list;
+	pac_thp->auto_thp_switched = true;
+	malloc_mutex_unlock(tsdn, &pac_thp->lock);
+
+	unsigned cnt = 0;
+	edata_t *edata;
+	ql_foreach (edata, &pending_list->head, ql_link_active) {
+		assert(edata != NULL);
+		void  *addr = edata_addr_get(edata);
+		size_t size = edata_size_get(edata);
+		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
+		assert(HUGEPAGE_CEILING(size) == size && size != 0);
+		pages_huge(addr, size);
+		cnt++;
+	}
+	assert(cnt == atomic_load_u(&pac_thp->n_thp_lazy, ATOMIC_RELAXED));
+}
+
 static void
 base_auto_thp_switch(tsdn_t *tsdn, base_t *base) {
 	assert(opt_metadata_thp == metadata_thp_auto);
@@ -145,11 +192,11 @@ base_auto_thp_switch(tsdn_t *tsdn, base_t *base) {
 	/* Called when adding a new block. */
 	bool should_switch;
 	if (base_ind_get(base) != 0) {
-		should_switch = (base_get_num_blocks(base, true) ==
-		    BASE_AUTO_THP_THRESHOLD);
+		should_switch = (base_get_num_blocks(base, true)
+		    == BASE_AUTO_THP_THRESHOLD);
 	} else {
-		should_switch = (base_get_num_blocks(base, true) ==
-		    BASE_AUTO_THP_THRESHOLD_A0);
+		should_switch = (base_get_num_blocks(base, true)
+		    == BASE_AUTO_THP_THRESHOLD_A0);
 	}
 	if (!should_switch) {
 		return;
@@ -163,77 +210,141 @@ base_auto_thp_switch(tsdn_t *tsdn, base_t *base) {
 		assert((block->size & HUGEPAGE_MASK) == 0);
 		pages_huge(block, block->size);
 		if (config_stats) {
-			base->n_thp += HUGEPAGE_CEILING(block->size -
-			    edata_bsize_get(&block->edata)) >> LG_HUGEPAGE;
+			base->n_thp += HUGEPAGE_CEILING(block->size
+			                   - edata_bsize_get(&block->edata))
+			    >> LG_HUGEPAGE;
 		}
 		block = block->next;
 		assert(block == NULL || (base_ind_get(base) == 0));
 	}
+
+	/* Handle the THP auto switch for the huge arena. */
+	if (!huge_arena_pac_thp.thp_madvise || base_ind_get(base) != 0) {
+		/*
+		 * The huge arena THP auto-switch is triggered only by b0 switch,
+		 * provided that the huge arena is initialized. If b0 switch is enabled
+		 * before huge arena is ready, the huge arena switch will be enabled
+		 * during huge_arena_pac_thp initialization.
+		 */
+		return;
+	}
+	/*
+	 * thp_madvise above is by default false and set in arena_init_huge() with
+	 * b0 mtx held. So if we reach here, it means the entire huge_arena_pac_thp
+	 * is initialized and we can safely switch the THP.
+	 */
+	malloc_mutex_unlock(tsdn, &base->mtx);
+	huge_arena_auto_thp_switch(tsdn, &huge_arena_pac_thp);
+	malloc_mutex_lock(tsdn, &base->mtx);
 }
 
 static void *
-base_extent_bump_alloc_helper(edata_t *edata, size_t *gap_size, size_t size,
-    size_t alignment) {
+base_extent_bump_alloc_helper(
+    edata_t *edata, size_t *gap_size, size_t size, size_t alignment) {
 	void *ret;
 
 	assert(alignment == ALIGNMENT_CEILING(alignment, QUANTUM));
 	assert(size == ALIGNMENT_CEILING(size, alignment));
 
-	*gap_size = ALIGNMENT_CEILING((uintptr_t)edata_addr_get(edata),
-	    alignment) - (uintptr_t)edata_addr_get(edata);
-	ret = (void *)((uintptr_t)edata_addr_get(edata) + *gap_size);
+	*gap_size = ALIGNMENT_CEILING(
+	                (uintptr_t)edata_addr_get(edata), alignment)
+	    - (uintptr_t)edata_addr_get(edata);
+	ret = (void *)((byte_t *)edata_addr_get(edata) + *gap_size);
 	assert(edata_bsize_get(edata) >= *gap_size + size);
-	edata_binit(edata, (void *)((uintptr_t)edata_addr_get(edata) +
-	    *gap_size + size), edata_bsize_get(edata) - *gap_size - size,
-	    edata_sn_get(edata));
+	edata_binit(edata,
+	    (void *)((byte_t *)edata_addr_get(edata) + *gap_size + size),
+	    edata_bsize_get(edata) - *gap_size - size, edata_sn_get(edata),
+	    base_edata_is_reused(edata));
 	return ret;
 }
 
 static void
-base_extent_bump_alloc_post(base_t *base, edata_t *edata, size_t gap_size,
-    void *addr, size_t size) {
-	if (edata_bsize_get(edata) > 0) {
-		/*
-		 * Compute the index for the largest size class that does not
-		 * exceed extent's size.
-		 */
-		szind_t index_floor =
-		    sz_size2index(edata_bsize_get(edata) + 1) - 1;
-		edata_heap_insert(&base->avail[index_floor], edata);
+base_edata_heap_insert(tsdn_t *tsdn, base_t *base, edata_t *edata) {
+	malloc_mutex_assert_owner(tsdn, &base->mtx);
+
+	size_t bsize = edata_bsize_get(edata);
+	assert(bsize > 0);
+	/*
+	 * Compute the index for the largest size class that does not exceed
+	 * extent's size.
+	 */
+	szind_t index_floor = sz_size2index(bsize + 1) - 1;
+	edata_heap_insert(&base->avail[index_floor], edata);
+}
+
+/*
+ * Only can be called by top-level functions, since it may call base_alloc
+ * internally when cache is empty.
+ */
+static edata_t *
+base_alloc_base_edata(tsdn_t *tsdn, base_t *base) {
+	edata_t *edata;
+
+	malloc_mutex_lock(tsdn, &base->mtx);
+	edata = edata_avail_first(&base->edata_avail);
+	if (edata != NULL) {
+		edata_avail_remove(&base->edata_avail, edata);
+	}
+	malloc_mutex_unlock(tsdn, &base->mtx);
+
+	if (edata == NULL) {
+		edata = base_alloc_edata(tsdn, base);
 	}
 
-	if (config_stats) {
+	return edata;
+}
+
+static void
+base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, edata_t *edata,
+    size_t gap_size, void *addr, size_t size) {
+	if (edata_bsize_get(edata) > 0) {
+		base_edata_heap_insert(tsdn, base, edata);
+	} else {
+		/* Freed base edata_t stored in edata_avail. */
+		edata_avail_insert(&base->edata_avail, edata);
+	}
+
+	if (config_stats && !base_edata_is_reused(edata)) {
 		base->allocated += size;
 		/*
 		 * Add one PAGE to base_resident for every page boundary that is
 		 * crossed by the new allocation. Adjust n_thp similarly when
 		 * metadata_thp is enabled.
 		 */
-		base->resident += PAGE_CEILING((uintptr_t)addr + size) -
-		    PAGE_CEILING((uintptr_t)addr - gap_size);
+		base->resident += PAGE_CEILING((uintptr_t)addr + size)
+		    - PAGE_CEILING((uintptr_t)addr - gap_size);
 		assert(base->allocated <= base->resident);
 		assert(base->resident <= base->mapped);
-		if (metadata_thp_madvise() && (opt_metadata_thp ==
-		    metadata_thp_always || base->auto_thp_switched)) {
+		if (metadata_thp_madvise()
+		    && (opt_metadata_thp == metadata_thp_always
+		        || base->auto_thp_switched)) {
 			base->n_thp += (HUGEPAGE_CEILING((uintptr_t)addr + size)
-			    - HUGEPAGE_CEILING((uintptr_t)addr - gap_size)) >>
-			    LG_HUGEPAGE;
+			                   - HUGEPAGE_CEILING(
+			                       (uintptr_t)addr - gap_size))
+			    >> LG_HUGEPAGE;
 			assert(base->mapped >= base->n_thp << LG_HUGEPAGE);
 		}
 	}
 }
 
 static void *
-base_extent_bump_alloc(base_t *base, edata_t *edata, size_t size,
-    size_t alignment) {
-	void *ret;
+base_extent_bump_alloc(
+    tsdn_t *tsdn, base_t *base, edata_t *edata, size_t size, size_t alignment) {
+	void  *ret;
 	size_t gap_size;
 
 	ret = base_extent_bump_alloc_helper(edata, &gap_size, size, alignment);
-	base_extent_bump_alloc_post(base, edata, gap_size, ret, size);
+	base_extent_bump_alloc_post(tsdn, base, edata, gap_size, ret, size);
 	return ret;
 }
 
+static size_t
+base_block_size_ceil(size_t block_size) {
+	return opt_metadata_thp == metadata_thp_disabled
+	    ? ALIGNMENT_CEILING(block_size, BASE_BLOCK_MIN_ALIGN)
+	    : HUGEPAGE_CEILING(block_size);
+}
+
 /*
  * Allocate a block of virtual memory that is large enough to start with a
  * base_block_t header, followed by an object of specified size and alignment.
@@ -246,36 +357,38 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind,
 	alignment = ALIGNMENT_CEILING(alignment, QUANTUM);
 	size_t usize = ALIGNMENT_CEILING(size, alignment);
 	size_t header_size = sizeof(base_block_t);
-	size_t gap_size = ALIGNMENT_CEILING(header_size, alignment) -
-	    header_size;
+	size_t gap_size = ALIGNMENT_CEILING(header_size, alignment)
+	    - header_size;
 	/*
 	 * Create increasingly larger blocks in order to limit the total number
 	 * of disjoint virtual memory ranges.  Choose the next size in the page
 	 * size class series (skipping size classes that are not a multiple of
-	 * HUGEPAGE), or a size large enough to satisfy the requested size and
-	 * alignment, whichever is larger.
+	 * HUGEPAGE when using metadata_thp), or a size large enough to satisfy
+	 * the requested size and alignment, whichever is larger.
 	 */
-	size_t min_block_size = HUGEPAGE_CEILING(sz_psz2u(header_size + gap_size
-	    + usize));
-	pszind_t pind_next = (*pind_last + 1 < sz_psz2ind(SC_LARGE_MAXCLASS)) ?
-	    *pind_last + 1 : *pind_last;
-	size_t next_block_size = HUGEPAGE_CEILING(sz_pind2sz(pind_next));
-	size_t block_size = (min_block_size > next_block_size) ? min_block_size
-	    : next_block_size;
-	base_block_t *block = (base_block_t *)base_map(tsdn, ehooks, ind,
-	    block_size);
+	size_t min_block_size = base_block_size_ceil(
+	    sz_psz2u(header_size + gap_size + usize));
+	pszind_t pind_next = (*pind_last + 1 < sz_psz2ind(SC_LARGE_MAXCLASS))
+	    ? *pind_last + 1
+	    : *pind_last;
+	size_t   next_block_size = base_block_size_ceil(sz_pind2sz(pind_next));
+	size_t   block_size = (min_block_size > next_block_size)
+	      ? min_block_size
+	      : next_block_size;
+	base_block_t *block = (base_block_t *)base_map(
+	    tsdn, ehooks, ind, block_size);
 	if (block == NULL) {
 		return NULL;
 	}
 
 	if (metadata_thp_madvise()) {
 		void *addr = (void *)block;
-		assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
-		    (block_size & HUGEPAGE_MASK) == 0);
+		assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0
+		    && (block_size & HUGEPAGE_MASK) == 0);
 		if (opt_metadata_thp == metadata_thp_always) {
 			pages_huge(addr, block_size);
-		} else if (opt_metadata_thp == metadata_thp_auto &&
-		    base != NULL) {
+		} else if (opt_metadata_thp == metadata_thp_auto
+		    && base != NULL) {
 			/* base != NULL indicates this is not a new base. */
 			malloc_mutex_lock(tsdn, &base->mtx);
 			base_auto_thp_switch(tsdn, base);
@@ -291,7 +404,7 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind,
 	block->next = NULL;
 	assert(block_size >= header_size);
 	base_edata_init(extent_sn_next, &block->edata,
-	    (void *)((uintptr_t)block + header_size), block_size - header_size);
+	    (void *)((byte_t *)block + header_size), block_size - header_size);
 	return block;
 }
 
@@ -322,12 +435,12 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
 		base->allocated += sizeof(base_block_t);
 		base->resident += PAGE_CEILING(sizeof(base_block_t));
 		base->mapped += block->size;
-		if (metadata_thp_madvise() &&
-		    !(opt_metadata_thp == metadata_thp_auto
-		      && !base->auto_thp_switched)) {
+		if (metadata_thp_madvise()
+		    && !(opt_metadata_thp == metadata_thp_auto
+		        && !base->auto_thp_switched)) {
 			assert(base->n_thp > 0);
-			base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) >>
-			    LG_HUGEPAGE;
+			base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t))
+			    >> LG_HUGEPAGE;
 		}
 		assert(base->allocated <= base->resident);
 		assert(base->resident <= base->mapped);
@@ -345,7 +458,7 @@ base_t *
 base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks,
     bool metadata_use_hooks) {
 	pszind_t pind_last = 0;
-	size_t extent_sn_next = 0;
+	size_t   extent_sn_next = 0;
 
 	/*
 	 * The base will contain the ehooks eventually, but it itself is
@@ -353,9 +466,10 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks,
 	 * memory, and then initialize the ehooks within the base_t.
 	 */
 	ehooks_t fake_ehooks;
-	ehooks_init(&fake_ehooks, metadata_use_hooks ?
-	    (extent_hooks_t *)extent_hooks :
-	    (extent_hooks_t *)&ehooks_default_extent_hooks, ind);
+	ehooks_init(&fake_ehooks,
+	    metadata_use_hooks ? (extent_hooks_t *)extent_hooks
+	                       : (extent_hooks_t *)&ehooks_default_extent_hooks,
+	    ind);
 
 	base_block_t *block = base_block_alloc(tsdn, NULL, &fake_ehooks, ind,
 	    &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM);
@@ -363,17 +477,18 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks,
 		return NULL;
 	}
 
-	size_t gap_size;
-	size_t base_alignment = CACHELINE;
-	size_t base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment);
-	base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->edata,
-	    &gap_size, base_size, base_alignment);
+	size_t  gap_size;
+	size_t  base_alignment = CACHELINE;
+	size_t  base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment);
+	base_t *base = (base_t *)base_extent_bump_alloc_helper(
+	    &block->edata, &gap_size, base_size, base_alignment);
 	ehooks_init(&base->ehooks, (extent_hooks_t *)extent_hooks, ind);
-	ehooks_init(&base->ehooks_base, metadata_use_hooks ?
-	    (extent_hooks_t *)extent_hooks :
-	    (extent_hooks_t *)&ehooks_default_extent_hooks, ind);
+	ehooks_init(&base->ehooks_base,
+	    metadata_use_hooks ? (extent_hooks_t *)extent_hooks
+	                       : (extent_hooks_t *)&ehooks_default_extent_hooks,
+	    ind);
 	if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE,
-	    malloc_mutex_rank_exclusive)) {
+	        malloc_mutex_rank_exclusive)) {
 		base_unmap(tsdn, &fake_ehooks, ind, block, block->size);
 		return NULL;
 	}
@@ -384,32 +499,41 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks,
 	for (szind_t i = 0; i < SC_NSIZES; i++) {
 		edata_heap_new(&base->avail[i]);
 	}
+	edata_avail_new(&base->edata_avail);
+
 	if (config_stats) {
+		base->edata_allocated = 0;
+		base->rtree_allocated = 0;
 		base->allocated = sizeof(base_block_t);
 		base->resident = PAGE_CEILING(sizeof(base_block_t));
 		base->mapped = block->size;
-		base->n_thp = (opt_metadata_thp == metadata_thp_always) &&
-		    metadata_thp_madvise() ? HUGEPAGE_CEILING(sizeof(base_block_t))
-		    >> LG_HUGEPAGE : 0;
+		base->n_thp = (opt_metadata_thp == metadata_thp_always)
+		        && metadata_thp_madvise()
+		    ? HUGEPAGE_CEILING(sizeof(base_block_t)) >> LG_HUGEPAGE
+		    : 0;
 		assert(base->allocated <= base->resident);
 		assert(base->resident <= base->mapped);
 		assert(base->n_thp << LG_HUGEPAGE <= base->mapped);
 	}
-	base_extent_bump_alloc_post(base, &block->edata, gap_size, base,
-	    base_size);
+
+	/* Locking here is only necessary because of assertions. */
+	malloc_mutex_lock(tsdn, &base->mtx);
+	base_extent_bump_alloc_post(
+	    tsdn, base, &block->edata, gap_size, base, base_size);
+	malloc_mutex_unlock(tsdn, &base->mtx);
 
 	return base;
 }
 
 void
 base_delete(tsdn_t *tsdn, base_t *base) {
-	ehooks_t *ehooks = base_ehooks_get_for_metadata(base);
+	ehooks_t     *ehooks = base_ehooks_get_for_metadata(base);
 	base_block_t *next = base->blocks;
 	do {
 		base_block_t *block = next;
 		next = block->next;
-		base_unmap(tsdn, ehooks, base_ind_get(base), block,
-		    block->size);
+		base_unmap(
+		    tsdn, ehooks, base_ind_get(base), block, block->size);
 	} while (next != NULL);
 }
 
@@ -425,15 +549,15 @@ base_ehooks_get_for_metadata(base_t *base) {
 
 extent_hooks_t *
 base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) {
-	extent_hooks_t *old_extent_hooks =
-	    ehooks_get_extent_hooks_ptr(&base->ehooks);
+	extent_hooks_t *old_extent_hooks = ehooks_get_extent_hooks_ptr(
+	    &base->ehooks);
 	ehooks_init(&base->ehooks, extent_hooks, ehooks_ind_get(&base->ehooks));
 	return old_extent_hooks;
 }
 
 static void *
 base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment,
-    size_t *esn) {
+    size_t *esn, size_t *ret_usize) {
 	alignment = QUANTUM_CEILING(alignment);
 	size_t usize = ALIGNMENT_CEILING(size, alignment);
 	size_t asize = usize + alignment - QUANTUM;
@@ -457,10 +581,13 @@ base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment,
 		goto label_return;
 	}
 
-	ret = base_extent_bump_alloc(base, edata, usize, alignment);
+	ret = base_extent_bump_alloc(tsdn, base, edata, usize, alignment);
 	if (esn != NULL) {
 		*esn = (size_t)edata_sn_get(edata);
 	}
+	if (ret_usize != NULL) {
+		*ret_usize = usize;
+	}
 label_return:
 	malloc_mutex_unlock(tsdn, &base->mtx);
 	return ret;
@@ -476,30 +603,121 @@ label_return:
  */
 void *
 base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
-	return base_alloc_impl(tsdn, base, size, alignment, NULL);
+	return base_alloc_impl(tsdn, base, size, alignment, NULL, NULL);
 }
 
 edata_t *
 base_alloc_edata(tsdn_t *tsdn, base_t *base) {
-	size_t esn;
-	edata_t *edata = base_alloc_impl(tsdn, base, sizeof(edata_t),
-	    EDATA_ALIGNMENT, &esn);
+	size_t   esn, usize;
+	edata_t *edata = base_alloc_impl(
+	    tsdn, base, sizeof(edata_t), EDATA_ALIGNMENT, &esn, &usize);
 	if (edata == NULL) {
 		return NULL;
 	}
+	if (config_stats) {
+		base->edata_allocated += usize;
+	}
 	edata_esn_set(edata, esn);
 	return edata;
 }
 
+void *
+base_alloc_rtree(tsdn_t *tsdn, base_t *base, size_t size) {
+	size_t usize;
+	void  *rtree = base_alloc_impl(
+            tsdn, base, size, CACHELINE, NULL, &usize);
+	if (rtree == NULL) {
+		return NULL;
+	}
+	if (config_stats) {
+		base->rtree_allocated += usize;
+	}
+	return rtree;
+}
+
+static inline void
+b0_alloc_header_size(size_t *header_size, size_t *alignment) {
+	*alignment = QUANTUM;
+	*header_size = QUANTUM > sizeof(edata_t *) ? QUANTUM
+	                                           : sizeof(edata_t *);
+}
+
+/*
+ * Each piece allocated here is managed by a separate edata, because it was bump
+ * allocated and cannot be merged back into the original base_block.  This means
+ * it's not for general purpose: 1) they are not page aligned, nor page sized,
+ * and 2) the requested size should not be too small (as each piece comes with
+ * an edata_t).  Only used for tcache bin stack allocation now.
+ */
+void *
+b0_alloc_tcache_stack(tsdn_t *tsdn, size_t stack_size) {
+	base_t  *base = b0get();
+	edata_t *edata = base_alloc_base_edata(tsdn, base);
+	if (edata == NULL) {
+		return NULL;
+	}
+
+	/*
+	 * Reserve room for the header, which stores a pointer to the managing
+	 * edata_t.  The header itself is located right before the return
+	 * address, so that edata can be retrieved on dalloc.  Bump up to usize
+	 * to improve reusability -- otherwise the freed stacks will be put back
+	 * into the previous size class.
+	 */
+	size_t esn, alignment, header_size;
+	b0_alloc_header_size(&header_size, &alignment);
+
+	size_t alloc_size = sz_s2u(stack_size + header_size);
+	void  *addr = base_alloc_impl(
+            tsdn, base, alloc_size, alignment, &esn, NULL);
+	if (addr == NULL) {
+		edata_avail_insert(&base->edata_avail, edata);
+		return NULL;
+	}
+
+	/* Set is_reused: see comments in base_edata_is_reused. */
+	edata_binit(edata, addr, alloc_size, esn, true /* is_reused */);
+	*(edata_t **)addr = edata;
+
+	return (byte_t *)addr + header_size;
+}
+
 void
-base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident,
+b0_dalloc_tcache_stack(tsdn_t *tsdn, void *tcache_stack) {
+	/* edata_t pointer stored in header. */
+	size_t alignment, header_size;
+	b0_alloc_header_size(&header_size, &alignment);
+
+	edata_t *edata = *(edata_t **)((byte_t *)tcache_stack - header_size);
+	void    *addr = edata_addr_get(edata);
+	size_t   bsize = edata_bsize_get(edata);
+	/* Marked as "reused" to avoid double counting stats. */
+	assert(base_edata_is_reused(edata));
+	assert(addr != NULL && bsize > 0);
+
+	/* Zero out since base_alloc returns zeroed memory. */
+	memset(addr, 0, bsize);
+
+	base_t *base = b0get();
+	malloc_mutex_lock(tsdn, &base->mtx);
+	base_edata_heap_insert(tsdn, base, edata);
+	malloc_mutex_unlock(tsdn, &base->mtx);
+}
+
+void
+base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated,
+    size_t *edata_allocated, size_t *rtree_allocated, size_t *resident,
     size_t *mapped, size_t *n_thp) {
 	cassert(config_stats);
 
 	malloc_mutex_lock(tsdn, &base->mtx);
 	assert(base->allocated <= base->resident);
 	assert(base->resident <= base->mapped);
+	assert(
+	    base->edata_allocated + base->rtree_allocated <= base->allocated);
 	*allocated = base->allocated;
+	*edata_allocated = base->edata_allocated;
+	*rtree_allocated = base->rtree_allocated;
 	*resident = base->resident;
 	*mapped = base->mapped;
 	*n_thp = base->n_thp;
diff --git a/src/bin.c b/src/bin.c
index fa204587..6bab4b22 100644
--- a/src/bin.c
+++ b/src/bin.c
@@ -41,7 +41,7 @@ bin_shard_sizes_boot(unsigned bin_shard_sizes[SC_NBINS]) {
 bool
 bin_init(bin_t *bin) {
 	if (malloc_mutex_init(&bin->lock, "bin", WITNESS_RANK_BIN,
-	    malloc_mutex_rank_exclusive)) {
+	        malloc_mutex_rank_exclusive)) {
 		return true;
 	}
 	bin->slabcur = NULL;
@@ -67,3 +67,266 @@ void
 bin_postfork_child(tsdn_t *tsdn, bin_t *bin) {
 	malloc_mutex_postfork_child(tsdn, &bin->lock);
 }
+
+void *
+bin_slab_reg_alloc(edata_t *slab, const bin_info_t *bin_info) {
+	void        *ret;
+	slab_data_t *slab_data = edata_slab_data_get(slab);
+	size_t       regind;
+
+	assert(edata_nfree_get(slab) > 0);
+	assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info));
+
+	regind = bitmap_sfu(slab_data->bitmap, &bin_info->bitmap_info);
+	ret = (void *)((byte_t *)edata_addr_get(slab)
+	    + (uintptr_t)(bin_info->reg_size * regind));
+	edata_nfree_dec(slab);
+	return ret;
+}
+
+void
+bin_slab_reg_alloc_batch(
+    edata_t *slab, const bin_info_t *bin_info, unsigned cnt, void **ptrs) {
+	slab_data_t *slab_data = edata_slab_data_get(slab);
+
+	assert(edata_nfree_get(slab) >= cnt);
+	assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info));
+
+#if (!defined JEMALLOC_INTERNAL_POPCOUNTL) || (defined BITMAP_USE_TREE)
+	for (unsigned i = 0; i < cnt; i++) {
+		size_t regind = bitmap_sfu(
+		    slab_data->bitmap, &bin_info->bitmap_info);
+		*(ptrs + i) = (void *)((uintptr_t)edata_addr_get(slab)
+		    + (uintptr_t)(bin_info->reg_size * regind));
+	}
+#else
+	unsigned group = 0;
+	bitmap_t g = slab_data->bitmap[group];
+	unsigned i = 0;
+	while (i < cnt) {
+		while (g == 0) {
+			g = slab_data->bitmap[++group];
+		}
+		size_t shift = group << LG_BITMAP_GROUP_NBITS;
+		size_t pop = popcount_lu(g);
+		if (pop > (cnt - i)) {
+			pop = cnt - i;
+		}
+
+		/*
+		 * Load from memory locations only once, outside the
+		 * hot loop below.
+		 */
+		uintptr_t base = (uintptr_t)edata_addr_get(slab);
+		uintptr_t regsize = (uintptr_t)bin_info->reg_size;
+		while (pop--) {
+			size_t bit = cfs_lu(&g);
+			size_t regind = shift + bit;
+			/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
+			*(ptrs + i) = (void *)(base + regsize * regind);
+
+			i++;
+		}
+		slab_data->bitmap[group] = g;
+	}
+#endif
+	edata_nfree_sub(slab, cnt);
+}
+
+void
+bin_slabs_nonfull_insert(bin_t *bin, edata_t *slab) {
+	assert(edata_nfree_get(slab) > 0);
+	edata_heap_insert(&bin->slabs_nonfull, slab);
+	if (config_stats) {
+		bin->stats.nonfull_slabs++;
+	}
+}
+
+void
+bin_slabs_nonfull_remove(bin_t *bin, edata_t *slab) {
+	edata_heap_remove(&bin->slabs_nonfull, slab);
+	if (config_stats) {
+		bin->stats.nonfull_slabs--;
+	}
+}
+
+edata_t *
+bin_slabs_nonfull_tryget(bin_t *bin) {
+	edata_t *slab = edata_heap_remove_first(&bin->slabs_nonfull);
+	if (slab == NULL) {
+		return NULL;
+	}
+	if (config_stats) {
+		bin->stats.reslabs++;
+		bin->stats.nonfull_slabs--;
+	}
+	return slab;
+}
+
+void
+bin_slabs_full_insert(bool is_auto, bin_t *bin, edata_t *slab) {
+	assert(edata_nfree_get(slab) == 0);
+	/*
+	 *  Tracking extents is required by arena_reset, which is not allowed
+	 *  for auto arenas.  Bypass this step to avoid touching the edata
+	 *  linkage (often results in cache misses) for auto arenas.
+	 */
+	if (is_auto) {
+		return;
+	}
+	edata_list_active_append(&bin->slabs_full, slab);
+}
+
+void
+bin_slabs_full_remove(bool is_auto, bin_t *bin, edata_t *slab) {
+	if (is_auto) {
+		return;
+	}
+	edata_list_active_remove(&bin->slabs_full, slab);
+}
+
+void
+bin_dissociate_slab(bool is_auto, edata_t *slab, bin_t *bin) {
+	/* Dissociate slab from bin. */
+	if (slab == bin->slabcur) {
+		bin->slabcur = NULL;
+	} else {
+		szind_t           binind = edata_szind_get(slab);
+		const bin_info_t *bin_info = &bin_infos[binind];
+
+		/*
+		 * The following block's conditional is necessary because if the
+		 * slab only contains one region, then it never gets inserted
+		 * into the non-full slabs heap.
+		 */
+		if (bin_info->nregs == 1) {
+			bin_slabs_full_remove(is_auto, bin, slab);
+		} else {
+			bin_slabs_nonfull_remove(bin, slab);
+		}
+	}
+}
+
+void
+bin_lower_slab(tsdn_t *tsdn, bool is_auto, edata_t *slab, bin_t *bin) {
+	assert(edata_nfree_get(slab) > 0);
+
+	/*
+	 * Make sure that if bin->slabcur is non-NULL, it refers to the
+	 * oldest/lowest non-full slab.  It is okay to NULL slabcur out rather
+	 * than proactively keeping it pointing at the oldest/lowest non-full
+	 * slab.
+	 */
+	if (bin->slabcur != NULL && edata_snad_comp(bin->slabcur, slab) > 0) {
+		/* Switch slabcur. */
+		if (edata_nfree_get(bin->slabcur) > 0) {
+			bin_slabs_nonfull_insert(bin, bin->slabcur);
+		} else {
+			bin_slabs_full_insert(is_auto, bin, bin->slabcur);
+		}
+		bin->slabcur = slab;
+		if (config_stats) {
+			bin->stats.reslabs++;
+		}
+	} else {
+		bin_slabs_nonfull_insert(bin, slab);
+	}
+}
+
+void
+bin_dalloc_slab_prepare(tsdn_t *tsdn, edata_t *slab, bin_t *bin) {
+	malloc_mutex_assert_owner(tsdn, &bin->lock);
+
+	assert(slab != bin->slabcur);
+	if (config_stats) {
+		bin->stats.curslabs--;
+	}
+}
+
+void
+bin_dalloc_locked_handle_newly_empty(
+    tsdn_t *tsdn, bool is_auto, edata_t *slab, bin_t *bin) {
+	bin_dissociate_slab(is_auto, slab, bin);
+	bin_dalloc_slab_prepare(tsdn, slab, bin);
+}
+
+void
+bin_dalloc_locked_handle_newly_nonempty(
+    tsdn_t *tsdn, bool is_auto, edata_t *slab, bin_t *bin) {
+	bin_slabs_full_remove(is_auto, bin, slab);
+	bin_lower_slab(tsdn, is_auto, slab, bin);
+}
+
+void
+bin_refill_slabcur_with_fresh_slab(tsdn_t *tsdn, bin_t *bin,
+    szind_t binind, edata_t *fresh_slab) {
+	malloc_mutex_assert_owner(tsdn, &bin->lock);
+	/* Only called after slabcur and nonfull both failed. */
+	assert(bin->slabcur == NULL);
+	assert(edata_heap_first(&bin->slabs_nonfull) == NULL);
+	assert(fresh_slab != NULL);
+
+	/* A new slab from arena_slab_alloc() */
+	assert(edata_nfree_get(fresh_slab) == bin_infos[binind].nregs);
+	if (config_stats) {
+		bin->stats.nslabs++;
+		bin->stats.curslabs++;
+	}
+	bin->slabcur = fresh_slab;
+}
+
+void *
+bin_malloc_with_fresh_slab(tsdn_t *tsdn, bin_t *bin,
+    szind_t binind, edata_t *fresh_slab) {
+	malloc_mutex_assert_owner(tsdn, &bin->lock);
+	bin_refill_slabcur_with_fresh_slab(tsdn, bin, binind, fresh_slab);
+
+	return bin_slab_reg_alloc(bin->slabcur, &bin_infos[binind]);
+}
+
+bool
+bin_refill_slabcur_no_fresh_slab(tsdn_t *tsdn, bool is_auto, bin_t *bin) {
+	malloc_mutex_assert_owner(tsdn, &bin->lock);
+	/* Only called after bin_slab_reg_alloc[_batch] failed. */
+	assert(bin->slabcur == NULL || edata_nfree_get(bin->slabcur) == 0);
+
+	if (bin->slabcur != NULL) {
+		bin_slabs_full_insert(is_auto, bin, bin->slabcur);
+	}
+
+	/* Look for a usable slab. */
+	bin->slabcur = bin_slabs_nonfull_tryget(bin);
+	assert(bin->slabcur == NULL || edata_nfree_get(bin->slabcur) > 0);
+
+	return (bin->slabcur == NULL);
+}
+
+void *
+bin_malloc_no_fresh_slab(tsdn_t *tsdn, bool is_auto, bin_t *bin,
+    szind_t binind) {
+	malloc_mutex_assert_owner(tsdn, &bin->lock);
+	if (bin->slabcur == NULL || edata_nfree_get(bin->slabcur) == 0) {
+		if (bin_refill_slabcur_no_fresh_slab(tsdn, is_auto, bin)) {
+			return NULL;
+		}
+	}
+
+	assert(bin->slabcur != NULL && edata_nfree_get(bin->slabcur) > 0);
+	return bin_slab_reg_alloc(bin->slabcur, &bin_infos[binind]);
+}
+
+bin_t *
+bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind,
+    unsigned *binshard_p) {
+	unsigned binshard;
+	if (tsdn_null(tsdn) || tsd_arena_get(tsdn_tsd(tsdn)) == NULL) {
+		binshard = 0;
+	} else {
+		binshard = tsd_binshardsp_get(tsdn_tsd(tsdn))->binshard[binind];
+	}
+	assert(binshard < bin_infos[binind].n_shards);
+	if (binshard_p != NULL) {
+		*binshard_p = binshard;
+	}
+	return arena_get_bin(arena, binind, binshard);
+}
diff --git a/src/bin_info.c b/src/bin_info.c
index 8629ef88..e10042fd 100644
--- a/src/bin_info.c
+++ b/src/bin_info.c
@@ -10,12 +10,12 @@ bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
     bin_info_t infos[SC_NBINS]) {
 	for (unsigned i = 0; i < SC_NBINS; i++) {
 		bin_info_t *bin_info = &infos[i];
-		sc_t *sc = &sc_data->sc[i];
+		sc_t       *sc = &sc_data->sc[i];
 		bin_info->reg_size = ((size_t)1U << sc->lg_base)
 		    + ((size_t)sc->ndelta << sc->lg_delta);
 		bin_info->slab_size = (sc->pgs << LG_PAGE);
-		bin_info->nregs =
-		    (uint32_t)(bin_info->slab_size / bin_info->reg_size);
+		bin_info->nregs = (uint32_t)(bin_info->slab_size
+		    / bin_info->reg_size);
 		bin_info->n_shards = bin_shard_sizes[i];
 		bitmap_info_t bitmap_info = BITMAP_INFO_INITIALIZER(
 		    bin_info->nregs);
diff --git a/src/bitmap.c b/src/bitmap.c
index 0ccedc5d..8ac81a67 100644
--- a/src/bitmap.c
+++ b/src/bitmap.c
@@ -10,7 +10,7 @@
 void
 bitmap_info_init(bitmap_info_t *binfo, size_t nbits) {
 	unsigned i;
-	size_t group_count;
+	size_t   group_count;
 
 	assert(nbits > 0);
 	assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS));
@@ -24,11 +24,11 @@ bitmap_info_init(bitmap_info_t *binfo, size_t nbits) {
 	group_count = BITMAP_BITS2GROUPS(nbits);
 	for (i = 1; group_count > 1; i++) {
 		assert(i < BITMAP_MAX_LEVELS);
-		binfo->levels[i].group_offset = binfo->levels[i-1].group_offset
-		    + group_count;
+		binfo->levels[i].group_offset =
+		    binfo->levels[i - 1].group_offset + group_count;
 		group_count = BITMAP_BITS2GROUPS(group_count);
 	}
-	binfo->levels[i].group_offset = binfo->levels[i-1].group_offset
+	binfo->levels[i].group_offset = binfo->levels[i - 1].group_offset
 	    + group_count;
 	assert(binfo->levels[i].group_offset <= BITMAP_GROUPS_MAX);
 	binfo->nlevels = i;
@@ -42,7 +42,7 @@ bitmap_info_ngroups(const bitmap_info_t *binfo) {
 
 void
 bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill) {
-	size_t extra;
+	size_t   extra;
 	unsigned i;
 
 	/*
@@ -69,12 +69,13 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill) {
 		bitmap[binfo->levels[1].group_offset - 1] >>= extra;
 	}
 	for (i = 1; i < binfo->nlevels; i++) {
-		size_t group_count = binfo->levels[i].group_offset -
-		    binfo->levels[i-1].group_offset;
-		extra = (BITMAP_GROUP_NBITS - (group_count &
-		    BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK;
+		size_t group_count = binfo->levels[i].group_offset
+		    - binfo->levels[i - 1].group_offset;
+		extra = (BITMAP_GROUP_NBITS
+		            - (group_count & BITMAP_GROUP_NBITS_MASK))
+		    & BITMAP_GROUP_NBITS_MASK;
 		if (extra != 0) {
-			bitmap[binfo->levels[i+1].group_offset - 1] >>= extra;
+			bitmap[binfo->levels[i + 1].group_offset - 1] >>= extra;
 		}
 	}
 }
diff --git a/src/buf_writer.c b/src/buf_writer.c
index 7c6f7940..3c298502 100644
--- a/src/buf_writer.c
+++ b/src/buf_writer.c
@@ -43,8 +43,9 @@ buf_writer_init(tsdn_t *tsdn, buf_writer_t *buf_writer, write_cb_t *write_cb,
 	if (write_cb != NULL) {
 		buf_writer->write_cb = write_cb;
 	} else {
-		buf_writer->write_cb = je_malloc_message != NULL ?
-		    je_malloc_message : wrtmessage;
+		buf_writer->write_cb = je_malloc_message != NULL
+		    ? je_malloc_message
+		    : wrtmessage;
 	}
 	buf_writer->cbopaque = cbopaque;
 	assert(buf_len >= 2);
@@ -52,8 +53,8 @@ buf_writer_init(tsdn_t *tsdn, buf_writer_t *buf_writer, write_cb_t *write_cb,
 		buf_writer->buf = buf;
 		buf_writer->internal_buf = false;
 	} else {
-		buf_writer->buf = buf_writer_allocate_internal_buf(tsdn,
-		    buf_len);
+		buf_writer->buf = buf_writer_allocate_internal_buf(
+		    tsdn, buf_len);
 		buf_writer->internal_buf = true;
 	}
 	if (buf_writer->buf != NULL) {
@@ -111,13 +112,13 @@ buf_writer_terminate(tsdn_t *tsdn, buf_writer_t *buf_writer) {
 }
 
 void
-buf_writer_pipe(buf_writer_t *buf_writer, read_cb_t *read_cb,
-    void *read_cbopaque) {
+buf_writer_pipe(
+    buf_writer_t *buf_writer, read_cb_t *read_cb, void *read_cbopaque) {
 	/*
 	 * A tiny local buffer in case the buffered writer failed to allocate
 	 * at init.
 	 */
-	static char backup_buf[16];
+	static char         backup_buf[16];
 	static buf_writer_t backup_buf_writer;
 
 	buf_writer_assert(buf_writer);
diff --git a/src/cache_bin.c b/src/cache_bin.c
index 9ae072a0..ec677948 100644
--- a/src/cache_bin.c
+++ b/src/cache_bin.c
@@ -5,17 +5,29 @@
 #include "jemalloc/internal/cache_bin.h"
 #include "jemalloc/internal/safety_check.h"
 
+const uintptr_t disabled_bin = JUNK_ADDR;
+
 void
-cache_bin_info_init(cache_bin_info_t *info,
-    cache_bin_sz_t ncached_max) {
+cache_bin_info_init(cache_bin_info_t *info, cache_bin_sz_t ncached_max) {
 	assert(ncached_max <= CACHE_BIN_NCACHED_MAX);
 	size_t stack_size = (size_t)ncached_max * sizeof(void *);
 	assert(stack_size < ((size_t)1 << (sizeof(cache_bin_sz_t) * 8)));
 	info->ncached_max = (cache_bin_sz_t)ncached_max;
 }
 
+bool
+cache_bin_stack_use_thp(void) {
+	/*
+	 * If metadata_thp is enabled, allocating tcache stack from the base
+	 * allocator for efficiency gains.  The downside, however, is that base
+	 * allocator never purges freed memory, and may cache a fair amount of
+	 * memory after many threads are terminated and not reused.
+	 */
+	return metadata_thp_enabled();
+}
+
 void
-cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos,
+cache_bin_info_compute_alloc(const cache_bin_info_t *infos, szind_t ninfos,
     size_t *size, size_t *alignment) {
 	/* For the total bin stack region (per tcache), reserve 2 more slots so
 	 * that
@@ -26,74 +38,80 @@ cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos,
 	 */
 	*size = sizeof(void *) * 2;
 	for (szind_t i = 0; i < ninfos; i++) {
-		assert(infos[i].ncached_max > 0);
 		*size += infos[i].ncached_max * sizeof(void *);
 	}
 
 	/*
-	 * Align to at least PAGE, to minimize the # of TLBs needed by the
-	 * smaller sizes; also helps if the larger sizes don't get used at all.
+	 * When not using THP, align to at least PAGE, to minimize the # of TLBs
+	 * needed by the smaller sizes; also helps if the larger sizes don't get
+	 * used at all.
 	 */
-	*alignment = PAGE;
+	*alignment = cache_bin_stack_use_thp() ? QUANTUM : PAGE;
 }
 
 void
-cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc,
-    size_t *cur_offset) {
+cache_bin_preincrement(const cache_bin_info_t *infos, szind_t ninfos,
+    void *alloc, size_t *cur_offset) {
 	if (config_debug) {
 		size_t computed_size;
 		size_t computed_alignment;
 
 		/* Pointer should be as aligned as we asked for. */
-		cache_bin_info_compute_alloc(infos, ninfos, &computed_size,
-		    &computed_alignment);
+		cache_bin_info_compute_alloc(
+		    infos, ninfos, &computed_size, &computed_alignment);
 		assert(((uintptr_t)alloc & (computed_alignment - 1)) == 0);
 	}
 
-	*(uintptr_t *)((uintptr_t)alloc + *cur_offset) =
-	    cache_bin_preceding_junk;
+	*(uintptr_t *)((byte_t *)alloc
+	    + *cur_offset) = cache_bin_preceding_junk;
 	*cur_offset += sizeof(void *);
 }
 
 void
-cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc,
-    size_t *cur_offset) {
-	*(uintptr_t *)((uintptr_t)alloc + *cur_offset) =
-	    cache_bin_trailing_junk;
+cache_bin_postincrement(void *alloc, size_t *cur_offset) {
+	*(uintptr_t *)((byte_t *)alloc + *cur_offset) = cache_bin_trailing_junk;
 	*cur_offset += sizeof(void *);
 }
 
 void
-cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc,
+cache_bin_init(cache_bin_t *bin, const cache_bin_info_t *info, void *alloc,
     size_t *cur_offset) {
 	/*
 	 * The full_position points to the lowest available space.  Allocations
 	 * will access the slots toward higher addresses (for the benefit of
 	 * adjacent prefetch).
 	 */
-	void *stack_cur = (void *)((uintptr_t)alloc + *cur_offset);
-	void *full_position = stack_cur;
-	uint16_t bin_stack_size = info->ncached_max * sizeof(void *);
+	void          *stack_cur = (void *)((byte_t *)alloc + *cur_offset);
+	void          *full_position = stack_cur;
+	cache_bin_sz_t bin_stack_size = info->ncached_max * sizeof(void *);
 
 	*cur_offset += bin_stack_size;
-	void *empty_position = (void *)((uintptr_t)alloc + *cur_offset);
+	void *empty_position = (void *)((byte_t *)alloc + *cur_offset);
 
 	/* Init to the empty position. */
 	bin->stack_head = (void **)empty_position;
-	bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head;
-	bin->low_bits_full = (uint16_t)(uintptr_t)full_position;
-	bin->low_bits_empty = (uint16_t)(uintptr_t)empty_position;
-	cache_bin_sz_t free_spots = cache_bin_diff(bin,
-	    bin->low_bits_full, (uint16_t)(uintptr_t)bin->stack_head,
-	    /* racy */ false);
+	bin->low_bits_low_water = (cache_bin_sz_t)(uintptr_t)bin->stack_head;
+	bin->low_bits_full = (cache_bin_sz_t)(uintptr_t)full_position;
+	bin->low_bits_empty = (cache_bin_sz_t)(uintptr_t)empty_position;
+	cache_bin_info_init(&bin->bin_info, info->ncached_max);
+	cache_bin_sz_t free_spots = cache_bin_diff(bin, bin->low_bits_full,
+	    (cache_bin_sz_t)(uintptr_t)bin->stack_head);
 	assert(free_spots == bin_stack_size);
-	assert(cache_bin_ncached_get_local(bin, info) == 0);
+	if (!cache_bin_disabled(bin)) {
+		assert(cache_bin_ncached_get_local(bin) == 0);
+	}
 	assert(cache_bin_empty_position_get(bin) == empty_position);
 
 	assert(bin_stack_size > 0 || empty_position == full_position);
 }
 
-bool
-cache_bin_still_zero_initialized(cache_bin_t *bin) {
-	return bin->stack_head == NULL;
+void
+cache_bin_init_disabled(cache_bin_t *bin, cache_bin_sz_t ncached_max) {
+	const void      *fake_stack = cache_bin_disabled_bin_stack();
+	size_t           fake_offset = 0;
+	cache_bin_info_t fake_info;
+	cache_bin_info_init(&fake_info, 0);
+	cache_bin_init(bin, &fake_info, (void *)fake_stack, &fake_offset);
+	cache_bin_info_init(&bin->bin_info, ncached_max);
+	assert(fake_offset == 0);
 }
diff --git a/src/ckh.c b/src/ckh.c
index 8db4319c..80688162 100644
--- a/src/ckh.c
+++ b/src/ckh.c
@@ -49,8 +49,8 @@
 /******************************************************************************/
 /* Function prototypes for non-inline static functions. */
 
-static bool	ckh_grow(tsd_t *tsd, ckh_t *ckh);
-static void	ckh_shrink(tsd_t *tsd, ckh_t *ckh);
+static bool ckh_grow(tsd_t *tsd, ckh_t *ckh);
+static void ckh_shrink(tsd_t *tsd, ckh_t *ckh);
 
 /******************************************************************************/
 
@@ -60,7 +60,7 @@ static void	ckh_shrink(tsd_t *tsd, ckh_t *ckh);
  */
 static size_t
 ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key) {
-	ckhc_t *cell;
+	ckhc_t  *cell;
 	unsigned i;
 
 	for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
@@ -98,20 +98,20 @@ ckh_isearch(ckh_t *ckh, const void *key) {
 }
 
 static bool
-ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
-    const void *data) {
-	ckhc_t *cell;
+ckh_try_bucket_insert(
+    ckh_t *ckh, size_t bucket, const void *key, const void *data) {
+	ckhc_t  *cell;
 	unsigned offset, i;
 
 	/*
 	 * Cycle through the cells in the bucket, starting at a random position.
 	 * The randomness avoids worst-case search overhead as buckets fill up.
 	 */
-	offset = (unsigned)prng_lg_range_u64(&ckh->prng_state,
-	    LG_CKH_BUCKET_CELLS);
+	offset = (unsigned)prng_lg_range_u64(
+	    &ckh->prng_state, LG_CKH_BUCKET_CELLS);
 	for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
-		cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) +
-		    ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))];
+		cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS)
+		    + ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))];
 		if (cell->key == NULL) {
 			cell->key = key;
 			cell->data = data;
@@ -130,12 +130,12 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
  * eviction/relocation bucket cycle.
  */
 static bool
-ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
-    void const **argdata) {
+ckh_evict_reloc_insert(
+    ckh_t *ckh, size_t argbucket, void const **argkey, void const **argdata) {
 	const void *key, *data, *tkey, *tdata;
-	ckhc_t *cell;
-	size_t hashes[2], bucket, tbucket;
-	unsigned i;
+	ckhc_t     *cell;
+	size_t      hashes[2], bucket, tbucket;
+	unsigned    i;
 
 	bucket = argbucket;
 	key = *argkey;
@@ -149,15 +149,18 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
 		 * were an item for which both hashes indicated the same
 		 * bucket.
 		 */
-		i = (unsigned)prng_lg_range_u64(&ckh->prng_state,
-		    LG_CKH_BUCKET_CELLS);
+		i = (unsigned)prng_lg_range_u64(
+		    &ckh->prng_state, LG_CKH_BUCKET_CELLS);
 		cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
 		assert(cell->key != NULL);
 
 		/* Swap cell->{key,data} and {key,data} (evict). */
-		tkey = cell->key; tdata = cell->data;
-		cell->key = key; cell->data = data;
-		key = tkey; data = tdata;
+		tkey = cell->key;
+		tdata = cell->data;
+		cell->key = key;
+		cell->data = data;
+		key = tkey;
+		data = tdata;
 
 #ifdef CKH_COUNT
 		ckh->nrelocs++;
@@ -167,8 +170,8 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
 		ckh->hash(key, hashes);
 		tbucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
 		if (tbucket == bucket) {
-			tbucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets)
-			    - 1);
+			tbucket = hashes[0]
+			    & ((ZU(1) << ckh->lg_curbuckets) - 1);
 			/*
 			 * It may be that (tbucket == bucket) still, if the
 			 * item's hashes both indicate this bucket.  However,
@@ -201,8 +204,8 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
 }
 
 static bool
-ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) {
-	size_t hashes[2], bucket;
+ckh_try_insert(ckh_t *ckh, void const **argkey, void const **argdata) {
+	size_t      hashes[2], bucket;
 	const void *key = *argkey;
 	const void *data = *argdata;
 
@@ -232,7 +235,7 @@ ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) {
  */
 static bool
 ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) {
-	size_t count, i, nins;
+	size_t      count, i, nins;
 	const void *key, *data;
 
 	count = ckh->count;
@@ -254,8 +257,8 @@ ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) {
 
 static bool
 ckh_grow(tsd_t *tsd, ckh_t *ckh) {
-	bool ret;
-	ckhc_t *tab, *ttab;
+	bool     ret;
+	ckhc_t  *tab, *ttab;
 	unsigned lg_prevbuckets, lg_curcells;
 
 #ifdef CKH_COUNT
@@ -274,8 +277,7 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) {
 
 		lg_curcells++;
 		usize = sz_sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
-		if (unlikely(usize == 0
-		    || usize > SC_LARGE_MAXCLASS)) {
+		if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) {
 			ret = true;
 			goto label_return;
 		}
@@ -309,8 +311,8 @@ label_return:
 
 static void
 ckh_shrink(tsd_t *tsd, ckh_t *ckh) {
-	ckhc_t *tab, *ttab;
-	size_t usize;
+	ckhc_t  *tab, *ttab;
+	size_t   usize;
 	unsigned lg_prevbuckets, lg_curcells;
 
 	/*
@@ -358,8 +360,8 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) {
 bool
 ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *ckh_hash,
     ckh_keycomp_t *keycomp) {
-	bool ret;
-	size_t mincells, usize;
+	bool     ret;
+	size_t   mincells, usize;
 	unsigned lg_mincells;
 
 	assert(minitems > 0);
@@ -386,8 +388,7 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *ckh_hash,
 	assert(LG_CKH_BUCKET_CELLS > 0);
 	mincells = ((minitems + (3 - (minitems % 3))) / 3) << 2;
 	for (lg_mincells = LG_CKH_BUCKET_CELLS;
-	    (ZU(1) << lg_mincells) < mincells;
-	    lg_mincells++) {
+	     (ZU(1) << lg_mincells) < mincells; lg_mincells++) {
 		/* Do nothing. */
 	}
 	ckh->lg_minbuckets = lg_mincells - LG_CKH_BUCKET_CELLS;
@@ -417,11 +418,12 @@ ckh_delete(tsd_t *tsd, ckh_t *ckh) {
 	assert(ckh != NULL);
 
 #ifdef CKH_VERBOSE
-	malloc_printf(
-	    "%s(%p): ngrows: %"FMTu64", nshrinks: %"FMTu64","
-	    " nshrinkfails: %"FMTu64", ninserts: %"FMTu64","
-	    " nrelocs: %"FMTu64"\n", __func__, ckh,
-	    (unsigned long long)ckh->ngrows,
+	malloc_printf("%s(%p): ngrows: %" FMTu64 ", nshrinks: %" FMTu64
+	              ","
+	              " nshrinkfails: %" FMTu64 ", ninserts: %" FMTu64
+	              ","
+	              " nrelocs: %" FMTu64 "\n",
+	    __func__, ckh, (unsigned long long)ckh->ngrows,
 	    (unsigned long long)ckh->nshrinks,
 	    (unsigned long long)ckh->nshrinkfails,
 	    (unsigned long long)ckh->ninserts,
@@ -445,8 +447,9 @@ bool
 ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data) {
 	size_t i, ncells;
 
-	for (i = *tabind, ncells = (ZU(1) << (ckh->lg_curbuckets +
-	    LG_CKH_BUCKET_CELLS)); i < ncells; i++) {
+	for (i = *tabind,
+	    ncells = (ZU(1) << (ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS));
+	     i < ncells; i++) {
 		if (ckh->tab[i].key != NULL) {
 			if (key != NULL) {
 				*key = (void *)ckh->tab[i].key;
@@ -486,8 +489,8 @@ label_return:
 }
 
 bool
-ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key,
-    void **data) {
+ckh_remove(
+    tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, void **data) {
 	size_t cell;
 
 	assert(ckh != NULL);
@@ -505,9 +508,9 @@ ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key,
 
 		ckh->count--;
 		/* Try to halve the table if it is less than 1/4 full. */
-		if (ckh->count < (ZU(1) << (ckh->lg_curbuckets
-		    + LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets
-		    > ckh->lg_minbuckets) {
+		if (ckh->count < (ZU(1)
+		        << (ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 2))
+		    && ckh->lg_curbuckets > ckh->lg_minbuckets) {
 			/* Ignore error due to OOM. */
 			ckh_shrink(tsd, ckh);
 		}
@@ -554,8 +557,8 @@ ckh_string_keycomp(const void *k1, const void *k2) {
 void
 ckh_pointer_hash(const void *key, size_t r_hash[2]) {
 	union {
-		const void	*v;
-		size_t		i;
+		const void *v;
+		size_t      i;
 	} u;
 
 	assert(sizeof(u.v) == sizeof(u.i));
diff --git a/src/conf.c b/src/conf.c
new file mode 100644
index 00000000..65abcd25
--- /dev/null
+++ b/src/conf.c
@@ -0,0 +1,1203 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/extent_dss.h"
+#include "jemalloc/internal/extent_mmap.h"
+#include "jemalloc/internal/fxp.h"
+#include "jemalloc/internal/log.h"
+#include "jemalloc/internal/malloc_io.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/nstime.h"
+#include "jemalloc/internal/safety_check.h"
+#include "jemalloc/internal/san.h"
+#include "jemalloc/internal/sc.h"
+#include "jemalloc/internal/util.h"
+
+#include "jemalloc/internal/conf.h"
+
+/* Whether encountered any invalid config options. */
+bool had_conf_error;
+
+static char *
+jemalloc_getenv(const char *name) {
+#ifdef JEMALLOC_FORCE_GETENV
+	return getenv(name);
+#else
+#	ifdef JEMALLOC_HAVE_SECURE_GETENV
+	return secure_getenv(name);
+#	else
+#		ifdef JEMALLOC_HAVE_ISSETUGID
+	if (issetugid() != 0) {
+		return NULL;
+	}
+#		endif
+	return getenv(name);
+#	endif
+#endif
+}
+
+static void
+init_opt_stats_opts(const char *v, size_t vlen, char *dest) {
+	size_t opts_len = strlen(dest);
+	assert(opts_len <= stats_print_tot_num_options);
+
+	for (size_t i = 0; i < vlen; i++) {
+		switch (v[i]) {
+#define OPTION(o, v, d, s)                                                     \
+	case o:                                                                \
+		break;
+			STATS_PRINT_OPTIONS
+#undef OPTION
+		default:
+			continue;
+		}
+
+		if (strchr(dest, v[i]) != NULL) {
+			/* Ignore repeated. */
+			continue;
+		}
+
+		dest[opts_len++] = v[i];
+		dest[opts_len] = '\0';
+		assert(opts_len <= stats_print_tot_num_options);
+	}
+	assert(opts_len == strlen(dest));
+}
+
+static void
+malloc_conf_format_error(const char *msg, const char *begin, const char *end) {
+	size_t len = end - begin + 1;
+	len = len > BUFERROR_BUF ? BUFERROR_BUF : len;
+
+	malloc_printf("<jemalloc>: %s -- %.*s\n", msg, (int)len, begin);
+}
+
+JET_EXTERN bool
+conf_next(char const **opts_p, char const **k_p, size_t *klen_p,
+    char const **v_p, size_t *vlen_p) {
+	bool        accept;
+	const char *opts = *opts_p;
+
+	*k_p = opts;
+
+	for (accept = false; !accept;) {
+		switch (*opts) {
+		case 'A':
+		case 'B':
+		case 'C':
+		case 'D':
+		case 'E':
+		case 'F':
+		case 'G':
+		case 'H':
+		case 'I':
+		case 'J':
+		case 'K':
+		case 'L':
+		case 'M':
+		case 'N':
+		case 'O':
+		case 'P':
+		case 'Q':
+		case 'R':
+		case 'S':
+		case 'T':
+		case 'U':
+		case 'V':
+		case 'W':
+		case 'X':
+		case 'Y':
+		case 'Z':
+		case 'a':
+		case 'b':
+		case 'c':
+		case 'd':
+		case 'e':
+		case 'f':
+		case 'g':
+		case 'h':
+		case 'i':
+		case 'j':
+		case 'k':
+		case 'l':
+		case 'm':
+		case 'n':
+		case 'o':
+		case 'p':
+		case 'q':
+		case 'r':
+		case 's':
+		case 't':
+		case 'u':
+		case 'v':
+		case 'w':
+		case 'x':
+		case 'y':
+		case 'z':
+		case '0':
+		case '1':
+		case '2':
+		case '3':
+		case '4':
+		case '5':
+		case '6':
+		case '7':
+		case '8':
+		case '9':
+		case '_':
+			opts++;
+			break;
+		case ':':
+			opts++;
+			*klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p;
+			*v_p = opts;
+			accept = true;
+			break;
+		case '\0':
+			if (opts != *opts_p) {
+				malloc_conf_format_error(
+				    "Conf string ends with key", *opts_p,
+				    opts - 1);
+				had_conf_error = true;
+			}
+			return true;
+		default:
+			malloc_conf_format_error(
+			    "Malformed conf string", *opts_p, opts);
+			had_conf_error = true;
+			return true;
+		}
+	}
+
+	for (accept = false; !accept;) {
+		switch (*opts) {
+		case ',':
+			opts++;
+			/*
+			 * Look ahead one character here, because the next time
+			 * this function is called, it will assume that end of
+			 * input has been cleanly reached if no input remains,
+			 * but we have optimistically already consumed the
+			 * comma if one exists.
+			 */
+			if (*opts == '\0') {
+				malloc_conf_format_error(
+				    "Conf string ends with comma", *opts_p,
+				    opts - 1);
+				had_conf_error = true;
+			}
+			*vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p;
+			accept = true;
+			break;
+		case '\0':
+			*vlen_p = (uintptr_t)opts - (uintptr_t)*v_p;
+			accept = true;
+			break;
+		default:
+			opts++;
+			break;
+		}
+	}
+
+	*opts_p = opts;
+	return false;
+}
+
+void
+malloc_abort_invalid_conf(void) {
+	assert(opt_abort_conf);
+	malloc_printf(
+	    "<jemalloc>: Abort (abort_conf:true) on invalid conf "
+	    "value (see above).\n");
+	invalid_conf_abort();
+}
+
+JET_EXTERN void
+conf_error(
+    const char *msg, const char *k, size_t klen, const char *v, size_t vlen) {
+	malloc_printf(
+	    "<jemalloc>: %s: %.*s:%.*s\n", msg, (int)klen, k, (int)vlen, v);
+	/* If abort_conf is set, error out after processing all options. */
+	const char *experimental = "experimental_";
+	if (strncmp(k, experimental, strlen(experimental)) == 0) {
+		/* However, tolerate experimental features. */
+		return;
+	}
+	const char  *deprecated[] = {"hpa_sec_bytes_after_flush"};
+	const size_t deprecated_cnt = (sizeof(deprecated)
+	    / sizeof(deprecated[0]));
+	for (size_t i = 0; i < deprecated_cnt; ++i) {
+		if (strncmp(k, deprecated[i], strlen(deprecated[i])) == 0) {
+			/* Tolerate deprecated features. */
+			return;
+		}
+	}
+	had_conf_error = true;
+}
+
+JET_EXTERN bool
+conf_handle_bool(const char *v, size_t vlen, bool *result) {
+	if (sizeof("true") - 1 == vlen && strncmp("true", v, vlen) == 0) {
+		*result = true;
+	} else if (sizeof("false") - 1 == vlen
+	    && strncmp("false", v, vlen) == 0) {
+		*result = false;
+	} else {
+		return true;
+	}
+	return false;
+}
+
+JEMALLOC_DIAGNOSTIC_PUSH
+JEMALLOC_DIAGNOSTIC_IGNORE("-Wunused-function")
+
+JET_EXTERN bool
+conf_handle_signed(const char *v, size_t vlen, intmax_t min, intmax_t max,
+    bool check_min, bool check_max, bool clip, intmax_t *result) {
+	char *end;
+	set_errno(0);
+	intmax_t mv = (intmax_t)malloc_strtoumax(v, &end, 0);
+	if (get_errno() != 0 || (uintptr_t)end - (uintptr_t)v != vlen) {
+		return true;
+	}
+	if (clip) {
+		if (check_min && mv < min) {
+			*result = min;
+		} else if (check_max && mv > max) {
+			*result = max;
+		} else {
+			*result = mv;
+		}
+	} else {
+		if ((check_min && mv < min) || (check_max && mv > max)) {
+			return true;
+		}
+		*result = mv;
+	}
+	return false;
+}
+
+JET_EXTERN bool
+conf_handle_char_p(const char *v, size_t vlen, char *dest, size_t dest_sz) {
+	if (dest_sz == 0) {
+		return false;
+	}
+	size_t cpylen = (vlen <= dest_sz - 1) ? vlen : dest_sz - 1;
+	strncpy(dest, v, cpylen);
+	dest[cpylen] = '\0';
+	return false;
+}
+
+JEMALLOC_DIAGNOSTIC_POP
+
+/* Number of sources for initializing malloc_conf */
+#define MALLOC_CONF_NSOURCES 5
+
+static const char *
+obtain_malloc_conf(unsigned which_source, char readlink_buf[PATH_MAX + 1]) {
+	if (config_debug) {
+		static unsigned read_source = 0;
+		/*
+		 * Each source should only be read once, to minimize # of
+		 * syscalls on init.
+		 */
+		assert(read_source == which_source);
+		read_source++;
+	}
+	assert(which_source < MALLOC_CONF_NSOURCES);
+
+	const char *ret;
+	switch (which_source) {
+	case 0:
+		ret = config_malloc_conf;
+		break;
+	case 1:
+		if (je_malloc_conf != NULL) {
+			/* Use options that were compiled into the program. */
+			ret = je_malloc_conf;
+		} else {
+			/* No configuration specified. */
+			ret = NULL;
+		}
+		break;
+	case 2: {
+#ifndef JEMALLOC_CONFIG_FILE
+		ret = NULL;
+		break;
+#else
+		ssize_t linklen = 0;
+#	ifndef _WIN32
+		int         saved_errno = errno;
+		const char *linkname =
+#		ifdef JEMALLOC_PREFIX
+		    "/etc/" JEMALLOC_PREFIX "malloc.conf"
+#		else
+		    "/etc/malloc.conf"
+#		endif
+		    ;
+
+		/*
+		 * Try to use the contents of the "/etc/malloc.conf" symbolic
+		 * link's name.
+		 */
+#		ifndef JEMALLOC_READLINKAT
+		linklen = readlink(linkname, readlink_buf, PATH_MAX);
+#		else
+		linklen = readlinkat(
+		    AT_FDCWD, linkname, readlink_buf, PATH_MAX);
+#		endif
+		if (linklen == -1) {
+			/* No configuration specified. */
+			linklen = 0;
+			/* Restore errno. */
+			set_errno(saved_errno);
+		}
+#	endif
+		readlink_buf[linklen] = '\0';
+		ret = readlink_buf;
+		break;
+#endif
+	}
+	case 3: {
+#ifndef JEMALLOC_CONFIG_ENV
+		ret = NULL;
+		break;
+#else
+		const char *envname =
+#	ifdef JEMALLOC_PREFIX
+		    JEMALLOC_CPREFIX "MALLOC_CONF"
+#	else
+		    "MALLOC_CONF"
+#	endif
+		    ;
+
+		if ((ret = jemalloc_getenv(envname)) != NULL) {
+			opt_malloc_conf_env_var = ret;
+		} else {
+			/* No configuration specified. */
+			ret = NULL;
+		}
+		break;
+#endif
+	}
+	case 4: {
+		ret = je_malloc_conf_2_conf_harder;
+		break;
+	}
+	default:
+		not_reached();
+		ret = NULL;
+	}
+	return ret;
+}
+
+static void
+validate_hpa_settings(void) {
+	if (!hpa_supported() || !opt_hpa) {
+		return;
+	}
+	if (HUGEPAGE > HUGEPAGE_MAX_EXPECTED_SIZE) {
+		had_conf_error = true;
+		malloc_printf(
+		    "<jemalloc>: huge page size (%zu) greater than expected."
+		    "May not be supported or behave as expected.",
+		    HUGEPAGE);
+	}
+#ifndef JEMALLOC_HAVE_MADVISE_COLLAPSE
+	if (opt_hpa_opts.hugify_sync) {
+		had_conf_error = true;
+		malloc_printf(
+		    "<jemalloc>: hpa_hugify_sync config option is enabled, "
+		    "but MADV_COLLAPSE support was not detected at build "
+		    "time.");
+	}
+#endif
+}
+
+static void
+malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
+    bool initial_call, const char *opts_cache[MALLOC_CONF_NSOURCES],
+    char readlink_buf[PATH_MAX + 1]) {
+	static const char *opts_explain[MALLOC_CONF_NSOURCES] = {
+	    "string specified via --with-malloc-conf",
+	    "string pointed to by the global variable malloc_conf",
+	    "\"name\" of the file referenced by the symbolic link named "
+	    "/etc/malloc.conf",
+	    "value of the environment variable MALLOC_CONF",
+	    "string pointed to by the global variable "
+	    "malloc_conf_2_conf_harder",
+	};
+	unsigned    i;
+	const char *opts, *k, *v;
+	size_t      klen, vlen;
+
+	for (i = 0; i < MALLOC_CONF_NSOURCES; i++) {
+		/* Get runtime configuration. */
+		if (initial_call) {
+			opts_cache[i] = obtain_malloc_conf(i, readlink_buf);
+		}
+		opts = opts_cache[i];
+		if (!initial_call && opt_confirm_conf) {
+			malloc_printf(
+			    "<jemalloc>: malloc_conf #%u (%s): \"%s\"\n", i + 1,
+			    opts_explain[i], opts != NULL ? opts : "");
+		}
+		if (opts == NULL) {
+			continue;
+		}
+
+		while (
+		    *opts != '\0' && !conf_next(&opts, &k, &klen, &v, &vlen)) {
+#define CONF_ERROR(msg, k, klen, v, vlen)                                      \
+	if (!initial_call) {                                                   \
+		conf_error(msg, k, klen, v, vlen);                             \
+		cur_opt_valid = false;                                         \
+	}
+#define CONF_CONTINUE                                                          \
+	{                                                                      \
+		if (!initial_call && opt_confirm_conf && cur_opt_valid) {      \
+			malloc_printf(                                         \
+			    "<jemalloc>: -- "                                  \
+			    "Set conf value: %.*s:%.*s"                        \
+			    "\n",                                              \
+			    (int)klen, k, (int)vlen, v);                       \
+		}                                                              \
+		continue;                                                      \
+	}
+#define CONF_MATCH(n) (sizeof(n) - 1 == klen && strncmp(n, k, klen) == 0)
+#define CONF_MATCH_VALUE(n) (sizeof(n) - 1 == vlen && strncmp(n, v, vlen) == 0)
+#define CONF_HANDLE_BOOL(o, n)                                                 \
+	if (CONF_MATCH(n)) {                                                   \
+		if (conf_handle_bool(v, vlen, &o)) {                           \
+			CONF_ERROR("Invalid conf value", k, klen, v, vlen);    \
+		}                                                              \
+		CONF_CONTINUE;                                                 \
+	}
+			/*
+       * One of the CONF_MIN macros below expands, in one of the use points,
+       * to "unsigned integer < 0", which is always false, triggering the
+       * GCC -Wtype-limits warning, which we disable here and re-enable below.
+       */
+			JEMALLOC_DIAGNOSTIC_PUSH
+			JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS
+
+#define CONF_DONT_CHECK_MIN(um, min) false
+#define CONF_CHECK_MIN(um, min) ((um) < (min))
+#define CONF_DONT_CHECK_MAX(um, max) false
+#define CONF_CHECK_MAX(um, max) ((um) > (max))
+
+#define CONF_VALUE_READ(max_t, result)                                         \
+	char *end;                                                             \
+	set_errno(0);                                                          \
+	result = (max_t)malloc_strtoumax(v, &end, 0);
+#define CONF_VALUE_READ_FAIL()                                                 \
+	(get_errno() != 0 || (uintptr_t)end - (uintptr_t)v != vlen)
+
+#define CONF_HANDLE_T(t, max_t, o, n, min, max, check_min, check_max, clip)    \
+	if (CONF_MATCH(n)) {                                                   \
+		max_t mv;                                                      \
+		CONF_VALUE_READ(max_t, mv)                                     \
+		if (CONF_VALUE_READ_FAIL()) {                                  \
+			CONF_ERROR("Invalid conf value", k, klen, v, vlen);    \
+		} else if (clip) {                                             \
+			if (check_min(mv, (t)(min))) {                         \
+				o = (t)(min);                                  \
+			} else if (check_max(mv, (t)(max))) {                  \
+				o = (t)(max);                                  \
+			} else {                                               \
+				o = (t)mv;                                     \
+			}                                                      \
+		} else {                                                       \
+			if (check_min(mv, (t)(min))                            \
+			    || check_max(mv, (t)(max))) {                      \
+				CONF_ERROR(                                    \
+				    "Out-of-range "                            \
+				    "conf value",                              \
+				    k, klen, v, vlen);                         \
+			} else {                                               \
+				o = (t)mv;                                     \
+			}                                                      \
+		}                                                              \
+		CONF_CONTINUE;                                                 \
+	}
+#define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip)         \
+	CONF_HANDLE_T(t, uintmax_t, o, n, min, max, check_min, check_max, clip)
+#define CONF_HANDLE_T_SIGNED(t, o, n, min, max, check_min, check_max, clip)    \
+	CONF_HANDLE_T(t, intmax_t, o, n, min, max, check_min, check_max, clip)
+
+#define CONF_HANDLE_UNSIGNED(o, n, min, max, check_min, check_max, clip)       \
+	CONF_HANDLE_T_U(unsigned, o, n, min, max, check_min, check_max, clip)
+#define CONF_HANDLE_SIZE_T(o, n, min, max, check_min, check_max, clip)         \
+	CONF_HANDLE_T_U(size_t, o, n, min, max, check_min, check_max, clip)
+#define CONF_HANDLE_INT64_T(o, n, min, max, check_min, check_max, clip)        \
+	CONF_HANDLE_T_SIGNED(                                                  \
+	    int64_t, o, n, min, max, check_min, check_max, clip)
+#define CONF_HANDLE_UINT64_T(o, n, min, max, check_min, check_max, clip)       \
+	CONF_HANDLE_T_U(uint64_t, o, n, min, max, check_min, check_max, clip)
+#define CONF_HANDLE_SSIZE_T(o, n, min, max)                                    \
+	CONF_HANDLE_T_SIGNED(                                                  \
+	    ssize_t, o, n, min, max, CONF_CHECK_MIN, CONF_CHECK_MAX, false)
+#define CONF_HANDLE_CHAR_P(o, n, d)                                            \
+	if (CONF_MATCH(n)) {                                                   \
+		size_t cpylen = (vlen <= sizeof(o) - 1) ? vlen                 \
+		                                        : sizeof(o) - 1;       \
+		strncpy(o, v, cpylen);                                         \
+		o[cpylen] = '\0';                                              \
+		CONF_CONTINUE;                                                 \
+	}
+
+			bool cur_opt_valid = true;
+
+			CONF_HANDLE_BOOL(opt_confirm_conf, "confirm_conf")
+			if (initial_call) {
+				continue;
+			}
+
+			CONF_HANDLE_BOOL(opt_abort, "abort")
+			CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf")
+			CONF_HANDLE_BOOL(opt_cache_oblivious, "cache_oblivious")
+			CONF_HANDLE_BOOL(opt_trust_madvise, "trust_madvise")
+			CONF_HANDLE_BOOL(
+			    opt_experimental_hpa_start_huge_if_thp_always,
+			    "experimental_hpa_start_huge_if_thp_always")
+			CONF_HANDLE_BOOL(opt_experimental_hpa_enforce_hugify,
+			    "experimental_hpa_enforce_hugify")
+			CONF_HANDLE_BOOL(
+			    opt_huge_arena_pac_thp, "huge_arena_pac_thp")
+			if (strncmp("metadata_thp", k, klen) == 0) {
+				int  m;
+				bool match = false;
+				for (m = 0; m < metadata_thp_mode_limit; m++) {
+					if (strncmp(metadata_thp_mode_names[m],
+					        v, vlen)
+					    == 0) {
+						opt_metadata_thp = m;
+						match = true;
+						break;
+					}
+				}
+				if (!match) {
+					CONF_ERROR("Invalid conf value", k,
+					    klen, v, vlen);
+				}
+				CONF_CONTINUE;
+			}
+			CONF_HANDLE_BOOL(opt_retain, "retain")
+			if (strncmp("dss", k, klen) == 0) {
+				int  m;
+				bool match = false;
+				for (m = 0; m < dss_prec_limit; m++) {
+					if (strncmp(dss_prec_names[m], v, vlen)
+					    == 0) {
+						if (extent_dss_prec_set(m)) {
+							CONF_ERROR(
+							    "Error setting dss",
+							    k, klen, v, vlen);
+						} else {
+							opt_dss =
+							    dss_prec_names[m];
+							match = true;
+							break;
+						}
+					}
+				}
+				if (!match) {
+					CONF_ERROR("Invalid conf value", k,
+					    klen, v, vlen);
+				}
+				CONF_CONTINUE;
+			}
+			if (CONF_MATCH("narenas")) {
+				if (CONF_MATCH_VALUE("default")) {
+					opt_narenas = 0;
+					CONF_CONTINUE;
+				} else {
+					CONF_HANDLE_UNSIGNED(opt_narenas,
+					    "narenas", 1, UINT_MAX,
+					    CONF_CHECK_MIN, CONF_DONT_CHECK_MAX,
+					    /* clip */ false)
+				}
+			}
+			if (CONF_MATCH("narenas_ratio")) {
+				char *end;
+				bool  err = fxp_parse(
+                                    &opt_narenas_ratio, v, &end);
+				if (err || (size_t)(end - v) != vlen) {
+					CONF_ERROR("Invalid conf value", k,
+					    klen, v, vlen);
+				}
+				CONF_CONTINUE;
+			}
+			if (CONF_MATCH("bin_shards")) {
+				const char *bin_shards_segment_cur = v;
+				size_t      vlen_left = vlen;
+				do {
+					size_t size_start;
+					size_t size_end;
+					size_t nshards;
+					bool   err = multi_setting_parse_next(
+                                            &bin_shards_segment_cur, &vlen_left,
+                                            &size_start, &size_end, &nshards);
+					if (err
+					    || bin_update_shard_size(
+					        bin_shard_sizes, size_start,
+					        size_end, nshards)) {
+						CONF_ERROR(
+						    "Invalid settings for "
+						    "bin_shards",
+						    k, klen, v, vlen);
+						break;
+					}
+				} while (vlen_left > 0);
+				CONF_CONTINUE;
+			}
+			if (CONF_MATCH("tcache_ncached_max")) {
+				bool err = tcache_bin_info_default_init(
+				    v, vlen);
+				if (err) {
+					CONF_ERROR(
+					    "Invalid settings for "
+					    "tcache_ncached_max",
+					    k, klen, v, vlen);
+				}
+				CONF_CONTINUE;
+			}
+			CONF_HANDLE_INT64_T(opt_mutex_max_spin,
+			    "mutex_max_spin", -1, INT64_MAX, CONF_CHECK_MIN,
+			    CONF_DONT_CHECK_MAX, false);
+			CONF_HANDLE_SSIZE_T(opt_dirty_decay_ms,
+			    "dirty_decay_ms", -1,
+			    NSTIME_SEC_MAX * KQU(1000) < QU(SSIZE_MAX)
+			        ? NSTIME_SEC_MAX * KQU(1000)
+			        : SSIZE_MAX);
+			CONF_HANDLE_SSIZE_T(opt_muzzy_decay_ms,
+			    "muzzy_decay_ms", -1,
+			    NSTIME_SEC_MAX * KQU(1000) < QU(SSIZE_MAX)
+			        ? NSTIME_SEC_MAX * KQU(1000)
+			        : SSIZE_MAX);
+			CONF_HANDLE_SIZE_T(opt_process_madvise_max_batch,
+			    "process_madvise_max_batch", 0,
+			    PROCESS_MADVISE_MAX_BATCH_LIMIT,
+			    CONF_DONT_CHECK_MIN, CONF_CHECK_MAX,
+			    /* clip */ true)
+			CONF_HANDLE_BOOL(opt_stats_print, "stats_print")
+			if (CONF_MATCH("stats_print_opts")) {
+				init_opt_stats_opts(
+				    v, vlen, opt_stats_print_opts);
+				CONF_CONTINUE;
+			}
+			CONF_HANDLE_INT64_T(opt_stats_interval,
+			    "stats_interval", -1, INT64_MAX, CONF_CHECK_MIN,
+			    CONF_DONT_CHECK_MAX, false)
+			if (CONF_MATCH("stats_interval_opts")) {
+				init_opt_stats_opts(
+				    v, vlen, opt_stats_interval_opts);
+				CONF_CONTINUE;
+			}
+			if (config_fill) {
+				if (CONF_MATCH("junk")) {
+					if (CONF_MATCH_VALUE("true")) {
+						opt_junk = "true";
+						opt_junk_alloc = opt_junk_free =
+						    true;
+					} else if (CONF_MATCH_VALUE("false")) {
+						opt_junk = "false";
+						opt_junk_alloc = opt_junk_free =
+						    false;
+					} else if (CONF_MATCH_VALUE("alloc")) {
+						opt_junk = "alloc";
+						opt_junk_alloc = true;
+						opt_junk_free = false;
+					} else if (CONF_MATCH_VALUE("free")) {
+						opt_junk = "free";
+						opt_junk_alloc = false;
+						opt_junk_free = true;
+					} else {
+						CONF_ERROR("Invalid conf value",
+						    k, klen, v, vlen);
+					}
+					CONF_CONTINUE;
+				}
+				CONF_HANDLE_BOOL(opt_zero, "zero")
+			}
+			if (config_utrace) {
+				CONF_HANDLE_BOOL(opt_utrace, "utrace")
+			}
+			if (config_xmalloc) {
+				CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc")
+			}
+			if (config_enable_cxx) {
+				CONF_HANDLE_BOOL(
+				    opt_experimental_infallible_new,
+				    "experimental_infallible_new")
+			}
+
+			CONF_HANDLE_BOOL(opt_experimental_tcache_gc,
+			    "experimental_tcache_gc")
+			CONF_HANDLE_BOOL(opt_tcache, "tcache")
+			CONF_HANDLE_SIZE_T(opt_tcache_max, "tcache_max", 0,
+			    TCACHE_MAXCLASS_LIMIT, CONF_DONT_CHECK_MIN,
+			    CONF_CHECK_MAX, /* clip */ true)
+			if (CONF_MATCH("lg_tcache_max")) {
+				size_t m;
+				CONF_VALUE_READ(size_t, m)
+				if (CONF_VALUE_READ_FAIL()) {
+					CONF_ERROR("Invalid conf value", k,
+					    klen, v, vlen);
+				} else {
+					/* clip if necessary */
+					if (m > TCACHE_LG_MAXCLASS_LIMIT) {
+						m = TCACHE_LG_MAXCLASS_LIMIT;
+					}
+					opt_tcache_max = (size_t)1 << m;
+				}
+				CONF_CONTINUE;
+			}
+			/*
+			 * Anyone trying to set a value outside -16 to 16 is
+			 * deeply confused.
+			 */
+			CONF_HANDLE_SSIZE_T(opt_lg_tcache_nslots_mul,
+			    "lg_tcache_nslots_mul", -16, 16)
+			/* Ditto with values past 2048. */
+			CONF_HANDLE_UNSIGNED(opt_tcache_nslots_small_min,
+			    "tcache_nslots_small_min", 1, 2048, CONF_CHECK_MIN,
+			    CONF_CHECK_MAX, /* clip */ true)
+			CONF_HANDLE_UNSIGNED(opt_tcache_nslots_small_max,
+			    "tcache_nslots_small_max", 1, 2048, CONF_CHECK_MIN,
+			    CONF_CHECK_MAX, /* clip */ true)
+			CONF_HANDLE_UNSIGNED(opt_tcache_nslots_large,
+			    "tcache_nslots_large", 1, 2048, CONF_CHECK_MIN,
+			    CONF_CHECK_MAX, /* clip */ true)
+			CONF_HANDLE_SIZE_T(opt_tcache_gc_incr_bytes,
+			    "tcache_gc_incr_bytes", 1024, SIZE_T_MAX,
+			    CONF_CHECK_MIN, CONF_DONT_CHECK_MAX,
+			    /* clip */ true)
+			CONF_HANDLE_SIZE_T(opt_tcache_gc_delay_bytes,
+			    "tcache_gc_delay_bytes", 0, SIZE_T_MAX,
+			    CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX,
+			    /* clip */ false)
+			CONF_HANDLE_UNSIGNED(opt_lg_tcache_flush_small_div,
+			    "lg_tcache_flush_small_div", 1, 16, CONF_CHECK_MIN,
+			    CONF_CHECK_MAX, /* clip */ true)
+			CONF_HANDLE_UNSIGNED(opt_lg_tcache_flush_large_div,
+			    "lg_tcache_flush_large_div", 1, 16, CONF_CHECK_MIN,
+			    CONF_CHECK_MAX, /* clip */ true)
+			CONF_HANDLE_UNSIGNED(opt_debug_double_free_max_scan,
+			    "debug_double_free_max_scan", 0, UINT_MAX,
+			    CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX,
+			    /* clip */ false)
+			CONF_HANDLE_SIZE_T(opt_calloc_madvise_threshold,
+			    "calloc_madvise_threshold", 0, SC_LARGE_MAXCLASS,
+			    CONF_DONT_CHECK_MIN, CONF_CHECK_MAX,
+			    /* clip */ false)
+
+			/*
+			 * The runtime option of oversize_threshold remains
+			 * undocumented.  It may be tweaked in the next major
+			 * release (6.0).  The default value 8M is rather
+			 * conservative / safe.  Tuning it further down may
+			 * improve fragmentation a bit more, but may also cause
+			 * contention on the huge arena.
+			 */
+			CONF_HANDLE_SIZE_T(opt_oversize_threshold,
+			    "oversize_threshold", 0, SC_LARGE_MAXCLASS,
+			    CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, false)
+			CONF_HANDLE_SIZE_T(opt_lg_extent_max_active_fit,
+			    "lg_extent_max_active_fit", 0,
+			    (sizeof(size_t) << 3), CONF_DONT_CHECK_MIN,
+			    CONF_CHECK_MAX, false)
+
+			if (strncmp("percpu_arena", k, klen) == 0) {
+				bool match = false;
+				for (int m = percpu_arena_mode_names_base;
+				    m < percpu_arena_mode_names_limit; m++) {
+					if (strncmp(percpu_arena_mode_names[m],
+					        v, vlen)
+					    == 0) {
+						if (!have_percpu_arena) {
+							CONF_ERROR(
+							    "No getcpu support",
+							    k, klen, v, vlen);
+						}
+						opt_percpu_arena = m;
+						match = true;
+						break;
+					}
+				}
+				if (!match) {
+					CONF_ERROR("Invalid conf value", k,
+					    klen, v, vlen);
+				}
+				CONF_CONTINUE;
+			}
+			CONF_HANDLE_BOOL(
+			    opt_background_thread, "background_thread");
+			CONF_HANDLE_SIZE_T(opt_max_background_threads,
+			    "max_background_threads", 1,
+			    opt_max_background_threads, CONF_CHECK_MIN,
+			    CONF_CHECK_MAX, true);
+			CONF_HANDLE_BOOL(opt_hpa, "hpa")
+			CONF_HANDLE_SIZE_T(opt_hpa_opts.slab_max_alloc,
+			    "hpa_slab_max_alloc", PAGE, HUGEPAGE,
+			    CONF_CHECK_MIN, CONF_CHECK_MAX, true);
+
+			/*
+			 * Accept either a ratio-based or an exact hugification
+			 * threshold.
+			 */
+			CONF_HANDLE_SIZE_T(opt_hpa_opts.hugification_threshold,
+			    "hpa_hugification_threshold", PAGE, HUGEPAGE,
+			    CONF_CHECK_MIN, CONF_CHECK_MAX, true);
+			if (CONF_MATCH("hpa_hugification_threshold_ratio")) {
+				fxp_t ratio;
+				char *end;
+				bool  err = fxp_parse(&ratio, v, &end);
+				if (err || (size_t)(end - v) != vlen
+				    || ratio > FXP_INIT_INT(1)) {
+					CONF_ERROR("Invalid conf value", k,
+					    klen, v, vlen);
+				} else {
+					opt_hpa_opts.hugification_threshold =
+					    fxp_mul_frac(HUGEPAGE, ratio);
+				}
+				CONF_CONTINUE;
+			}
+
+			CONF_HANDLE_UINT64_T(opt_hpa_opts.hugify_delay_ms,
+			    "hpa_hugify_delay_ms", 0, 0, CONF_DONT_CHECK_MIN,
+			    CONF_DONT_CHECK_MAX, false);
+
+			CONF_HANDLE_BOOL(
+			    opt_hpa_opts.hugify_sync, "hpa_hugify_sync");
+
+			CONF_HANDLE_UINT64_T(opt_hpa_opts.min_purge_interval_ms,
+			    "hpa_min_purge_interval_ms", 0, 0,
+			    CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false);
+
+			CONF_HANDLE_SSIZE_T(
+			    opt_hpa_opts.experimental_max_purge_nhp,
+			    "experimental_hpa_max_purge_nhp", -1, SSIZE_MAX);
+
+			/*
+			 * Accept either a ratio-based or an exact purge
+			 * threshold.
+			 */
+			CONF_HANDLE_SIZE_T(opt_hpa_opts.purge_threshold,
+			    "hpa_purge_threshold", PAGE, HUGEPAGE,
+			    CONF_CHECK_MIN, CONF_CHECK_MAX, true);
+			if (CONF_MATCH("hpa_purge_threshold_ratio")) {
+				fxp_t ratio;
+				char *end;
+				bool  err = fxp_parse(&ratio, v, &end);
+				if (err || (size_t)(end - v) != vlen
+				    || ratio > FXP_INIT_INT(1)) {
+					CONF_ERROR("Invalid conf value", k,
+					    klen, v, vlen);
+				} else {
+					opt_hpa_opts.purge_threshold =
+					    fxp_mul_frac(HUGEPAGE, ratio);
+				}
+				CONF_CONTINUE;
+			}
+
+			CONF_HANDLE_UINT64_T(opt_hpa_opts.min_purge_delay_ms,
+			    "hpa_min_purge_delay_ms", 0, UINT64_MAX,
+			    CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false);
+
+			if (strncmp("hpa_hugify_style", k, klen) == 0) {
+				bool match = false;
+				for (int m = 0; m < hpa_hugify_style_limit;
+				    m++) {
+					if (strncmp(hpa_hugify_style_names[m],
+					        v, vlen)
+					    == 0) {
+						opt_hpa_opts.hugify_style = m;
+						match = true;
+						break;
+					}
+				}
+				if (!match) {
+					CONF_ERROR("Invalid conf value", k,
+					    klen, v, vlen);
+				}
+				CONF_CONTINUE;
+			}
+
+			if (CONF_MATCH("hpa_dirty_mult")) {
+				if (CONF_MATCH_VALUE("-1")) {
+					opt_hpa_opts.dirty_mult = (fxp_t)-1;
+					CONF_CONTINUE;
+				}
+				fxp_t ratio;
+				char *end;
+				bool  err = fxp_parse(&ratio, v, &end);
+				if (err || (size_t)(end - v) != vlen) {
+					CONF_ERROR("Invalid conf value", k,
+					    klen, v, vlen);
+				} else {
+					opt_hpa_opts.dirty_mult = ratio;
+				}
+				CONF_CONTINUE;
+			}
+			CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.nshards,
+			    "hpa_sec_nshards", 0, 0, CONF_CHECK_MIN,
+			    CONF_DONT_CHECK_MAX, true);
+			CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_alloc,
+			    "hpa_sec_max_alloc", PAGE,
+			    USIZE_GROW_SLOW_THRESHOLD, CONF_CHECK_MIN,
+			    CONF_CHECK_MAX, true);
+			CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_bytes,
+			    "hpa_sec_max_bytes", SEC_OPTS_MAX_BYTES_DEFAULT, 0,
+			    CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);
+			CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.batch_fill_extra,
+			    "hpa_sec_batch_fill_extra", 1, HUGEPAGE_PAGES,
+			    CONF_CHECK_MIN, CONF_CHECK_MAX, true);
+
+			if (CONF_MATCH("slab_sizes")) {
+				if (CONF_MATCH_VALUE("default")) {
+					sc_data_init(sc_data);
+					CONF_CONTINUE;
+				}
+				bool        err;
+				const char *slab_size_segment_cur = v;
+				size_t      vlen_left = vlen;
+				do {
+					size_t slab_start;
+					size_t slab_end;
+					size_t pgs;
+					err = multi_setting_parse_next(
+					    &slab_size_segment_cur, &vlen_left,
+					    &slab_start, &slab_end, &pgs);
+					if (!err) {
+						sc_data_update_slab_size(
+						    sc_data, slab_start,
+						    slab_end, (int)pgs);
+					} else {
+						CONF_ERROR(
+						    "Invalid settings "
+						    "for slab_sizes",
+						    k, klen, v, vlen);
+					}
+				} while (!err && vlen_left > 0);
+				CONF_CONTINUE;
+			}
+			if (config_prof) {
+				CONF_HANDLE_BOOL(opt_prof, "prof")
+				CONF_HANDLE_CHAR_P(
+				    opt_prof_prefix, "prof_prefix", "jeprof")
+				CONF_HANDLE_BOOL(opt_prof_active, "prof_active")
+				CONF_HANDLE_BOOL(opt_prof_thread_active_init,
+				    "prof_thread_active_init")
+				CONF_HANDLE_SIZE_T(opt_lg_prof_sample,
+				    "lg_prof_sample", 0,
+				    (sizeof(uint64_t) << 3) - 1,
+				    CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, true)
+				CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum")
+				CONF_HANDLE_UNSIGNED(opt_prof_bt_max,
+				    "prof_bt_max", 1, PROF_BT_MAX_LIMIT,
+				    CONF_CHECK_MIN, CONF_CHECK_MAX,
+				    /* clip */ true)
+				CONF_HANDLE_SSIZE_T(opt_lg_prof_interval,
+				    "lg_prof_interval", -1,
+				    (sizeof(uint64_t) << 3) - 1)
+				CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump")
+				CONF_HANDLE_BOOL(opt_prof_final, "prof_final")
+				CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak")
+				CONF_HANDLE_BOOL(
+				    opt_prof_leak_error, "prof_leak_error")
+				CONF_HANDLE_BOOL(opt_prof_log, "prof_log")
+				CONF_HANDLE_BOOL(opt_prof_pid_namespace,
+				    "prof_pid_namespace")
+				CONF_HANDLE_SSIZE_T(opt_prof_recent_alloc_max,
+				    "prof_recent_alloc_max", -1, SSIZE_MAX)
+				CONF_HANDLE_BOOL(opt_prof_stats, "prof_stats")
+				CONF_HANDLE_BOOL(opt_prof_sys_thread_name,
+				    "prof_sys_thread_name")
+				if (CONF_MATCH("prof_time_resolution")) {
+					if (CONF_MATCH_VALUE("default")) {
+						opt_prof_time_res =
+						    prof_time_res_default;
+					} else if (CONF_MATCH_VALUE("high")) {
+						if (!config_high_res_timer) {
+							CONF_ERROR(
+							    "No high resolution"
+							    " timer support",
+							    k, klen, v, vlen);
+						} else {
+							opt_prof_time_res =
+							    prof_time_res_high;
+						}
+					} else {
+						CONF_ERROR("Invalid conf value",
+						    k, klen, v, vlen);
+					}
+					CONF_CONTINUE;
+				}
+				/*
+				 * Undocumented.  When set to false, don't
+				 * correct for an unbiasing bug in jeprof
+				 * attribution.  This can be handy if you want
+				 * to get consistent numbers from your binary
+				 * across different jemalloc versions, even if
+				 * those numbers are incorrect.  The default is
+				 * true.
+				 */
+				CONF_HANDLE_BOOL(opt_prof_unbias, "prof_unbias")
+			}
+			if (config_log) {
+				if (CONF_MATCH("log")) {
+					size_t cpylen = (vlen
+					            <= sizeof(log_var_names)
+					        ? vlen
+					        : sizeof(log_var_names) - 1);
+					strncpy(log_var_names, v, cpylen);
+					log_var_names[cpylen] = '\0';
+					CONF_CONTINUE;
+				}
+			}
+			if (CONF_MATCH("thp")) {
+				bool match = false;
+				for (int m = 0; m < thp_mode_names_limit; m++) {
+					if (strncmp(thp_mode_names[m], v, vlen)
+					    == 0) {
+						if (!have_madvise_huge
+						    && !have_memcntl) {
+							CONF_ERROR(
+							    "No THP support", k,
+							    klen, v, vlen);
+						}
+						opt_thp = m;
+						match = true;
+						break;
+					}
+				}
+				if (!match) {
+					CONF_ERROR("Invalid conf value", k,
+					    klen, v, vlen);
+				}
+				CONF_CONTINUE;
+			}
+			if (CONF_MATCH("zero_realloc")) {
+				if (CONF_MATCH_VALUE("alloc")) {
+					opt_zero_realloc_action =
+					    zero_realloc_action_alloc;
+				} else if (CONF_MATCH_VALUE("free")) {
+					opt_zero_realloc_action =
+					    zero_realloc_action_free;
+				} else if (CONF_MATCH_VALUE("abort")) {
+					opt_zero_realloc_action =
+					    zero_realloc_action_abort;
+				} else {
+					CONF_ERROR("Invalid conf value", k,
+					    klen, v, vlen);
+				}
+				CONF_CONTINUE;
+			}
+			if (config_uaf_detection
+			    && CONF_MATCH("lg_san_uaf_align")) {
+				ssize_t a;
+				CONF_VALUE_READ(ssize_t, a)
+				if (CONF_VALUE_READ_FAIL() || a < -1) {
+					CONF_ERROR("Invalid conf value", k,
+					    klen, v, vlen);
+				}
+				if (a == -1) {
+					opt_lg_san_uaf_align = -1;
+					CONF_CONTINUE;
+				}
+
+				/* clip if necessary */
+				ssize_t max_allowed = (sizeof(size_t) << 3) - 1;
+				ssize_t min_allowed = LG_PAGE;
+				if (a > max_allowed) {
+					a = max_allowed;
+				} else if (a < min_allowed) {
+					a = min_allowed;
+				}
+
+				opt_lg_san_uaf_align = a;
+				CONF_CONTINUE;
+			}
+
+			CONF_HANDLE_SIZE_T(opt_san_guard_small,
+			    "san_guard_small", 0, SIZE_T_MAX,
+			    CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false)
+			CONF_HANDLE_SIZE_T(opt_san_guard_large,
+			    "san_guard_large", 0, SIZE_T_MAX,
+			    CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false)
+
+			/*
+			 * Disable large size classes is now the default
+			 * behavior in jemalloc.  Although it is configurable
+			 * in MALLOC_CONF, this is mainly for debugging
+			 * purposes and should not be tuned.
+			 */
+			CONF_HANDLE_BOOL(opt_disable_large_size_classes,
+			    "disable_large_size_classes");
+
+			CONF_ERROR("Invalid conf pair", k, klen, v, vlen);
+#undef CONF_ERROR
+#undef CONF_CONTINUE
+#undef CONF_MATCH
+#undef CONF_MATCH_VALUE
+#undef CONF_HANDLE_BOOL
+#undef CONF_DONT_CHECK_MIN
+#undef CONF_CHECK_MIN
+#undef CONF_DONT_CHECK_MAX
+#undef CONF_CHECK_MAX
+#undef CONF_HANDLE_T
+#undef CONF_HANDLE_T_U
+#undef CONF_HANDLE_T_SIGNED
+#undef CONF_HANDLE_UNSIGNED
+#undef CONF_HANDLE_SIZE_T
+#undef CONF_HANDLE_SSIZE_T
+#undef CONF_HANDLE_CHAR_P
+			/* Re-enable diagnostic "-Wtype-limits" */
+			JEMALLOC_DIAGNOSTIC_POP
+		}
+		validate_hpa_settings();
+		if (opt_abort_conf && had_conf_error) {
+			malloc_abort_invalid_conf();
+		}
+	}
+	atomic_store_b(&log_init_done, true, ATOMIC_RELEASE);
+}
+
+static bool
+malloc_conf_init_check_deps(void) {
+	if (opt_prof_leak_error && !opt_prof_final) {
+		malloc_printf(
+		    "<jemalloc>: prof_leak_error is set w/o "
+		    "prof_final.\n");
+		return true;
+	}
+	/* To emphasize in the stats output that opt is disabled when !debug. */
+	if (!config_debug) {
+		opt_debug_double_free_max_scan = 0;
+	}
+
+	return false;
+}
+
+void
+malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
+    char readlink_buf[PATH_MAX + 1]) {
+	const char *opts_cache[MALLOC_CONF_NSOURCES] = {
+	    NULL, NULL, NULL, NULL, NULL};
+
+	/* The first call only set the confirm_conf option and opts_cache */
+	malloc_conf_init_helper(NULL, NULL, true, opts_cache, readlink_buf);
+	malloc_conf_init_helper(
+	    sc_data, bin_shard_sizes, false, opts_cache, NULL);
+	if (malloc_conf_init_check_deps()) {
+		/* check_deps does warning msg only; abort below if needed. */
+		if (opt_abort_conf) {
+			malloc_abort_invalid_conf();
+		}
+	}
+}
+
+#undef MALLOC_CONF_NSOURCES
diff --git a/src/counter.c b/src/counter.c
index 8f1ae3af..8257a062 100644
--- a/src/counter.c
+++ b/src/counter.c
@@ -6,7 +6,7 @@
 bool
 counter_accum_init(counter_accum_t *counter, uint64_t interval) {
 	if (LOCKEDINT_MTX_INIT(counter->mtx, "counter_accum",
-	    WITNESS_RANK_COUNTER_ACCUM, malloc_mutex_rank_exclusive)) {
+	        WITNESS_RANK_COUNTER_ACCUM, malloc_mutex_rank_exclusive)) {
 		return true;
 	}
 	locked_init_u64_unsynchronized(&counter->accumbytes, 0);
diff --git a/src/ctl.c b/src/ctl.c
index 135271ba..0b72086c 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -25,10 +25,10 @@
  * ctl_mtx protects the following:
  * - ctl_stats->*
  */
-static malloc_mutex_t	ctl_mtx;
-static bool		ctl_initialized;
-static ctl_stats_t	*ctl_stats;
-static ctl_arenas_t	*ctl_arenas;
+static malloc_mutex_t ctl_mtx;
+static bool           ctl_initialized;
+static ctl_stats_t   *ctl_stats;
+static ctl_arenas_t  *ctl_arenas;
 
 /******************************************************************************/
 /* Helpers for named and indexed nodes. */
@@ -53,20 +53,23 @@ ctl_indexed_node(const ctl_node_t *node) {
 /******************************************************************************/
 /* Function prototypes for non-inline static functions. */
 
-#define CTL_PROTO(n)							\
-static int	n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,	\
-    void *oldp, size_t *oldlenp, void *newp, size_t newlen);
+#define CTL_PROTO(n)                                                           \
+	static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,       \
+	    void *oldp, size_t *oldlenp, void *newp, size_t newlen);
 
-#define INDEX_PROTO(n)							\
-static const ctl_named_node_t	*n##_index(tsdn_t *tsdn,		\
-    const size_t *mib, size_t miblen, size_t i);
+#define INDEX_PROTO(n)                                                         \
+	static const ctl_named_node_t *n##_index(                              \
+	    tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i);
 
 CTL_PROTO(version)
 CTL_PROTO(epoch)
 CTL_PROTO(background_thread)
 CTL_PROTO(max_background_threads)
 CTL_PROTO(thread_tcache_enabled)
+CTL_PROTO(thread_tcache_max)
 CTL_PROTO(thread_tcache_flush)
+CTL_PROTO(thread_tcache_ncached_max_write)
+CTL_PROTO(thread_tcache_ncached_max_read_sizeclass)
 CTL_PROTO(thread_peak_read)
 CTL_PROTO(thread_peak_reset)
 CTL_PROTO(thread_prof_name)
@@ -86,25 +89,34 @@ CTL_PROTO(config_opt_safety_checks)
 CTL_PROTO(config_prof)
 CTL_PROTO(config_prof_libgcc)
 CTL_PROTO(config_prof_libunwind)
+CTL_PROTO(config_prof_frameptr)
 CTL_PROTO(config_stats)
 CTL_PROTO(config_utrace)
 CTL_PROTO(config_xmalloc)
 CTL_PROTO(opt_abort)
 CTL_PROTO(opt_abort_conf)
 CTL_PROTO(opt_cache_oblivious)
+CTL_PROTO(opt_debug_double_free_max_scan)
 CTL_PROTO(opt_trust_madvise)
+CTL_PROTO(opt_experimental_hpa_start_huge_if_thp_always)
+CTL_PROTO(opt_experimental_hpa_enforce_hugify)
 CTL_PROTO(opt_confirm_conf)
 CTL_PROTO(opt_hpa)
 CTL_PROTO(opt_hpa_slab_max_alloc)
 CTL_PROTO(opt_hpa_hugification_threshold)
 CTL_PROTO(opt_hpa_hugify_delay_ms)
+CTL_PROTO(opt_hpa_hugify_sync)
 CTL_PROTO(opt_hpa_min_purge_interval_ms)
+CTL_PROTO(opt_experimental_hpa_max_purge_nhp)
+CTL_PROTO(opt_hpa_purge_threshold)
+CTL_PROTO(opt_hpa_min_purge_delay_ms)
+CTL_PROTO(opt_hpa_hugify_style)
 CTL_PROTO(opt_hpa_dirty_mult)
 CTL_PROTO(opt_hpa_sec_nshards)
 CTL_PROTO(opt_hpa_sec_max_alloc)
 CTL_PROTO(opt_hpa_sec_max_bytes)
-CTL_PROTO(opt_hpa_sec_bytes_after_flush)
 CTL_PROTO(opt_hpa_sec_batch_fill_extra)
+CTL_PROTO(opt_huge_arena_pac_thp)
 CTL_PROTO(opt_metadata_thp)
 CTL_PROTO(opt_retain)
 CTL_PROTO(opt_dss)
@@ -125,6 +137,7 @@ CTL_PROTO(opt_zero)
 CTL_PROTO(opt_utrace)
 CTL_PROTO(opt_xmalloc)
 CTL_PROTO(opt_experimental_infallible_new)
+CTL_PROTO(opt_experimental_tcache_gc)
 CTL_PROTO(opt_tcache)
 CTL_PROTO(opt_tcache_max)
 CTL_PROTO(opt_tcache_nslots_small_min)
@@ -141,6 +154,7 @@ CTL_PROTO(opt_prof)
 CTL_PROTO(opt_prof_prefix)
 CTL_PROTO(opt_prof_active)
 CTL_PROTO(opt_prof_thread_active_init)
+CTL_PROTO(opt_prof_bt_max)
 CTL_PROTO(opt_lg_prof_sample)
 CTL_PROTO(opt_lg_prof_interval)
 CTL_PROTO(opt_prof_gdump)
@@ -148,12 +162,19 @@ CTL_PROTO(opt_prof_final)
 CTL_PROTO(opt_prof_leak)
 CTL_PROTO(opt_prof_leak_error)
 CTL_PROTO(opt_prof_accum)
+CTL_PROTO(opt_prof_pid_namespace)
 CTL_PROTO(opt_prof_recent_alloc_max)
 CTL_PROTO(opt_prof_stats)
 CTL_PROTO(opt_prof_sys_thread_name)
 CTL_PROTO(opt_prof_time_res)
 CTL_PROTO(opt_lg_san_uaf_align)
 CTL_PROTO(opt_zero_realloc)
+CTL_PROTO(opt_disable_large_size_classes)
+CTL_PROTO(opt_process_madvise_max_batch)
+CTL_PROTO(opt_malloc_conf_symlink)
+CTL_PROTO(opt_malloc_conf_env_var)
+CTL_PROTO(opt_malloc_conf_global_var)
+CTL_PROTO(opt_malloc_conf_global_var_2_conf_harder)
 CTL_PROTO(tcache_create)
 CTL_PROTO(tcache_flush)
 CTL_PROTO(tcache_destroy)
@@ -168,6 +189,7 @@ CTL_PROTO(arena_i_dirty_decay_ms)
 CTL_PROTO(arena_i_muzzy_decay_ms)
 CTL_PROTO(arena_i_extent_hooks)
 CTL_PROTO(arena_i_retain_grow_limit)
+CTL_PROTO(arena_i_name)
 INDEX_PROTO(arena_i)
 CTL_PROTO(arenas_bin_i_size)
 CTL_PROTO(arenas_bin_i_nregs)
@@ -181,6 +203,7 @@ CTL_PROTO(arenas_dirty_decay_ms)
 CTL_PROTO(arenas_muzzy_decay_ms)
 CTL_PROTO(arenas_quantum)
 CTL_PROTO(arenas_page)
+CTL_PROTO(arenas_hugepage)
 CTL_PROTO(arenas_tcache_max)
 CTL_PROTO(arenas_nbins)
 CTL_PROTO(arenas_nhbins)
@@ -238,12 +261,27 @@ CTL_PROTO(stats_arenas_i_extents_j_dirty_bytes)
 CTL_PROTO(stats_arenas_i_extents_j_muzzy_bytes)
 CTL_PROTO(stats_arenas_i_extents_j_retained_bytes)
 INDEX_PROTO(stats_arenas_i_extents_j)
+
+/* Merged set of stats for HPA shard. */
+CTL_PROTO(stats_arenas_i_hpa_shard_npageslabs)
+CTL_PROTO(stats_arenas_i_hpa_shard_nactive)
+CTL_PROTO(stats_arenas_i_hpa_shard_ndirty)
+
 CTL_PROTO(stats_arenas_i_hpa_shard_npurge_passes)
 CTL_PROTO(stats_arenas_i_hpa_shard_npurges)
 CTL_PROTO(stats_arenas_i_hpa_shard_nhugifies)
+CTL_PROTO(stats_arenas_i_hpa_shard_nhugify_failures)
 CTL_PROTO(stats_arenas_i_hpa_shard_ndehugifies)
 
-/* We have a set of stats for full slabs. */
+/* Set of stats for non-hugified and hugified slabs. */
+CTL_PROTO(stats_arenas_i_hpa_shard_slabs_npageslabs_nonhuge)
+CTL_PROTO(stats_arenas_i_hpa_shard_slabs_npageslabs_huge)
+CTL_PROTO(stats_arenas_i_hpa_shard_slabs_nactive_nonhuge)
+CTL_PROTO(stats_arenas_i_hpa_shard_slabs_nactive_huge)
+CTL_PROTO(stats_arenas_i_hpa_shard_slabs_ndirty_nonhuge)
+CTL_PROTO(stats_arenas_i_hpa_shard_slabs_ndirty_huge)
+
+/* A parallel set of stats for full slabs. */
 CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge)
 CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge)
 CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge)
@@ -271,6 +309,7 @@ CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_nonhuge)
 CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_huge)
 
 INDEX_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j)
+
 CTL_PROTO(stats_arenas_i_nthreads)
 CTL_PROTO(stats_arenas_i_uptime)
 CTL_PROTO(stats_arenas_i_dss)
@@ -290,12 +329,19 @@ CTL_PROTO(stats_arenas_i_muzzy_nmadvise)
 CTL_PROTO(stats_arenas_i_muzzy_purged)
 CTL_PROTO(stats_arenas_i_base)
 CTL_PROTO(stats_arenas_i_internal)
+CTL_PROTO(stats_arenas_i_metadata_edata)
+CTL_PROTO(stats_arenas_i_metadata_rtree)
 CTL_PROTO(stats_arenas_i_metadata_thp)
 CTL_PROTO(stats_arenas_i_tcache_bytes)
 CTL_PROTO(stats_arenas_i_tcache_stashed_bytes)
 CTL_PROTO(stats_arenas_i_resident)
 CTL_PROTO(stats_arenas_i_abandoned_vm)
 CTL_PROTO(stats_arenas_i_hpa_sec_bytes)
+CTL_PROTO(stats_arenas_i_hpa_sec_hits)
+CTL_PROTO(stats_arenas_i_hpa_sec_misses)
+CTL_PROTO(stats_arenas_i_hpa_sec_dalloc_flush)
+CTL_PROTO(stats_arenas_i_hpa_sec_dalloc_noflush)
+CTL_PROTO(stats_arenas_i_hpa_sec_overfills)
 INDEX_PROTO(stats_arenas_i)
 CTL_PROTO(stats_allocated)
 CTL_PROTO(stats_active)
@@ -303,17 +349,22 @@ CTL_PROTO(stats_background_thread_num_threads)
 CTL_PROTO(stats_background_thread_num_runs)
 CTL_PROTO(stats_background_thread_run_interval)
 CTL_PROTO(stats_metadata)
+CTL_PROTO(stats_metadata_edata)
+CTL_PROTO(stats_metadata_rtree)
 CTL_PROTO(stats_metadata_thp)
 CTL_PROTO(stats_resident)
 CTL_PROTO(stats_mapped)
 CTL_PROTO(stats_retained)
 CTL_PROTO(stats_zero_reallocs)
+CTL_PROTO(approximate_stats_active)
 CTL_PROTO(experimental_hooks_install)
 CTL_PROTO(experimental_hooks_remove)
 CTL_PROTO(experimental_hooks_prof_backtrace)
 CTL_PROTO(experimental_hooks_prof_dump)
+CTL_PROTO(experimental_hooks_prof_sample)
+CTL_PROTO(experimental_hooks_prof_sample_free)
+CTL_PROTO(experimental_hooks_thread_event)
 CTL_PROTO(experimental_hooks_safety_check_abort)
-CTL_PROTO(experimental_thread_activity_callback)
 CTL_PROTO(experimental_utilization_query)
 CTL_PROTO(experimental_utilization_batch_query)
 CTL_PROTO(experimental_arenas_i_pactivep)
@@ -323,14 +374,14 @@ CTL_PROTO(experimental_prof_recent_alloc_dump)
 CTL_PROTO(experimental_batch_alloc)
 CTL_PROTO(experimental_arenas_create_ext)
 
-#define MUTEX_STATS_CTL_PROTO_GEN(n)					\
-CTL_PROTO(stats_##n##_num_ops)						\
-CTL_PROTO(stats_##n##_num_wait)						\
-CTL_PROTO(stats_##n##_num_spin_acq)					\
-CTL_PROTO(stats_##n##_num_owner_switch)					\
-CTL_PROTO(stats_##n##_total_wait_time)					\
-CTL_PROTO(stats_##n##_max_wait_time)					\
-CTL_PROTO(stats_##n##_max_num_thds)
+#define MUTEX_STATS_CTL_PROTO_GEN(n)                                           \
+	CTL_PROTO(stats_##n##_num_ops)                                         \
+	CTL_PROTO(stats_##n##_num_wait)                                        \
+	CTL_PROTO(stats_##n##_num_spin_acq)                                    \
+	CTL_PROTO(stats_##n##_num_owner_switch)                                \
+	CTL_PROTO(stats_##n##_total_wait_time)                                 \
+	CTL_PROTO(stats_##n##_max_wait_time)                                   \
+	CTL_PROTO(stats_##n##_max_num_thds)
 
 /* Global mutexes. */
 #define OP(mtx) MUTEX_STATS_CTL_PROTO_GEN(mutexes_##mtx)
@@ -351,473 +402,450 @@ CTL_PROTO(stats_mutexes_reset)
 /******************************************************************************/
 /* mallctl tree. */
 
-#define NAME(n)	{true},	n
-#define CHILD(t, c)							\
-	sizeof(c##_node) / sizeof(ctl_##t##_node_t),			\
-	(ctl_node_t *)c##_node,						\
-	NULL
-#define CTL(c)	0, NULL, c##_ctl
+#define NAME(n) {true}, n
+#define CHILD(t, c)                                                            \
+	sizeof(c##_node) / sizeof(ctl_##t##_node_t), (ctl_node_t *)c##_node,   \
+	    NULL
+#define CTL(c) 0, NULL, c##_ctl
 
 /*
  * Only handles internal indexed nodes, since there are currently no external
  * ones.
  */
-#define INDEX(i)	{false},	i##_index
+#define INDEX(i) {false}, i##_index
 
-static const ctl_named_node_t	thread_tcache_node[] = {
-	{NAME("enabled"),	CTL(thread_tcache_enabled)},
-	{NAME("flush"),		CTL(thread_tcache_flush)}
+static const ctl_named_node_t thread_tcache_ncached_max_node[] = {
+    {NAME("read_sizeclass"), CTL(thread_tcache_ncached_max_read_sizeclass)},
+    {NAME("write"), CTL(thread_tcache_ncached_max_write)}};
+
+static const ctl_named_node_t thread_tcache_node[] = {
+    {NAME("enabled"), CTL(thread_tcache_enabled)},
+    {NAME("max"), CTL(thread_tcache_max)},
+    {NAME("flush"), CTL(thread_tcache_flush)},
+    {NAME("ncached_max"), CHILD(named, thread_tcache_ncached_max)}};
+
+static const ctl_named_node_t thread_peak_node[] = {
+    {NAME("read"), CTL(thread_peak_read)},
+    {NAME("reset"), CTL(thread_peak_reset)},
 };
 
-static const ctl_named_node_t	thread_peak_node[] = {
-	{NAME("read"),		CTL(thread_peak_read)},
-	{NAME("reset"),		CTL(thread_peak_reset)},
-};
+static const ctl_named_node_t thread_prof_node[] = {
+    {NAME("name"), CTL(thread_prof_name)},
+    {NAME("active"), CTL(thread_prof_active)}};
 
-static const ctl_named_node_t	thread_prof_node[] = {
-	{NAME("name"),		CTL(thread_prof_name)},
-	{NAME("active"),	CTL(thread_prof_active)}
-};
+static const ctl_named_node_t thread_node[] = {
+    {NAME("arena"), CTL(thread_arena)},
+    {NAME("allocated"), CTL(thread_allocated)},
+    {NAME("allocatedp"), CTL(thread_allocatedp)},
+    {NAME("deallocated"), CTL(thread_deallocated)},
+    {NAME("deallocatedp"), CTL(thread_deallocatedp)},
+    {NAME("tcache"), CHILD(named, thread_tcache)},
+    {NAME("peak"), CHILD(named, thread_peak)},
+    {NAME("prof"), CHILD(named, thread_prof)},
+    {NAME("idle"), CTL(thread_idle)}};
 
-static const ctl_named_node_t	thread_node[] = {
-	{NAME("arena"),		CTL(thread_arena)},
-	{NAME("allocated"),	CTL(thread_allocated)},
-	{NAME("allocatedp"),	CTL(thread_allocatedp)},
-	{NAME("deallocated"),	CTL(thread_deallocated)},
-	{NAME("deallocatedp"),	CTL(thread_deallocatedp)},
-	{NAME("tcache"),	CHILD(named, thread_tcache)},
-	{NAME("peak"),		CHILD(named, thread_peak)},
-	{NAME("prof"),		CHILD(named, thread_prof)},
-	{NAME("idle"),		CTL(thread_idle)}
-};
+static const ctl_named_node_t config_node[] = {
+    {NAME("cache_oblivious"), CTL(config_cache_oblivious)},
+    {NAME("debug"), CTL(config_debug)}, {NAME("fill"), CTL(config_fill)},
+    {NAME("lazy_lock"), CTL(config_lazy_lock)},
+    {NAME("malloc_conf"), CTL(config_malloc_conf)},
+    {NAME("opt_safety_checks"), CTL(config_opt_safety_checks)},
+    {NAME("prof"), CTL(config_prof)},
+    {NAME("prof_libgcc"), CTL(config_prof_libgcc)},
+    {NAME("prof_libunwind"), CTL(config_prof_libunwind)},
+    {NAME("prof_frameptr"), CTL(config_prof_frameptr)},
+    {NAME("stats"), CTL(config_stats)}, {NAME("utrace"), CTL(config_utrace)},
+    {NAME("xmalloc"), CTL(config_xmalloc)}};
 
-static const ctl_named_node_t	config_node[] = {
-	{NAME("cache_oblivious"), CTL(config_cache_oblivious)},
-	{NAME("debug"),		CTL(config_debug)},
-	{NAME("fill"),		CTL(config_fill)},
-	{NAME("lazy_lock"),	CTL(config_lazy_lock)},
-	{NAME("malloc_conf"),	CTL(config_malloc_conf)},
-	{NAME("opt_safety_checks"),	CTL(config_opt_safety_checks)},
-	{NAME("prof"),		CTL(config_prof)},
-	{NAME("prof_libgcc"),	CTL(config_prof_libgcc)},
-	{NAME("prof_libunwind"), CTL(config_prof_libunwind)},
-	{NAME("stats"),		CTL(config_stats)},
-	{NAME("utrace"),	CTL(config_utrace)},
-	{NAME("xmalloc"),	CTL(config_xmalloc)}
-};
+static const ctl_named_node_t opt_malloc_conf_node[] = {
+    {NAME("symlink"), CTL(opt_malloc_conf_symlink)},
+    {NAME("env_var"), CTL(opt_malloc_conf_env_var)},
+    {NAME("global_var"), CTL(opt_malloc_conf_global_var)},
+    {NAME("global_var_2_conf_harder"),
+        CTL(opt_malloc_conf_global_var_2_conf_harder)}};
 
-static const ctl_named_node_t opt_node[] = {
-	{NAME("abort"),		CTL(opt_abort)},
-	{NAME("abort_conf"),	CTL(opt_abort_conf)},
-	{NAME("cache_oblivious"),	CTL(opt_cache_oblivious)},
-	{NAME("trust_madvise"),	CTL(opt_trust_madvise)},
-	{NAME("confirm_conf"),	CTL(opt_confirm_conf)},
-	{NAME("hpa"),		CTL(opt_hpa)},
-	{NAME("hpa_slab_max_alloc"),	CTL(opt_hpa_slab_max_alloc)},
-	{NAME("hpa_hugification_threshold"),
-		CTL(opt_hpa_hugification_threshold)},
-	{NAME("hpa_hugify_delay_ms"), CTL(opt_hpa_hugify_delay_ms)},
-	{NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)},
-	{NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)},
-	{NAME("hpa_sec_nshards"),	CTL(opt_hpa_sec_nshards)},
-	{NAME("hpa_sec_max_alloc"),	CTL(opt_hpa_sec_max_alloc)},
-	{NAME("hpa_sec_max_bytes"),	CTL(opt_hpa_sec_max_bytes)},
-	{NAME("hpa_sec_bytes_after_flush"),
-		CTL(opt_hpa_sec_bytes_after_flush)},
-	{NAME("hpa_sec_batch_fill_extra"),
-		CTL(opt_hpa_sec_batch_fill_extra)},
-	{NAME("metadata_thp"),	CTL(opt_metadata_thp)},
-	{NAME("retain"),	CTL(opt_retain)},
-	{NAME("dss"),		CTL(opt_dss)},
-	{NAME("narenas"),	CTL(opt_narenas)},
-	{NAME("percpu_arena"),	CTL(opt_percpu_arena)},
-	{NAME("oversize_threshold"),	CTL(opt_oversize_threshold)},
-	{NAME("mutex_max_spin"),	CTL(opt_mutex_max_spin)},
-	{NAME("background_thread"),	CTL(opt_background_thread)},
-	{NAME("max_background_threads"),	CTL(opt_max_background_threads)},
-	{NAME("dirty_decay_ms"), CTL(opt_dirty_decay_ms)},
-	{NAME("muzzy_decay_ms"), CTL(opt_muzzy_decay_ms)},
-	{NAME("stats_print"),	CTL(opt_stats_print)},
-	{NAME("stats_print_opts"),	CTL(opt_stats_print_opts)},
-	{NAME("stats_interval"),	CTL(opt_stats_interval)},
-	{NAME("stats_interval_opts"),	CTL(opt_stats_interval_opts)},
-	{NAME("junk"),		CTL(opt_junk)},
-	{NAME("zero"),		CTL(opt_zero)},
-	{NAME("utrace"),	CTL(opt_utrace)},
-	{NAME("xmalloc"),	CTL(opt_xmalloc)},
-	{NAME("experimental_infallible_new"),
-		CTL(opt_experimental_infallible_new)},
-	{NAME("tcache"),	CTL(opt_tcache)},
-	{NAME("tcache_max"),	CTL(opt_tcache_max)},
-	{NAME("tcache_nslots_small_min"),
-		CTL(opt_tcache_nslots_small_min)},
-	{NAME("tcache_nslots_small_max"),
-		CTL(opt_tcache_nslots_small_max)},
-	{NAME("tcache_nslots_large"),	CTL(opt_tcache_nslots_large)},
-	{NAME("lg_tcache_nslots_mul"),	CTL(opt_lg_tcache_nslots_mul)},
-	{NAME("tcache_gc_incr_bytes"),	CTL(opt_tcache_gc_incr_bytes)},
-	{NAME("tcache_gc_delay_bytes"),	CTL(opt_tcache_gc_delay_bytes)},
-	{NAME("lg_tcache_flush_small_div"),
-		CTL(opt_lg_tcache_flush_small_div)},
-	{NAME("lg_tcache_flush_large_div"),
-		CTL(opt_lg_tcache_flush_large_div)},
-	{NAME("thp"),		CTL(opt_thp)},
-	{NAME("lg_extent_max_active_fit"), CTL(opt_lg_extent_max_active_fit)},
-	{NAME("prof"),		CTL(opt_prof)},
-	{NAME("prof_prefix"),	CTL(opt_prof_prefix)},
-	{NAME("prof_active"),	CTL(opt_prof_active)},
-	{NAME("prof_thread_active_init"), CTL(opt_prof_thread_active_init)},
-	{NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)},
-	{NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)},
-	{NAME("prof_gdump"),	CTL(opt_prof_gdump)},
-	{NAME("prof_final"),	CTL(opt_prof_final)},
-	{NAME("prof_leak"),	CTL(opt_prof_leak)},
-	{NAME("prof_leak_error"),	CTL(opt_prof_leak_error)},
-	{NAME("prof_accum"),	CTL(opt_prof_accum)},
-	{NAME("prof_recent_alloc_max"),	CTL(opt_prof_recent_alloc_max)},
-	{NAME("prof_stats"),	CTL(opt_prof_stats)},
-	{NAME("prof_sys_thread_name"),	CTL(opt_prof_sys_thread_name)},
-	{NAME("prof_time_resolution"),	CTL(opt_prof_time_res)},
-	{NAME("lg_san_uaf_align"),	CTL(opt_lg_san_uaf_align)},
-	{NAME("zero_realloc"),	CTL(opt_zero_realloc)}
-};
+static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)},
+    {NAME("abort_conf"), CTL(opt_abort_conf)},
+    {NAME("cache_oblivious"), CTL(opt_cache_oblivious)},
+    {NAME("trust_madvise"), CTL(opt_trust_madvise)},
+    {NAME("experimental_hpa_start_huge_if_thp_always"),
+        CTL(opt_experimental_hpa_start_huge_if_thp_always)},
+    {NAME("experimental_hpa_enforce_hugify"),
+        CTL(opt_experimental_hpa_enforce_hugify)},
+    {NAME("confirm_conf"), CTL(opt_confirm_conf)}, {NAME("hpa"), CTL(opt_hpa)},
+    {NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)},
+    {NAME("hpa_hugification_threshold"), CTL(opt_hpa_hugification_threshold)},
+    {NAME("hpa_hugify_delay_ms"), CTL(opt_hpa_hugify_delay_ms)},
+    {NAME("hpa_hugify_sync"), CTL(opt_hpa_hugify_sync)},
+    {NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)},
+    {NAME("experimental_hpa_max_purge_nhp"),
+        CTL(opt_experimental_hpa_max_purge_nhp)},
+    {NAME("hpa_purge_threshold"), CTL(opt_hpa_purge_threshold)},
+    {NAME("hpa_min_purge_delay_ms"), CTL(opt_hpa_min_purge_delay_ms)},
+    {NAME("hpa_hugify_style"), CTL(opt_hpa_hugify_style)},
+    {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)},
+    {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)},
+    {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)},
+    {NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)},
+    {NAME("hpa_sec_batch_fill_extra"), CTL(opt_hpa_sec_batch_fill_extra)},
+    {NAME("huge_arena_pac_thp"), CTL(opt_huge_arena_pac_thp)},
+    {NAME("metadata_thp"), CTL(opt_metadata_thp)},
+    {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)},
+    {NAME("narenas"), CTL(opt_narenas)},
+    {NAME("percpu_arena"), CTL(opt_percpu_arena)},
+    {NAME("oversize_threshold"), CTL(opt_oversize_threshold)},
+    {NAME("mutex_max_spin"), CTL(opt_mutex_max_spin)},
+    {NAME("background_thread"), CTL(opt_background_thread)},
+    {NAME("max_background_threads"), CTL(opt_max_background_threads)},
+    {NAME("dirty_decay_ms"), CTL(opt_dirty_decay_ms)},
+    {NAME("muzzy_decay_ms"), CTL(opt_muzzy_decay_ms)},
+    {NAME("stats_print"), CTL(opt_stats_print)},
+    {NAME("stats_print_opts"), CTL(opt_stats_print_opts)},
+    {NAME("stats_interval"), CTL(opt_stats_interval)},
+    {NAME("stats_interval_opts"), CTL(opt_stats_interval_opts)},
+    {NAME("junk"), CTL(opt_junk)}, {NAME("zero"), CTL(opt_zero)},
+    {NAME("utrace"), CTL(opt_utrace)}, {NAME("xmalloc"), CTL(opt_xmalloc)},
+    {NAME("experimental_infallible_new"), CTL(opt_experimental_infallible_new)},
+    {NAME("experimental_tcache_gc"), CTL(opt_experimental_tcache_gc)},
+    {NAME("tcache"), CTL(opt_tcache)},
+    {NAME("tcache_max"), CTL(opt_tcache_max)},
+    {NAME("tcache_nslots_small_min"), CTL(opt_tcache_nslots_small_min)},
+    {NAME("tcache_nslots_small_max"), CTL(opt_tcache_nslots_small_max)},
+    {NAME("tcache_nslots_large"), CTL(opt_tcache_nslots_large)},
+    {NAME("lg_tcache_nslots_mul"), CTL(opt_lg_tcache_nslots_mul)},
+    {NAME("tcache_gc_incr_bytes"), CTL(opt_tcache_gc_incr_bytes)},
+    {NAME("tcache_gc_delay_bytes"), CTL(opt_tcache_gc_delay_bytes)},
+    {NAME("lg_tcache_flush_small_div"), CTL(opt_lg_tcache_flush_small_div)},
+    {NAME("lg_tcache_flush_large_div"), CTL(opt_lg_tcache_flush_large_div)},
+    {NAME("thp"), CTL(opt_thp)},
+    {NAME("lg_extent_max_active_fit"), CTL(opt_lg_extent_max_active_fit)},
+    {NAME("prof"), CTL(opt_prof)}, {NAME("prof_prefix"), CTL(opt_prof_prefix)},
+    {NAME("prof_active"), CTL(opt_prof_active)},
+    {NAME("prof_thread_active_init"), CTL(opt_prof_thread_active_init)},
+    {NAME("prof_bt_max"), CTL(opt_prof_bt_max)},
+    {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)},
+    {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)},
+    {NAME("prof_gdump"), CTL(opt_prof_gdump)},
+    {NAME("prof_final"), CTL(opt_prof_final)},
+    {NAME("prof_leak"), CTL(opt_prof_leak)},
+    {NAME("prof_leak_error"), CTL(opt_prof_leak_error)},
+    {NAME("prof_accum"), CTL(opt_prof_accum)},
+    {NAME("prof_pid_namespace"), CTL(opt_prof_pid_namespace)},
+    {NAME("prof_recent_alloc_max"), CTL(opt_prof_recent_alloc_max)},
+    {NAME("prof_stats"), CTL(opt_prof_stats)},
+    {NAME("prof_sys_thread_name"), CTL(opt_prof_sys_thread_name)},
+    {NAME("prof_time_resolution"), CTL(opt_prof_time_res)},
+    {NAME("lg_san_uaf_align"), CTL(opt_lg_san_uaf_align)},
+    {NAME("zero_realloc"), CTL(opt_zero_realloc)},
+    {NAME("debug_double_free_max_scan"), CTL(opt_debug_double_free_max_scan)},
+    {NAME("disable_large_size_classes"), CTL(opt_disable_large_size_classes)},
+    {NAME("process_madvise_max_batch"), CTL(opt_process_madvise_max_batch)},
+    {NAME("malloc_conf"), CHILD(named, opt_malloc_conf)}};
 
-static const ctl_named_node_t	tcache_node[] = {
-	{NAME("create"),	CTL(tcache_create)},
-	{NAME("flush"),		CTL(tcache_flush)},
-	{NAME("destroy"),	CTL(tcache_destroy)}
-};
+static const ctl_named_node_t tcache_node[] = {
+    {NAME("create"), CTL(tcache_create)}, {NAME("flush"), CTL(tcache_flush)},
+    {NAME("destroy"), CTL(tcache_destroy)}};
 
 static const ctl_named_node_t arena_i_node[] = {
-	{NAME("initialized"),	CTL(arena_i_initialized)},
-	{NAME("decay"),		CTL(arena_i_decay)},
-	{NAME("purge"),		CTL(arena_i_purge)},
-	{NAME("reset"),		CTL(arena_i_reset)},
-	{NAME("destroy"),	CTL(arena_i_destroy)},
-	{NAME("dss"),		CTL(arena_i_dss)},
-	/*
+    {NAME("initialized"), CTL(arena_i_initialized)},
+    {NAME("decay"), CTL(arena_i_decay)}, {NAME("purge"), CTL(arena_i_purge)},
+    {NAME("reset"), CTL(arena_i_reset)},
+    {NAME("destroy"), CTL(arena_i_destroy)}, {NAME("dss"), CTL(arena_i_dss)},
+    /*
 	 * Undocumented for now, since we anticipate an arena API in flux after
 	 * we cut the last 5-series release.
 	 */
-	{NAME("oversize_threshold"), CTL(arena_i_oversize_threshold)},
-	{NAME("dirty_decay_ms"), CTL(arena_i_dirty_decay_ms)},
-	{NAME("muzzy_decay_ms"), CTL(arena_i_muzzy_decay_ms)},
-	{NAME("extent_hooks"),	CTL(arena_i_extent_hooks)},
-	{NAME("retain_grow_limit"),	CTL(arena_i_retain_grow_limit)}
-};
+    {NAME("oversize_threshold"), CTL(arena_i_oversize_threshold)},
+    {NAME("dirty_decay_ms"), CTL(arena_i_dirty_decay_ms)},
+    {NAME("muzzy_decay_ms"), CTL(arena_i_muzzy_decay_ms)},
+    {NAME("extent_hooks"), CTL(arena_i_extent_hooks)},
+    {NAME("retain_grow_limit"), CTL(arena_i_retain_grow_limit)},
+    {NAME("name"), CTL(arena_i_name)}};
 static const ctl_named_node_t super_arena_i_node[] = {
-	{NAME(""),		CHILD(named, arena_i)}
-};
+    {NAME(""), CHILD(named, arena_i)}};
 
-static const ctl_indexed_node_t arena_node[] = {
-	{INDEX(arena_i)}
-};
+static const ctl_indexed_node_t arena_node[] = {{INDEX(arena_i)}};
 
 static const ctl_named_node_t arenas_bin_i_node[] = {
-	{NAME("size"),		CTL(arenas_bin_i_size)},
-	{NAME("nregs"),		CTL(arenas_bin_i_nregs)},
-	{NAME("slab_size"),	CTL(arenas_bin_i_slab_size)},
-	{NAME("nshards"),	CTL(arenas_bin_i_nshards)}
-};
+    {NAME("size"), CTL(arenas_bin_i_size)},
+    {NAME("nregs"), CTL(arenas_bin_i_nregs)},
+    {NAME("slab_size"), CTL(arenas_bin_i_slab_size)},
+    {NAME("nshards"), CTL(arenas_bin_i_nshards)}};
 static const ctl_named_node_t super_arenas_bin_i_node[] = {
-	{NAME(""),		CHILD(named, arenas_bin_i)}
-};
+    {NAME(""), CHILD(named, arenas_bin_i)}};
 
-static const ctl_indexed_node_t arenas_bin_node[] = {
-	{INDEX(arenas_bin_i)}
-};
+static const ctl_indexed_node_t arenas_bin_node[] = {{INDEX(arenas_bin_i)}};
 
 static const ctl_named_node_t arenas_lextent_i_node[] = {
-	{NAME("size"),		CTL(arenas_lextent_i_size)}
-};
+    {NAME("size"), CTL(arenas_lextent_i_size)}};
 static const ctl_named_node_t super_arenas_lextent_i_node[] = {
-	{NAME(""),		CHILD(named, arenas_lextent_i)}
-};
+    {NAME(""), CHILD(named, arenas_lextent_i)}};
 
 static const ctl_indexed_node_t arenas_lextent_node[] = {
-	{INDEX(arenas_lextent_i)}
-};
+    {INDEX(arenas_lextent_i)}};
 
 static const ctl_named_node_t arenas_node[] = {
-	{NAME("narenas"),	CTL(arenas_narenas)},
-	{NAME("dirty_decay_ms"), CTL(arenas_dirty_decay_ms)},
-	{NAME("muzzy_decay_ms"), CTL(arenas_muzzy_decay_ms)},
-	{NAME("quantum"),	CTL(arenas_quantum)},
-	{NAME("page"),		CTL(arenas_page)},
-	{NAME("tcache_max"),	CTL(arenas_tcache_max)},
-	{NAME("nbins"),		CTL(arenas_nbins)},
-	{NAME("nhbins"),	CTL(arenas_nhbins)},
-	{NAME("bin"),		CHILD(indexed, arenas_bin)},
-	{NAME("nlextents"),	CTL(arenas_nlextents)},
-	{NAME("lextent"),	CHILD(indexed, arenas_lextent)},
-	{NAME("create"),	CTL(arenas_create)},
-	{NAME("lookup"),	CTL(arenas_lookup)}
-};
+    {NAME("narenas"), CTL(arenas_narenas)},
+    {NAME("dirty_decay_ms"), CTL(arenas_dirty_decay_ms)},
+    {NAME("muzzy_decay_ms"), CTL(arenas_muzzy_decay_ms)},
+    {NAME("quantum"), CTL(arenas_quantum)}, {NAME("page"), CTL(arenas_page)},
+    {NAME("hugepage"), CTL(arenas_hugepage)},
+    {NAME("tcache_max"), CTL(arenas_tcache_max)},
+    {NAME("nbins"), CTL(arenas_nbins)}, {NAME("nhbins"), CTL(arenas_nhbins)},
+    {NAME("bin"), CHILD(indexed, arenas_bin)},
+    {NAME("nlextents"), CTL(arenas_nlextents)},
+    {NAME("lextent"), CHILD(indexed, arenas_lextent)},
+    {NAME("create"), CTL(arenas_create)}, {NAME("lookup"), CTL(arenas_lookup)}};
 
 static const ctl_named_node_t prof_stats_bins_i_node[] = {
-	{NAME("live"),		CTL(prof_stats_bins_i_live)},
-	{NAME("accum"),		CTL(prof_stats_bins_i_accum)}
-};
+    {NAME("live"), CTL(prof_stats_bins_i_live)},
+    {NAME("accum"), CTL(prof_stats_bins_i_accum)}};
 
 static const ctl_named_node_t super_prof_stats_bins_i_node[] = {
-	{NAME(""),		CHILD(named, prof_stats_bins_i)}
-};
+    {NAME(""), CHILD(named, prof_stats_bins_i)}};
 
 static const ctl_indexed_node_t prof_stats_bins_node[] = {
-	{INDEX(prof_stats_bins_i)}
-};
+    {INDEX(prof_stats_bins_i)}};
 
 static const ctl_named_node_t prof_stats_lextents_i_node[] = {
-	{NAME("live"),		CTL(prof_stats_lextents_i_live)},
-	{NAME("accum"),		CTL(prof_stats_lextents_i_accum)}
-};
+    {NAME("live"), CTL(prof_stats_lextents_i_live)},
+    {NAME("accum"), CTL(prof_stats_lextents_i_accum)}};
 
 static const ctl_named_node_t super_prof_stats_lextents_i_node[] = {
-	{NAME(""),		CHILD(named, prof_stats_lextents_i)}
-};
+    {NAME(""), CHILD(named, prof_stats_lextents_i)}};
 
 static const ctl_indexed_node_t prof_stats_lextents_node[] = {
-	{INDEX(prof_stats_lextents_i)}
+    {INDEX(prof_stats_lextents_i)}};
+
+static const ctl_named_node_t prof_stats_node[] = {
+    {NAME("bins"), CHILD(indexed, prof_stats_bins)},
+    {NAME("lextents"), CHILD(indexed, prof_stats_lextents)},
 };
 
-static const ctl_named_node_t	prof_stats_node[] = {
-	{NAME("bins"),		CHILD(indexed, prof_stats_bins)},
-	{NAME("lextents"),	CHILD(indexed, prof_stats_lextents)},
-};
-
-static const ctl_named_node_t	prof_node[] = {
-	{NAME("thread_active_init"), CTL(prof_thread_active_init)},
-	{NAME("active"),	CTL(prof_active)},
-	{NAME("dump"),		CTL(prof_dump)},
-	{NAME("gdump"),		CTL(prof_gdump)},
-	{NAME("prefix"),	CTL(prof_prefix)},
-	{NAME("reset"),		CTL(prof_reset)},
-	{NAME("interval"),	CTL(prof_interval)},
-	{NAME("lg_sample"),	CTL(lg_prof_sample)},
-	{NAME("log_start"),	CTL(prof_log_start)},
-	{NAME("log_stop"),	CTL(prof_log_stop)},
-	{NAME("stats"),		CHILD(named, prof_stats)}
-};
+static const ctl_named_node_t prof_node[] = {
+    {NAME("thread_active_init"), CTL(prof_thread_active_init)},
+    {NAME("active"), CTL(prof_active)}, {NAME("dump"), CTL(prof_dump)},
+    {NAME("gdump"), CTL(prof_gdump)}, {NAME("prefix"), CTL(prof_prefix)},
+    {NAME("reset"), CTL(prof_reset)}, {NAME("interval"), CTL(prof_interval)},
+    {NAME("lg_sample"), CTL(lg_prof_sample)},
+    {NAME("log_start"), CTL(prof_log_start)},
+    {NAME("log_stop"), CTL(prof_log_stop)},
+    {NAME("stats"), CHILD(named, prof_stats)}};
 
 static const ctl_named_node_t stats_arenas_i_small_node[] = {
-	{NAME("allocated"),	CTL(stats_arenas_i_small_allocated)},
-	{NAME("nmalloc"),	CTL(stats_arenas_i_small_nmalloc)},
-	{NAME("ndalloc"),	CTL(stats_arenas_i_small_ndalloc)},
-	{NAME("nrequests"),	CTL(stats_arenas_i_small_nrequests)},
-	{NAME("nfills"),	CTL(stats_arenas_i_small_nfills)},
-	{NAME("nflushes"),	CTL(stats_arenas_i_small_nflushes)}
-};
+    {NAME("allocated"), CTL(stats_arenas_i_small_allocated)},
+    {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)},
+    {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)},
+    {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)},
+    {NAME("nfills"), CTL(stats_arenas_i_small_nfills)},
+    {NAME("nflushes"), CTL(stats_arenas_i_small_nflushes)}};
 
 static const ctl_named_node_t stats_arenas_i_large_node[] = {
-	{NAME("allocated"),	CTL(stats_arenas_i_large_allocated)},
-	{NAME("nmalloc"),	CTL(stats_arenas_i_large_nmalloc)},
-	{NAME("ndalloc"),	CTL(stats_arenas_i_large_ndalloc)},
-	{NAME("nrequests"),	CTL(stats_arenas_i_large_nrequests)},
-	{NAME("nfills"),	CTL(stats_arenas_i_large_nfills)},
-	{NAME("nflushes"),	CTL(stats_arenas_i_large_nflushes)}
-};
+    {NAME("allocated"), CTL(stats_arenas_i_large_allocated)},
+    {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)},
+    {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)},
+    {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)},
+    {NAME("nfills"), CTL(stats_arenas_i_large_nfills)},
+    {NAME("nflushes"), CTL(stats_arenas_i_large_nflushes)}};
 
-#define MUTEX_PROF_DATA_NODE(prefix)					\
-static const ctl_named_node_t stats_##prefix##_node[] = {		\
-	{NAME("num_ops"),						\
-	 CTL(stats_##prefix##_num_ops)},				\
-	{NAME("num_wait"),						\
-	 CTL(stats_##prefix##_num_wait)},				\
-	{NAME("num_spin_acq"),						\
-	 CTL(stats_##prefix##_num_spin_acq)},				\
-	{NAME("num_owner_switch"),					\
-	 CTL(stats_##prefix##_num_owner_switch)},			\
-	{NAME("total_wait_time"),					\
-	 CTL(stats_##prefix##_total_wait_time)},			\
-	{NAME("max_wait_time"),						\
-	 CTL(stats_##prefix##_max_wait_time)},				\
-	{NAME("max_num_thds"),						\
-	 CTL(stats_##prefix##_max_num_thds)}				\
-	/* Note that # of current waiting thread not provided. */	\
-};
+#define MUTEX_PROF_DATA_NODE(prefix)                                                                          \
+	static const ctl_named_node_t stats_##prefix##_node[] = {                                             \
+	    {NAME("num_ops"), CTL(stats_##prefix##_num_ops)},                                                 \
+	    {NAME("num_wait"), CTL(stats_##prefix##_num_wait)},                                               \
+	    {NAME("num_spin_acq"), CTL(stats_##prefix##_num_spin_acq)},                                       \
+	    {NAME("num_owner_switch"),                                                                        \
+	        CTL(stats_##prefix##_num_owner_switch)},                                                      \
+	    {NAME("total_wait_time"), CTL(stats_##prefix##_total_wait_time)},                                 \
+	    {NAME("max_wait_time"), CTL(stats_##prefix##_max_wait_time)},                                     \
+	    {NAME("max_num_thds"),                                                                            \
+	        CTL(stats_##prefix##_max_num_thds)} /* Note that # of current waiting thread not provided. */ \
+	};
 
 MUTEX_PROF_DATA_NODE(arenas_i_bins_j_mutex)
 
 static const ctl_named_node_t stats_arenas_i_bins_j_node[] = {
-	{NAME("nmalloc"),	CTL(stats_arenas_i_bins_j_nmalloc)},
-	{NAME("ndalloc"),	CTL(stats_arenas_i_bins_j_ndalloc)},
-	{NAME("nrequests"),	CTL(stats_arenas_i_bins_j_nrequests)},
-	{NAME("curregs"),	CTL(stats_arenas_i_bins_j_curregs)},
-	{NAME("nfills"),	CTL(stats_arenas_i_bins_j_nfills)},
-	{NAME("nflushes"),	CTL(stats_arenas_i_bins_j_nflushes)},
-	{NAME("nslabs"),	CTL(stats_arenas_i_bins_j_nslabs)},
-	{NAME("nreslabs"),	CTL(stats_arenas_i_bins_j_nreslabs)},
-	{NAME("curslabs"),	CTL(stats_arenas_i_bins_j_curslabs)},
-	{NAME("nonfull_slabs"),	CTL(stats_arenas_i_bins_j_nonfull_slabs)},
-	{NAME("mutex"),		CHILD(named, stats_arenas_i_bins_j_mutex)}
-};
+    {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)},
+    {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)},
+    {NAME("nrequests"), CTL(stats_arenas_i_bins_j_nrequests)},
+    {NAME("curregs"), CTL(stats_arenas_i_bins_j_curregs)},
+    {NAME("nfills"), CTL(stats_arenas_i_bins_j_nfills)},
+    {NAME("nflushes"), CTL(stats_arenas_i_bins_j_nflushes)},
+    {NAME("nslabs"), CTL(stats_arenas_i_bins_j_nslabs)},
+    {NAME("nreslabs"), CTL(stats_arenas_i_bins_j_nreslabs)},
+    {NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)},
+    {NAME("nonfull_slabs"), CTL(stats_arenas_i_bins_j_nonfull_slabs)},
+    {NAME("mutex"), CHILD(named, stats_arenas_i_bins_j_mutex)}};
 
 static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = {
-	{NAME(""),		CHILD(named, stats_arenas_i_bins_j)}
-};
+    {NAME(""), CHILD(named, stats_arenas_i_bins_j)}};
 
 static const ctl_indexed_node_t stats_arenas_i_bins_node[] = {
-	{INDEX(stats_arenas_i_bins_j)}
-};
+    {INDEX(stats_arenas_i_bins_j)}};
 
 static const ctl_named_node_t stats_arenas_i_lextents_j_node[] = {
-	{NAME("nmalloc"),	CTL(stats_arenas_i_lextents_j_nmalloc)},
-	{NAME("ndalloc"),	CTL(stats_arenas_i_lextents_j_ndalloc)},
-	{NAME("nrequests"),	CTL(stats_arenas_i_lextents_j_nrequests)},
-	{NAME("curlextents"),	CTL(stats_arenas_i_lextents_j_curlextents)}
-};
+    {NAME("nmalloc"), CTL(stats_arenas_i_lextents_j_nmalloc)},
+    {NAME("ndalloc"), CTL(stats_arenas_i_lextents_j_ndalloc)},
+    {NAME("nrequests"), CTL(stats_arenas_i_lextents_j_nrequests)},
+    {NAME("curlextents"), CTL(stats_arenas_i_lextents_j_curlextents)}};
 static const ctl_named_node_t super_stats_arenas_i_lextents_j_node[] = {
-	{NAME(""),		CHILD(named, stats_arenas_i_lextents_j)}
-};
+    {NAME(""), CHILD(named, stats_arenas_i_lextents_j)}};
 
 static const ctl_indexed_node_t stats_arenas_i_lextents_node[] = {
-	{INDEX(stats_arenas_i_lextents_j)}
-};
+    {INDEX(stats_arenas_i_lextents_j)}};
 
 static const ctl_named_node_t stats_arenas_i_extents_j_node[] = {
-	{NAME("ndirty"),	CTL(stats_arenas_i_extents_j_ndirty)},
-	{NAME("nmuzzy"),	CTL(stats_arenas_i_extents_j_nmuzzy)},
-	{NAME("nretained"),	CTL(stats_arenas_i_extents_j_nretained)},
-	{NAME("dirty_bytes"),	CTL(stats_arenas_i_extents_j_dirty_bytes)},
-	{NAME("muzzy_bytes"),	CTL(stats_arenas_i_extents_j_muzzy_bytes)},
-	{NAME("retained_bytes"), CTL(stats_arenas_i_extents_j_retained_bytes)}
-};
+    {NAME("ndirty"), CTL(stats_arenas_i_extents_j_ndirty)},
+    {NAME("nmuzzy"), CTL(stats_arenas_i_extents_j_nmuzzy)},
+    {NAME("nretained"), CTL(stats_arenas_i_extents_j_nretained)},
+    {NAME("dirty_bytes"), CTL(stats_arenas_i_extents_j_dirty_bytes)},
+    {NAME("muzzy_bytes"), CTL(stats_arenas_i_extents_j_muzzy_bytes)},
+    {NAME("retained_bytes"), CTL(stats_arenas_i_extents_j_retained_bytes)}};
 
 static const ctl_named_node_t super_stats_arenas_i_extents_j_node[] = {
-	{NAME(""),		CHILD(named, stats_arenas_i_extents_j)}
-};
+    {NAME(""), CHILD(named, stats_arenas_i_extents_j)}};
 
 static const ctl_indexed_node_t stats_arenas_i_extents_node[] = {
-	{INDEX(stats_arenas_i_extents_j)}
-};
+    {INDEX(stats_arenas_i_extents_j)}};
 
-#define OP(mtx)  MUTEX_PROF_DATA_NODE(arenas_i_mutexes_##mtx)
+#define OP(mtx) MUTEX_PROF_DATA_NODE(arenas_i_mutexes_##mtx)
 MUTEX_PROF_ARENA_MUTEXES
 #undef OP
 
 static const ctl_named_node_t stats_arenas_i_mutexes_node[] = {
 #define OP(mtx) {NAME(#mtx), CHILD(named, stats_arenas_i_mutexes_##mtx)},
-MUTEX_PROF_ARENA_MUTEXES
+    MUTEX_PROF_ARENA_MUTEXES
 #undef OP
 };
 
+static const ctl_named_node_t stats_arenas_i_hpa_shard_slabs_node[] = {
+    {NAME("npageslabs_nonhuge"),
+        CTL(stats_arenas_i_hpa_shard_slabs_npageslabs_nonhuge)},
+    {NAME("npageslabs_huge"),
+        CTL(stats_arenas_i_hpa_shard_slabs_npageslabs_huge)},
+    {NAME("nactive_nonhuge"),
+        CTL(stats_arenas_i_hpa_shard_slabs_nactive_nonhuge)},
+    {NAME("nactive_huge"), CTL(stats_arenas_i_hpa_shard_slabs_nactive_huge)},
+    {NAME("ndirty_nonhuge"),
+        CTL(stats_arenas_i_hpa_shard_slabs_ndirty_nonhuge)},
+    {NAME("ndirty_huge"), CTL(stats_arenas_i_hpa_shard_slabs_ndirty_huge)}};
+
 static const ctl_named_node_t stats_arenas_i_hpa_shard_full_slabs_node[] = {
-	{NAME("npageslabs_nonhuge"),
-		CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge)},
-	{NAME("npageslabs_huge"),
-		CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge)},
-	{NAME("nactive_nonhuge"),
-		CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge)},
-	{NAME("nactive_huge"),
-		CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_huge)},
-	{NAME("ndirty_nonhuge"),
-		CTL(stats_arenas_i_hpa_shard_full_slabs_ndirty_nonhuge)},
-	{NAME("ndirty_huge"),
-		CTL(stats_arenas_i_hpa_shard_full_slabs_ndirty_huge)}
-};
+    {NAME("npageslabs_nonhuge"),
+        CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge)},
+    {NAME("npageslabs_huge"),
+        CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge)},
+    {NAME("nactive_nonhuge"),
+        CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge)},
+    {NAME("nactive_huge"),
+        CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_huge)},
+    {NAME("ndirty_nonhuge"),
+        CTL(stats_arenas_i_hpa_shard_full_slabs_ndirty_nonhuge)},
+    {NAME("ndirty_huge"),
+        CTL(stats_arenas_i_hpa_shard_full_slabs_ndirty_huge)}};
 
 static const ctl_named_node_t stats_arenas_i_hpa_shard_empty_slabs_node[] = {
-	{NAME("npageslabs_nonhuge"),
-		CTL(stats_arenas_i_hpa_shard_empty_slabs_npageslabs_nonhuge)},
-	{NAME("npageslabs_huge"),
-		CTL(stats_arenas_i_hpa_shard_empty_slabs_npageslabs_huge)},
-	{NAME("nactive_nonhuge"),
-		CTL(stats_arenas_i_hpa_shard_empty_slabs_nactive_nonhuge)},
-	{NAME("nactive_huge"),
-		CTL(stats_arenas_i_hpa_shard_empty_slabs_nactive_huge)},
-	{NAME("ndirty_nonhuge"),
-		CTL(stats_arenas_i_hpa_shard_empty_slabs_ndirty_nonhuge)},
-	{NAME("ndirty_huge"),
-		CTL(stats_arenas_i_hpa_shard_empty_slabs_ndirty_huge)}
-};
+    {NAME("npageslabs_nonhuge"),
+        CTL(stats_arenas_i_hpa_shard_empty_slabs_npageslabs_nonhuge)},
+    {NAME("npageslabs_huge"),
+        CTL(stats_arenas_i_hpa_shard_empty_slabs_npageslabs_huge)},
+    {NAME("nactive_nonhuge"),
+        CTL(stats_arenas_i_hpa_shard_empty_slabs_nactive_nonhuge)},
+    {NAME("nactive_huge"),
+        CTL(stats_arenas_i_hpa_shard_empty_slabs_nactive_huge)},
+    {NAME("ndirty_nonhuge"),
+        CTL(stats_arenas_i_hpa_shard_empty_slabs_ndirty_nonhuge)},
+    {NAME("ndirty_huge"),
+        CTL(stats_arenas_i_hpa_shard_empty_slabs_ndirty_huge)}};
 
-static const ctl_named_node_t stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = {
-	{NAME("npageslabs_nonhuge"),
-		CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge)},
-	{NAME("npageslabs_huge"),
-		CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge)},
-	{NAME("nactive_nonhuge"),
-		CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge)},
-	{NAME("nactive_huge"),
-		CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge)},
-	{NAME("ndirty_nonhuge"),
-		CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_nonhuge)},
-	{NAME("ndirty_huge"),
-		CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_huge)}
-};
+static const ctl_named_node_t stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] =
+    {{NAME("npageslabs_nonhuge"),
+         CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge)},
+        {NAME("npageslabs_huge"),
+            CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge)},
+        {NAME("nactive_nonhuge"),
+            CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge)},
+        {NAME("nactive_huge"),
+            CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge)},
+        {NAME("ndirty_nonhuge"),
+            CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_nonhuge)},
+        {NAME("ndirty_huge"),
+            CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_huge)}};
 
-static const ctl_named_node_t super_stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = {
-	{NAME(""),
-		CHILD(named, stats_arenas_i_hpa_shard_nonfull_slabs_j)}
-};
+static const ctl_named_node_t
+    super_stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = {
+        {NAME(""), CHILD(named, stats_arenas_i_hpa_shard_nonfull_slabs_j)}};
 
 static const ctl_indexed_node_t stats_arenas_i_hpa_shard_nonfull_slabs_node[] =
-{
-	{INDEX(stats_arenas_i_hpa_shard_nonfull_slabs_j)}
-};
+    {{INDEX(stats_arenas_i_hpa_shard_nonfull_slabs_j)}};
 
 static const ctl_named_node_t stats_arenas_i_hpa_shard_node[] = {
-	{NAME("full_slabs"),	CHILD(named,
-	    stats_arenas_i_hpa_shard_full_slabs)},
-	{NAME("empty_slabs"),	CHILD(named,
-	    stats_arenas_i_hpa_shard_empty_slabs)},
-	{NAME("nonfull_slabs"),	CHILD(indexed,
-	    stats_arenas_i_hpa_shard_nonfull_slabs)},
+    {NAME("npageslabs"), CTL(stats_arenas_i_hpa_shard_npageslabs)},
+    {NAME("nactive"), CTL(stats_arenas_i_hpa_shard_nactive)},
+    {NAME("ndirty"), CTL(stats_arenas_i_hpa_shard_ndirty)},
 
-	{NAME("npurge_passes"),	CTL(stats_arenas_i_hpa_shard_npurge_passes)},
-	{NAME("npurges"),	CTL(stats_arenas_i_hpa_shard_npurges)},
-	{NAME("nhugifies"),	CTL(stats_arenas_i_hpa_shard_nhugifies)},
-	{NAME("ndehugifies"),	CTL(stats_arenas_i_hpa_shard_ndehugifies)}
-};
+    {NAME("slabs"), CHILD(named, stats_arenas_i_hpa_shard_slabs)},
+
+    {NAME("npurge_passes"), CTL(stats_arenas_i_hpa_shard_npurge_passes)},
+    {NAME("npurges"), CTL(stats_arenas_i_hpa_shard_npurges)},
+    {NAME("nhugifies"), CTL(stats_arenas_i_hpa_shard_nhugifies)},
+    {NAME("nhugify_failures"), CTL(stats_arenas_i_hpa_shard_nhugify_failures)},
+    {NAME("ndehugifies"), CTL(stats_arenas_i_hpa_shard_ndehugifies)},
+
+    {NAME("full_slabs"), CHILD(named, stats_arenas_i_hpa_shard_full_slabs)},
+    {NAME("empty_slabs"), CHILD(named, stats_arenas_i_hpa_shard_empty_slabs)},
+    {NAME("nonfull_slabs"),
+        CHILD(indexed, stats_arenas_i_hpa_shard_nonfull_slabs)}};
 
 static const ctl_named_node_t stats_arenas_i_node[] = {
-	{NAME("nthreads"),	CTL(stats_arenas_i_nthreads)},
-	{NAME("uptime"),	CTL(stats_arenas_i_uptime)},
-	{NAME("dss"),		CTL(stats_arenas_i_dss)},
-	{NAME("dirty_decay_ms"), CTL(stats_arenas_i_dirty_decay_ms)},
-	{NAME("muzzy_decay_ms"), CTL(stats_arenas_i_muzzy_decay_ms)},
-	{NAME("pactive"),	CTL(stats_arenas_i_pactive)},
-	{NAME("pdirty"),	CTL(stats_arenas_i_pdirty)},
-	{NAME("pmuzzy"),	CTL(stats_arenas_i_pmuzzy)},
-	{NAME("mapped"),	CTL(stats_arenas_i_mapped)},
-	{NAME("retained"),	CTL(stats_arenas_i_retained)},
-	{NAME("extent_avail"),	CTL(stats_arenas_i_extent_avail)},
-	{NAME("dirty_npurge"),	CTL(stats_arenas_i_dirty_npurge)},
-	{NAME("dirty_nmadvise"), CTL(stats_arenas_i_dirty_nmadvise)},
-	{NAME("dirty_purged"),	CTL(stats_arenas_i_dirty_purged)},
-	{NAME("muzzy_npurge"),	CTL(stats_arenas_i_muzzy_npurge)},
-	{NAME("muzzy_nmadvise"), CTL(stats_arenas_i_muzzy_nmadvise)},
-	{NAME("muzzy_purged"),	CTL(stats_arenas_i_muzzy_purged)},
-	{NAME("base"),		CTL(stats_arenas_i_base)},
-	{NAME("internal"),	CTL(stats_arenas_i_internal)},
-	{NAME("metadata_thp"),	CTL(stats_arenas_i_metadata_thp)},
-	{NAME("tcache_bytes"),	CTL(stats_arenas_i_tcache_bytes)},
-	{NAME("tcache_stashed_bytes"),
-	    CTL(stats_arenas_i_tcache_stashed_bytes)},
-	{NAME("resident"),	CTL(stats_arenas_i_resident)},
-	{NAME("abandoned_vm"),	CTL(stats_arenas_i_abandoned_vm)},
-	{NAME("hpa_sec_bytes"),	CTL(stats_arenas_i_hpa_sec_bytes)},
-	{NAME("small"),		CHILD(named, stats_arenas_i_small)},
-	{NAME("large"),		CHILD(named, stats_arenas_i_large)},
-	{NAME("bins"),		CHILD(indexed, stats_arenas_i_bins)},
-	{NAME("lextents"),	CHILD(indexed, stats_arenas_i_lextents)},
-	{NAME("extents"),	CHILD(indexed, stats_arenas_i_extents)},
-	{NAME("mutexes"),	CHILD(named, stats_arenas_i_mutexes)},
-	{NAME("hpa_shard"),	CHILD(named, stats_arenas_i_hpa_shard)}
-};
+    {NAME("nthreads"), CTL(stats_arenas_i_nthreads)},
+    {NAME("uptime"), CTL(stats_arenas_i_uptime)},
+    {NAME("dss"), CTL(stats_arenas_i_dss)},
+    {NAME("dirty_decay_ms"), CTL(stats_arenas_i_dirty_decay_ms)},
+    {NAME("muzzy_decay_ms"), CTL(stats_arenas_i_muzzy_decay_ms)},
+    {NAME("pactive"), CTL(stats_arenas_i_pactive)},
+    {NAME("pdirty"), CTL(stats_arenas_i_pdirty)},
+    {NAME("pmuzzy"), CTL(stats_arenas_i_pmuzzy)},
+    {NAME("mapped"), CTL(stats_arenas_i_mapped)},
+    {NAME("retained"), CTL(stats_arenas_i_retained)},
+    {NAME("extent_avail"), CTL(stats_arenas_i_extent_avail)},
+    {NAME("dirty_npurge"), CTL(stats_arenas_i_dirty_npurge)},
+    {NAME("dirty_nmadvise"), CTL(stats_arenas_i_dirty_nmadvise)},
+    {NAME("dirty_purged"), CTL(stats_arenas_i_dirty_purged)},
+    {NAME("muzzy_npurge"), CTL(stats_arenas_i_muzzy_npurge)},
+    {NAME("muzzy_nmadvise"), CTL(stats_arenas_i_muzzy_nmadvise)},
+    {NAME("muzzy_purged"), CTL(stats_arenas_i_muzzy_purged)},
+    {NAME("base"), CTL(stats_arenas_i_base)},
+    {NAME("internal"), CTL(stats_arenas_i_internal)},
+    {NAME("metadata_edata"), CTL(stats_arenas_i_metadata_edata)},
+    {NAME("metadata_rtree"), CTL(stats_arenas_i_metadata_rtree)},
+    {NAME("metadata_thp"), CTL(stats_arenas_i_metadata_thp)},
+    {NAME("tcache_bytes"), CTL(stats_arenas_i_tcache_bytes)},
+    {NAME("tcache_stashed_bytes"), CTL(stats_arenas_i_tcache_stashed_bytes)},
+    {NAME("resident"), CTL(stats_arenas_i_resident)},
+    {NAME("abandoned_vm"), CTL(stats_arenas_i_abandoned_vm)},
+    {NAME("hpa_sec_bytes"), CTL(stats_arenas_i_hpa_sec_bytes)},
+    {NAME("hpa_sec_hits"), CTL(stats_arenas_i_hpa_sec_hits)},
+    {NAME("hpa_sec_misses"), CTL(stats_arenas_i_hpa_sec_misses)},
+    {NAME("hpa_sec_dalloc_noflush"),
+        CTL(stats_arenas_i_hpa_sec_dalloc_noflush)},
+    {NAME("hpa_sec_dalloc_flush"), CTL(stats_arenas_i_hpa_sec_dalloc_flush)},
+    {NAME("hpa_sec_overfills"), CTL(stats_arenas_i_hpa_sec_overfills)},
+    {NAME("small"), CHILD(named, stats_arenas_i_small)},
+    {NAME("large"), CHILD(named, stats_arenas_i_large)},
+    {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)},
+    {NAME("lextents"), CHILD(indexed, stats_arenas_i_lextents)},
+    {NAME("extents"), CHILD(indexed, stats_arenas_i_extents)},
+    {NAME("mutexes"), CHILD(named, stats_arenas_i_mutexes)},
+    {NAME("hpa_shard"), CHILD(named, stats_arenas_i_hpa_shard)}};
 static const ctl_named_node_t super_stats_arenas_i_node[] = {
-	{NAME(""),		CHILD(named, stats_arenas_i)}
-};
+    {NAME(""), CHILD(named, stats_arenas_i)}};
 
-static const ctl_indexed_node_t stats_arenas_node[] = {
-	{INDEX(stats_arenas_i)}
-};
+static const ctl_indexed_node_t stats_arenas_node[] = {{INDEX(stats_arenas_i)}};
 
 static const ctl_named_node_t stats_background_thread_node[] = {
-	{NAME("num_threads"),	CTL(stats_background_thread_num_threads)},
-	{NAME("num_runs"),	CTL(stats_background_thread_num_runs)},
-	{NAME("run_interval"),	CTL(stats_background_thread_run_interval)}
-};
+    {NAME("num_threads"), CTL(stats_background_thread_num_threads)},
+    {NAME("num_runs"), CTL(stats_background_thread_num_runs)},
+    {NAME("run_interval"), CTL(stats_background_thread_run_interval)}};
 
 #define OP(mtx) MUTEX_PROF_DATA_NODE(mutexes_##mtx)
 MUTEX_PROF_GLOBAL_MUTEXES
@@ -825,89 +853,81 @@ MUTEX_PROF_GLOBAL_MUTEXES
 
 static const ctl_named_node_t stats_mutexes_node[] = {
 #define OP(mtx) {NAME(#mtx), CHILD(named, stats_mutexes_##mtx)},
-MUTEX_PROF_GLOBAL_MUTEXES
+    MUTEX_PROF_GLOBAL_MUTEXES
 #undef OP
-	{NAME("reset"),		CTL(stats_mutexes_reset)}
-};
+    {NAME("reset"), CTL(stats_mutexes_reset)}};
 #undef MUTEX_PROF_DATA_NODE
 
+static const ctl_named_node_t approximate_stats_node[] = {
+    {NAME("active"), CTL(approximate_stats_active)},
+};
+
 static const ctl_named_node_t stats_node[] = {
-	{NAME("allocated"),	CTL(stats_allocated)},
-	{NAME("active"),	CTL(stats_active)},
-	{NAME("metadata"),	CTL(stats_metadata)},
-	{NAME("metadata_thp"),	CTL(stats_metadata_thp)},
-	{NAME("resident"),	CTL(stats_resident)},
-	{NAME("mapped"),	CTL(stats_mapped)},
-	{NAME("retained"),	CTL(stats_retained)},
-	{NAME("background_thread"),
-	 CHILD(named, stats_background_thread)},
-	{NAME("mutexes"),	CHILD(named, stats_mutexes)},
-	{NAME("arenas"),	CHILD(indexed, stats_arenas)},
-	{NAME("zero_reallocs"),	CTL(stats_zero_reallocs)},
+    {NAME("allocated"), CTL(stats_allocated)},
+    {NAME("active"), CTL(stats_active)},
+    {NAME("metadata"), CTL(stats_metadata)},
+    {NAME("metadata_edata"), CTL(stats_metadata_edata)},
+    {NAME("metadata_rtree"), CTL(stats_metadata_rtree)},
+    {NAME("metadata_thp"), CTL(stats_metadata_thp)},
+    {NAME("resident"), CTL(stats_resident)},
+    {NAME("mapped"), CTL(stats_mapped)},
+    {NAME("retained"), CTL(stats_retained)},
+    {NAME("background_thread"), CHILD(named, stats_background_thread)},
+    {NAME("mutexes"), CHILD(named, stats_mutexes)},
+    {NAME("arenas"), CHILD(indexed, stats_arenas)},
+    {NAME("zero_reallocs"), CTL(stats_zero_reallocs)},
 };
 
 static const ctl_named_node_t experimental_hooks_node[] = {
-	{NAME("install"),	CTL(experimental_hooks_install)},
-	{NAME("remove"),	CTL(experimental_hooks_remove)},
-	{NAME("prof_backtrace"),	CTL(experimental_hooks_prof_backtrace)},
-	{NAME("prof_dump"),	CTL(experimental_hooks_prof_dump)},
-	{NAME("safety_check_abort"),	CTL(experimental_hooks_safety_check_abort)},
-};
-
-static const ctl_named_node_t experimental_thread_node[] = {
-	{NAME("activity_callback"),
-		CTL(experimental_thread_activity_callback)}
+    {NAME("install"), CTL(experimental_hooks_install)},
+    {NAME("remove"), CTL(experimental_hooks_remove)},
+    {NAME("prof_backtrace"), CTL(experimental_hooks_prof_backtrace)},
+    {NAME("prof_dump"), CTL(experimental_hooks_prof_dump)},
+    {NAME("prof_sample"), CTL(experimental_hooks_prof_sample)},
+    {NAME("prof_sample_free"), CTL(experimental_hooks_prof_sample_free)},
+    {NAME("safety_check_abort"), CTL(experimental_hooks_safety_check_abort)},
+    {NAME("thread_event"), CTL(experimental_hooks_thread_event)},
 };
 
 static const ctl_named_node_t experimental_utilization_node[] = {
-	{NAME("query"),		CTL(experimental_utilization_query)},
-	{NAME("batch_query"),	CTL(experimental_utilization_batch_query)}
-};
+    {NAME("query"), CTL(experimental_utilization_query)},
+    {NAME("batch_query"), CTL(experimental_utilization_batch_query)}};
 
 static const ctl_named_node_t experimental_arenas_i_node[] = {
-	{NAME("pactivep"),	CTL(experimental_arenas_i_pactivep)}
-};
+    {NAME("pactivep"), CTL(experimental_arenas_i_pactivep)}};
 static const ctl_named_node_t super_experimental_arenas_i_node[] = {
-	{NAME(""),		CHILD(named, experimental_arenas_i)}
-};
+    {NAME(""), CHILD(named, experimental_arenas_i)}};
 
 static const ctl_indexed_node_t experimental_arenas_node[] = {
-	{INDEX(experimental_arenas_i)}
-};
+    {INDEX(experimental_arenas_i)}};
 
 static const ctl_named_node_t experimental_prof_recent_node[] = {
-	{NAME("alloc_max"),	CTL(experimental_prof_recent_alloc_max)},
-	{NAME("alloc_dump"),	CTL(experimental_prof_recent_alloc_dump)},
+    {NAME("alloc_max"), CTL(experimental_prof_recent_alloc_max)},
+    {NAME("alloc_dump"), CTL(experimental_prof_recent_alloc_dump)},
 };
 
 static const ctl_named_node_t experimental_node[] = {
-	{NAME("hooks"),		CHILD(named, experimental_hooks)},
-	{NAME("utilization"),	CHILD(named, experimental_utilization)},
-	{NAME("arenas"),	CHILD(indexed, experimental_arenas)},
-	{NAME("arenas_create_ext"),	CTL(experimental_arenas_create_ext)},
-	{NAME("prof_recent"),	CHILD(named, experimental_prof_recent)},
-	{NAME("batch_alloc"),	CTL(experimental_batch_alloc)},
-	{NAME("thread"),	CHILD(named, experimental_thread)}
-};
+    {NAME("hooks"), CHILD(named, experimental_hooks)},
+    {NAME("utilization"), CHILD(named, experimental_utilization)},
+    {NAME("arenas"), CHILD(indexed, experimental_arenas)},
+    {NAME("arenas_create_ext"), CTL(experimental_arenas_create_ext)},
+    {NAME("prof_recent"), CHILD(named, experimental_prof_recent)},
+    {NAME("batch_alloc"), CTL(experimental_batch_alloc)}};
 
-static const ctl_named_node_t	root_node[] = {
-	{NAME("version"),	CTL(version)},
-	{NAME("epoch"),		CTL(epoch)},
-	{NAME("background_thread"),	CTL(background_thread)},
-	{NAME("max_background_threads"),	CTL(max_background_threads)},
-	{NAME("thread"),	CHILD(named, thread)},
-	{NAME("config"),	CHILD(named, config)},
-	{NAME("opt"),		CHILD(named, opt)},
-	{NAME("tcache"),	CHILD(named, tcache)},
-	{NAME("arena"),		CHILD(indexed, arena)},
-	{NAME("arenas"),	CHILD(named, arenas)},
-	{NAME("prof"),		CHILD(named, prof)},
-	{NAME("stats"),		CHILD(named, stats)},
-	{NAME("experimental"),	CHILD(named, experimental)}
-};
+static const ctl_named_node_t root_node[] = {{NAME("version"), CTL(version)},
+    {NAME("epoch"), CTL(epoch)},
+    {NAME("background_thread"), CTL(background_thread)},
+    {NAME("max_background_threads"), CTL(max_background_threads)},
+    {NAME("thread"), CHILD(named, thread)},
+    {NAME("config"), CHILD(named, config)}, {NAME("opt"), CHILD(named, opt)},
+    {NAME("tcache"), CHILD(named, tcache)},
+    {NAME("arena"), CHILD(indexed, arena)},
+    {NAME("arenas"), CHILD(named, arenas)}, {NAME("prof"), CHILD(named, prof)},
+    {NAME("stats"), CHILD(named, stats)},
+    {NAME("approximate_stats"), CHILD(named, approximate_stats)},
+    {NAME("experimental"), CHILD(named, experimental)}};
 static const ctl_named_node_t super_root_node[] = {
-	{NAME(""),		CHILD(named, root)}
-};
+    {NAME(""), CHILD(named, root)}};
 
 #undef NAME
 #undef CHILD
@@ -922,8 +942,7 @@ static const ctl_named_node_t super_root_node[] = {
  */
 static void
 ctl_accum_locked_u64(locked_u64_t *dst, locked_u64_t *src) {
-	locked_inc_u64_unsynchronized(dst,
-	    locked_read_u64_unsynchronized(src));
+	locked_inc_u64_unsynchronized(dst, locked_read_u64_unsynchronized(src));
 }
 
 static void
@@ -963,8 +982,8 @@ arenas_i2a_impl(size_t i, bool compat, bool validate) {
 			 * more than one past the range of indices that have
 			 * initialized ctl data.
 			 */
-			assert(i < ctl_arenas->narenas || (!validate && i ==
-			    ctl_arenas->narenas));
+			assert(i < ctl_arenas->narenas
+			    || (!validate && i == ctl_arenas->narenas));
 			a = (unsigned)i + 2;
 		}
 		break;
@@ -988,12 +1007,12 @@ arenas_i_impl(tsd_t *tsd, size_t i, bool compat, bool init) {
 	if (init && ret == NULL) {
 		if (config_stats) {
 			struct container_s {
-				ctl_arena_t		ctl_arena;
-				ctl_arena_stats_t	astats;
+				ctl_arena_t       ctl_arena;
+				ctl_arena_stats_t astats;
 			};
-			struct container_s *cont =
-			    (struct container_s *)base_alloc(tsd_tsdn(tsd),
-			    b0get(), sizeof(struct container_s), QUANTUM);
+			struct container_s *cont = (struct container_s *)
+			    base_alloc(tsd_tsdn(tsd), b0get(),
+			        sizeof(struct container_s), QUANTUM);
 			if (cont == NULL) {
 				return NULL;
 			}
@@ -1031,23 +1050,7 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) {
 	ctl_arena->pdirty = 0;
 	ctl_arena->pmuzzy = 0;
 	if (config_stats) {
-		memset(&ctl_arena->astats->astats, 0, sizeof(arena_stats_t));
-		ctl_arena->astats->allocated_small = 0;
-		ctl_arena->astats->nmalloc_small = 0;
-		ctl_arena->astats->ndalloc_small = 0;
-		ctl_arena->astats->nrequests_small = 0;
-		ctl_arena->astats->nfills_small = 0;
-		ctl_arena->astats->nflushes_small = 0;
-		memset(ctl_arena->astats->bstats, 0, SC_NBINS *
-		    sizeof(bin_stats_data_t));
-		memset(ctl_arena->astats->lstats, 0, (SC_NSIZES - SC_NBINS) *
-		    sizeof(arena_stats_large_t));
-		memset(ctl_arena->astats->estats, 0, SC_NPSIZES *
-		    sizeof(pac_estats_t));
-		memset(&ctl_arena->astats->hpastats, 0,
-		    sizeof(hpa_shard_stats_t));
-		memset(&ctl_arena->astats->secstats, 0,
-		    sizeof(sec_stats_t));
+		memset(ctl_arena->astats, 0, sizeof(*(ctl_arena->astats)));
 	}
 }
 
@@ -1062,13 +1065,13 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) {
 		    &ctl_arena->pdirty, &ctl_arena->pmuzzy,
 		    &ctl_arena->astats->astats, ctl_arena->astats->bstats,
 		    ctl_arena->astats->lstats, ctl_arena->astats->estats,
-		    &ctl_arena->astats->hpastats, &ctl_arena->astats->secstats);
+		    &ctl_arena->astats->hpastats);
 
 		for (i = 0; i < SC_NBINS; i++) {
 			bin_stats_t *bstats =
 			    &ctl_arena->astats->bstats[i].stats_data;
-			ctl_arena->astats->allocated_small += bstats->curregs *
-			    sz_index2size(i);
+			ctl_arena->astats->allocated_small += bstats->curregs
+			    * sz_index2size(i);
 			ctl_arena->astats->nmalloc_small += bstats->nmalloc;
 			ctl_arena->astats->ndalloc_small += bstats->ndalloc;
 			ctl_arena->astats->nrequests_small += bstats->nrequests;
@@ -1084,8 +1087,8 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) {
 }
 
 static void
-ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena,
-    bool destroyed) {
+ctl_arena_stats_sdmerge(
+    ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, bool destroyed) {
 	unsigned i;
 
 	if (!destroyed) {
@@ -1106,48 +1109,59 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena,
 
 		if (!destroyed) {
 			sdstats->astats.mapped += astats->astats.mapped;
-			sdstats->astats.pa_shard_stats.pac_stats.retained
-			    += astats->astats.pa_shard_stats.pac_stats.retained;
-			sdstats->astats.pa_shard_stats.edata_avail
-			    += astats->astats.pa_shard_stats.edata_avail;
+			sdstats->astats.pa_shard_stats.pac_stats.retained +=
+			    astats->astats.pa_shard_stats.pac_stats.retained;
+			sdstats->astats.pa_shard_stats.edata_avail +=
+			    astats->astats.pa_shard_stats.edata_avail;
 		}
 
-		ctl_accum_locked_u64(
-		    &sdstats->astats.pa_shard_stats.pac_stats.decay_dirty.npurge,
-		    &astats->astats.pa_shard_stats.pac_stats.decay_dirty.npurge);
-		ctl_accum_locked_u64(
-		    &sdstats->astats.pa_shard_stats.pac_stats.decay_dirty.nmadvise,
-		    &astats->astats.pa_shard_stats.pac_stats.decay_dirty.nmadvise);
-		ctl_accum_locked_u64(
-		    &sdstats->astats.pa_shard_stats.pac_stats.decay_dirty.purged,
-		    &astats->astats.pa_shard_stats.pac_stats.decay_dirty.purged);
+		ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats
+		                         .decay_dirty.npurge,
+		    &astats->astats.pa_shard_stats.pac_stats.decay_dirty
+		        .npurge);
+		ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats
+		                         .decay_dirty.nmadvise,
+		    &astats->astats.pa_shard_stats.pac_stats.decay_dirty
+		        .nmadvise);
+		ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats
+		                         .decay_dirty.purged,
+		    &astats->astats.pa_shard_stats.pac_stats.decay_dirty
+		        .purged);
 
-		ctl_accum_locked_u64(
-		    &sdstats->astats.pa_shard_stats.pac_stats.decay_muzzy.npurge,
-		    &astats->astats.pa_shard_stats.pac_stats.decay_muzzy.npurge);
-		ctl_accum_locked_u64(
-		    &sdstats->astats.pa_shard_stats.pac_stats.decay_muzzy.nmadvise,
-		    &astats->astats.pa_shard_stats.pac_stats.decay_muzzy.nmadvise);
-		ctl_accum_locked_u64(
-		    &sdstats->astats.pa_shard_stats.pac_stats.decay_muzzy.purged,
-		    &astats->astats.pa_shard_stats.pac_stats.decay_muzzy.purged);
+		ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats
+		                         .decay_muzzy.npurge,
+		    &astats->astats.pa_shard_stats.pac_stats.decay_muzzy
+		        .npurge);
+		ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats
+		                         .decay_muzzy.nmadvise,
+		    &astats->astats.pa_shard_stats.pac_stats.decay_muzzy
+		        .nmadvise);
+		ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats
+		                         .decay_muzzy.purged,
+		    &astats->astats.pa_shard_stats.pac_stats.decay_muzzy
+		        .purged);
 
-#define OP(mtx) malloc_mutex_prof_merge(				\
-		    &(sdstats->astats.mutex_prof_data[			\
-		        arena_prof_mutex_##mtx]),			\
-		    &(astats->astats.mutex_prof_data[			\
-		        arena_prof_mutex_##mtx]));
-MUTEX_PROF_ARENA_MUTEXES
+#define OP(mtx)                                                                \
+	malloc_mutex_prof_merge(                                               \
+	    &(sdstats->astats.mutex_prof_data[arena_prof_mutex_##mtx]),        \
+	    &(astats->astats.mutex_prof_data[arena_prof_mutex_##mtx]));
+		MUTEX_PROF_ARENA_MUTEXES
 #undef OP
 		if (!destroyed) {
 			sdstats->astats.base += astats->astats.base;
+			sdstats->astats.metadata_edata +=
+			    astats->astats.metadata_edata;
+			sdstats->astats.metadata_rtree +=
+			    astats->astats.metadata_rtree;
 			sdstats->astats.resident += astats->astats.resident;
-			sdstats->astats.metadata_thp += astats->astats.metadata_thp;
+			sdstats->astats.metadata_thp +=
+			    astats->astats.metadata_thp;
 			ctl_accum_atomic_zu(&sdstats->astats.internal,
 			    &astats->astats.internal);
 		} else {
 			assert(atomic_load_zu(
-			    &astats->astats.internal, ATOMIC_RELAXED) == 0);
+			           &astats->astats.internal, ATOMIC_RELAXED)
+			    == 0);
 		}
 
 		if (!destroyed) {
@@ -1169,8 +1183,8 @@ MUTEX_PROF_ARENA_MUTEXES
 		}
 		sdstats->astats.nmalloc_large += astats->astats.nmalloc_large;
 		sdstats->astats.ndalloc_large += astats->astats.ndalloc_large;
-		sdstats->astats.nrequests_large
-		    += astats->astats.nrequests_large;
+		sdstats->astats.nrequests_large +=
+		    astats->astats.nrequests_large;
 		sdstats->astats.nflushes_large += astats->astats.nflushes_large;
 		ctl_accum_atomic_zu(
 		    &sdstats->astats.pa_shard_stats.pac_stats.abandoned_vm,
@@ -1231,19 +1245,18 @@ MUTEX_PROF_ARENA_MUTEXES
 		for (i = 0; i < SC_NPSIZES; i++) {
 			sdstats->estats[i].ndirty += astats->estats[i].ndirty;
 			sdstats->estats[i].nmuzzy += astats->estats[i].nmuzzy;
-			sdstats->estats[i].nretained
-			    += astats->estats[i].nretained;
-			sdstats->estats[i].dirty_bytes
-			    += astats->estats[i].dirty_bytes;
-			sdstats->estats[i].muzzy_bytes
-			    += astats->estats[i].muzzy_bytes;
-			sdstats->estats[i].retained_bytes
-			    += astats->estats[i].retained_bytes;
+			sdstats->estats[i].nretained +=
+			    astats->estats[i].nretained;
+			sdstats->estats[i].dirty_bytes +=
+			    astats->estats[i].dirty_bytes;
+			sdstats->estats[i].muzzy_bytes +=
+			    astats->estats[i].muzzy_bytes;
+			sdstats->estats[i].retained_bytes +=
+			    astats->estats[i].retained_bytes;
 		}
 
 		/* Merge HPA stats. */
 		hpa_shard_stats_accum(&sdstats->hpastats, &astats->hpastats);
-		sec_stats_accum(&sdstats->secstats, &astats->secstats);
 	}
 }
 
@@ -1260,11 +1273,11 @@ ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, ctl_arena_t *ctl_sdarena,
 
 static unsigned
 ctl_arena_init(tsd_t *tsd, const arena_config_t *config) {
-	unsigned arena_ind;
+	unsigned     arena_ind;
 	ctl_arena_t *ctl_arena;
 
-	if ((ctl_arena = ql_last(&ctl_arenas->destroyed, destroyed_link)) !=
-	    NULL) {
+	if ((ctl_arena = ql_last(&ctl_arenas->destroyed, destroyed_link))
+	    != NULL) {
 		ql_remove(&ctl_arenas->destroyed, ctl_arena, destroyed_link);
 		arena_ind = ctl_arena->arena_ind;
 	} else {
@@ -1291,8 +1304,8 @@ ctl_arena_init(tsd_t *tsd, const arena_config_t *config) {
 static void
 ctl_background_thread_stats_read(tsdn_t *tsdn) {
 	background_thread_stats_t *stats = &ctl_stats->background_thread;
-	if (!have_background_thread ||
-	    background_thread_stats_read(tsdn, stats)) {
+	if (!have_background_thread
+	    || background_thread_stats_read(tsdn, stats)) {
 		memset(stats, 0, sizeof(background_thread_stats_t));
 		nstime_init_zero(&stats->run_interval);
 	}
@@ -1303,9 +1316,18 @@ ctl_background_thread_stats_read(tsdn_t *tsdn) {
 
 static void
 ctl_refresh(tsdn_t *tsdn) {
-	unsigned i;
+	malloc_mutex_assert_owner(tsdn, &ctl_mtx);
+	/*
+	 * We are guaranteed that `ctl_arenas->narenas` will not change
+	 * underneath us since we hold `ctl_mtx` for the duration of this
+	 * function. Unfortunately static analysis tools do not understand this,
+	 * so we are extracting `narenas` into a local variable solely for the
+	 * sake of exposing this information to such tools.
+	 */
+	const unsigned narenas = ctl_arenas->narenas;
+	assert(narenas > 0);
 	ctl_arena_t *ctl_sarena = arenas_i(MALLCTL_ARENAS_ALL);
-	VARIABLE_ARRAY(arena_t *, tarenas, ctl_arenas->narenas);
+	VARIABLE_ARRAY_UNSAFE(arena_t *, tarenas, narenas);
 
 	/*
 	 * Clear sum stats, since they will be merged into by
@@ -1313,41 +1335,45 @@ ctl_refresh(tsdn_t *tsdn) {
 	 */
 	ctl_arena_clear(ctl_sarena);
 
-	for (i = 0; i < ctl_arenas->narenas; i++) {
+	for (unsigned i = 0; i < narenas; i++) {
 		tarenas[i] = arena_get(tsdn, i, false);
 	}
 
-	for (i = 0; i < ctl_arenas->narenas; i++) {
+	for (unsigned i = 0; i < narenas; i++) {
 		ctl_arena_t *ctl_arena = arenas_i(i);
-		bool initialized = (tarenas[i] != NULL);
+		bool         initialized = (tarenas[i] != NULL);
 
 		ctl_arena->initialized = initialized;
 		if (initialized) {
-			ctl_arena_refresh(tsdn, tarenas[i], ctl_sarena, i,
-			    false);
+			ctl_arena_refresh(
+			    tsdn, tarenas[i], ctl_sarena, i, false);
 		}
 	}
 
 	if (config_stats) {
-		ctl_stats->allocated = ctl_sarena->astats->allocated_small +
-		    ctl_sarena->astats->astats.allocated_large;
+		ctl_stats->allocated = ctl_sarena->astats->allocated_small
+		    + ctl_sarena->astats->astats.allocated_large;
 		ctl_stats->active = (ctl_sarena->pactive << LG_PAGE);
-		ctl_stats->metadata = ctl_sarena->astats->astats.base +
-		    atomic_load_zu(&ctl_sarena->astats->astats.internal,
-			ATOMIC_RELAXED);
+		ctl_stats->metadata = ctl_sarena->astats->astats.base
+		    + atomic_load_zu(
+		        &ctl_sarena->astats->astats.internal, ATOMIC_RELAXED);
+		ctl_stats->metadata_edata =
+		    ctl_sarena->astats->astats.metadata_edata;
+		ctl_stats->metadata_rtree =
+		    ctl_sarena->astats->astats.metadata_rtree;
 		ctl_stats->resident = ctl_sarena->astats->astats.resident;
 		ctl_stats->metadata_thp =
 		    ctl_sarena->astats->astats.metadata_thp;
 		ctl_stats->mapped = ctl_sarena->astats->astats.mapped;
-		ctl_stats->retained = ctl_sarena->astats->astats
-		    .pa_shard_stats.pac_stats.retained;
+		ctl_stats->retained = ctl_sarena->astats->astats.pa_shard_stats
+		                          .pac_stats.retained;
 
 		ctl_background_thread_stats_read(tsdn);
 
-#define READ_GLOBAL_MUTEX_PROF_DATA(i, mtx)				\
-    malloc_mutex_lock(tsdn, &mtx);					\
-    malloc_mutex_prof_read(tsdn, &ctl_stats->mutex_prof_data[i], &mtx);	\
-    malloc_mutex_unlock(tsdn, &mtx);
+#define READ_GLOBAL_MUTEX_PROF_DATA(i, mtx)                                    \
+	malloc_mutex_lock(tsdn, &mtx);                                         \
+	malloc_mutex_prof_read(tsdn, &ctl_stats->mutex_prof_data[i], &mtx);    \
+	malloc_mutex_unlock(tsdn, &mtx);
 
 		if (config_prof && opt_prof) {
 			READ_GLOBAL_MUTEX_PROF_DATA(
@@ -1370,9 +1396,9 @@ ctl_refresh(tsdn_t *tsdn) {
 			    global_prof_mutex_background_thread,
 			    background_thread_lock);
 		} else {
-			memset(&ctl_stats->mutex_prof_data[
-			    global_prof_mutex_background_thread], 0,
-			    sizeof(mutex_prof_data_t));
+			memset(&ctl_stats->mutex_prof_data
+			           [global_prof_mutex_background_thread],
+			    0, sizeof(mutex_prof_data_t));
 		}
 		/* We own ctl mutex already. */
 		malloc_mutex_prof_read(tsdn,
@@ -1385,21 +1411,21 @@ ctl_refresh(tsdn_t *tsdn) {
 
 static bool
 ctl_init(tsd_t *tsd) {
-	bool ret;
+	bool    ret;
 	tsdn_t *tsdn = tsd_tsdn(tsd);
 
 	malloc_mutex_lock(tsdn, &ctl_mtx);
 	if (!ctl_initialized) {
 		ctl_arena_t *ctl_sarena, *ctl_darena;
-		unsigned i;
+		unsigned     i;
 
 		/*
 		 * Allocate demand-zeroed space for pointers to the full
 		 * range of supported arena indices.
 		 */
 		if (ctl_arenas == NULL) {
-			ctl_arenas = (ctl_arenas_t *)base_alloc(tsdn,
-			    b0get(), sizeof(ctl_arenas_t), QUANTUM);
+			ctl_arenas = (ctl_arenas_t *)base_alloc(
+			    tsdn, b0get(), sizeof(ctl_arenas_t), QUANTUM);
 			if (ctl_arenas == NULL) {
 				ret = true;
 				goto label_return;
@@ -1407,8 +1433,8 @@ ctl_init(tsd_t *tsd) {
 		}
 
 		if (config_stats && ctl_stats == NULL) {
-			ctl_stats = (ctl_stats_t *)base_alloc(tsdn, b0get(),
-			    sizeof(ctl_stats_t), QUANTUM);
+			ctl_stats = (ctl_stats_t *)base_alloc(
+			    tsdn, b0get(), sizeof(ctl_stats_t), QUANTUM);
 			if (ctl_stats == NULL) {
 				ret = true;
 				goto label_return;
@@ -1420,15 +1446,17 @@ ctl_init(tsd_t *tsd) {
 		 * here rather than doing it lazily elsewhere, in order
 		 * to limit when OOM-caused errors can occur.
 		 */
-		if ((ctl_sarena = arenas_i_impl(tsd, MALLCTL_ARENAS_ALL, false,
-		    true)) == NULL) {
+		if ((ctl_sarena = arenas_i_impl(
+		         tsd, MALLCTL_ARENAS_ALL, false, true))
+		    == NULL) {
 			ret = true;
 			goto label_return;
 		}
 		ctl_sarena->initialized = true;
 
-		if ((ctl_darena = arenas_i_impl(tsd, MALLCTL_ARENAS_DESTROYED,
-		    false, true)) == NULL) {
+		if ((ctl_darena = arenas_i_impl(
+		         tsd, MALLCTL_ARENAS_DESTROYED, false, true))
+		    == NULL) {
 			ret = true;
 			goto label_return;
 		}
@@ -1463,9 +1491,9 @@ static int
 ctl_lookup(tsdn_t *tsdn, const ctl_named_node_t *starting_node,
     const char *name, const ctl_named_node_t **ending_nodep, size_t *mibp,
     size_t *depthp) {
-	int ret;
-	const char *elm, *tdot, *dot;
-	size_t elen, i, j;
+	int                     ret;
+	const char             *elm, *tdot, *dot;
+	size_t                  elen, i, j;
 	const ctl_named_node_t *node;
 
 	elm = name;
@@ -1487,8 +1515,8 @@ ctl_lookup(tsdn_t *tsdn, const ctl_named_node_t *starting_node,
 			for (j = 0; j < node->nchildren; j++) {
 				const ctl_named_node_t *child =
 				    ctl_named_children(node, j);
-				if (strlen(child->name) == elen &&
-				    strncmp(elm, child->name, elen) == 0) {
+				if (strlen(child->name) == elen
+				    && strncmp(elm, child->name, elen) == 0) {
 					node = child;
 					mibp[i] = j;
 					break;
@@ -1499,7 +1527,7 @@ ctl_lookup(tsdn_t *tsdn, const ctl_named_node_t *starting_node,
 				goto label_return;
 			}
 		} else {
-			uintmax_t index;
+			uintmax_t                 index;
 			const ctl_indexed_node_t *inode;
 
 			/* Children are indexed. */
@@ -1537,8 +1565,8 @@ ctl_lookup(tsdn_t *tsdn, const ctl_named_node_t *starting_node,
 
 		/* Update elm. */
 		elm = &dot[1];
-		dot = ((tdot = strchr(elm, '.')) != NULL) ? tdot :
-		    strchr(elm, '\0');
+		dot = ((tdot = strchr(elm, '.')) != NULL) ? tdot
+		                                          : strchr(elm, '\0');
 		elen = (size_t)((uintptr_t)dot - (uintptr_t)elm);
 	}
 	if (ending_nodep != NULL) {
@@ -1553,9 +1581,9 @@ label_return:
 int
 ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp,
     void *newp, size_t newlen) {
-	int ret;
-	size_t depth;
-	size_t mib[CTL_MAX_DEPTH];
+	int                     ret;
+	size_t                  depth;
+	size_t                  mib[CTL_MAX_DEPTH];
 	const ctl_named_node_t *node;
 
 	if (!ctl_initialized && ctl_init(tsd)) {
@@ -1564,8 +1592,8 @@ ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp,
 	}
 
 	depth = CTL_MAX_DEPTH;
-	ret = ctl_lookup(tsd_tsdn(tsd), super_root_node, name, &node, mib,
-	    &depth);
+	ret = ctl_lookup(
+	    tsd_tsdn(tsd), super_root_node, name, &node, mib, &depth);
 	if (ret != 0) {
 		goto label_return;
 	}
@@ -1578,7 +1606,7 @@ ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp,
 	}
 
 label_return:
-	return(ret);
+	return (ret);
 }
 
 int
@@ -1590,10 +1618,10 @@ ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, size_t *miblenp) {
 		goto label_return;
 	}
 
-	ret = ctl_lookup(tsd_tsdn(tsd), super_root_node, name, NULL, mibp,
-	    miblenp);
+	ret = ctl_lookup(
+	    tsd_tsdn(tsd), super_root_node, name, NULL, mibp, miblenp);
 label_return:
-	return(ret);
+	return (ret);
 }
 
 static int
@@ -1629,13 +1657,13 @@ ctl_lookupbymib(tsdn_t *tsdn, const ctl_named_node_t **ending_nodep,
 	ret = 0;
 
 label_return:
-	return(ret);
+	return (ret);
 }
 
 int
 ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
+	int                     ret;
 	const ctl_named_node_t *node;
 
 	if (!ctl_initialized && ctl_init(tsd)) {
@@ -1657,13 +1685,13 @@ ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
 	}
 
 label_return:
-	return(ret);
+	return (ret);
 }
 
 int
-ctl_mibnametomib(tsd_t *tsd, size_t *mib, size_t miblen, const char *name,
-    size_t *miblenp) {
-	int ret;
+ctl_mibnametomib(
+    tsd_t *tsd, size_t *mib, size_t miblen, const char *name, size_t *miblenp) {
+	int                     ret;
 	const ctl_named_node_t *node;
 
 	if (!ctl_initialized && ctl_init(tsd)) {
@@ -1683,17 +1711,17 @@ ctl_mibnametomib(tsd_t *tsd, size_t *mib, size_t miblen, const char *name,
 	assert(miblenp != NULL);
 	assert(*miblenp >= miblen);
 	*miblenp -= miblen;
-	ret = ctl_lookup(tsd_tsdn(tsd), node, name, NULL, mib + miblen,
-	    miblenp);
+	ret = ctl_lookup(
+	    tsd_tsdn(tsd), node, name, NULL, mib + miblen, miblenp);
 	*miblenp += miblen;
 label_return:
-	return(ret);
+	return (ret);
 }
 
 int
 ctl_bymibname(tsd_t *tsd, size_t *mib, size_t miblen, const char *name,
     size_t *miblenp, void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
+	int                     ret;
 	const ctl_named_node_t *node;
 
 	if (!ctl_initialized && ctl_init(tsd)) {
@@ -1716,29 +1744,29 @@ ctl_bymibname(tsd_t *tsd, size_t *mib, size_t miblen, const char *name,
 	/*
 	 * The same node supplies the starting node and stores the ending node.
 	 */
-	ret = ctl_lookup(tsd_tsdn(tsd), node, name, &node, mib + miblen,
-	    miblenp);
+	ret = ctl_lookup(
+	    tsd_tsdn(tsd), node, name, &node, mib + miblen, miblenp);
 	*miblenp += miblen;
 	if (ret != 0) {
 		goto label_return;
 	}
 
 	if (node != NULL && node->ctl) {
-		ret = node->ctl(tsd, mib, *miblenp, oldp, oldlenp, newp,
-		    newlen);
+		ret = node->ctl(
+		    tsd, mib, *miblenp, oldp, oldlenp, newp, newlen);
 	} else {
 		/* The name refers to a partial path through the ctl tree. */
 		ret = ENOENT;
 	}
 
 label_return:
-	return(ret);
+	return (ret);
 }
 
 bool
 ctl_boot(void) {
 	if (malloc_mutex_init(&ctl_mtx, "ctl", WITNESS_RANK_CTL,
-	    malloc_mutex_rank_exclusive)) {
+	        malloc_mutex_rank_exclusive)) {
 		return true;
 	}
 
@@ -1770,218 +1798,201 @@ ctl_mtx_assert_held(tsdn_t *tsdn) {
 /******************************************************************************/
 /* *_ctl() functions. */
 
-#define READONLY()	do {						\
-	if (newp != NULL || newlen != 0) {				\
-		ret = EPERM;						\
-		goto label_return;					\
-	}								\
-} while (0)
+#define READONLY()                                                             \
+	do {                                                                   \
+		if (newp != NULL || newlen != 0) {                             \
+			ret = EPERM;                                           \
+			goto label_return;                                     \
+		}                                                              \
+	} while (0)
 
-#define WRITEONLY()	do {						\
-	if (oldp != NULL || oldlenp != NULL) {				\
-		ret = EPERM;						\
-		goto label_return;					\
-	}								\
-} while (0)
+#define WRITEONLY()                                                            \
+	do {                                                                   \
+		if (oldp != NULL || oldlenp != NULL) {                         \
+			ret = EPERM;                                           \
+			goto label_return;                                     \
+		}                                                              \
+	} while (0)
 
 /* Can read or write, but not both. */
-#define READ_XOR_WRITE()	do {					\
-	if ((oldp != NULL && oldlenp != NULL) && (newp != NULL ||	\
-	    newlen != 0)) {						\
-		ret = EPERM;						\
-		goto label_return;					\
-	}								\
-} while (0)
+#define READ_XOR_WRITE()                                                       \
+	do {                                                                   \
+		if ((oldp != NULL && oldlenp != NULL)                          \
+		    && (newp != NULL || newlen != 0)) {                        \
+			ret = EPERM;                                           \
+			goto label_return;                                     \
+		}                                                              \
+	} while (0)
 
 /* Can neither read nor write. */
-#define NEITHER_READ_NOR_WRITE()	do {				\
-	if (oldp != NULL || oldlenp != NULL || newp != NULL ||		\
-	    newlen != 0) {						\
-		ret = EPERM;						\
-		goto label_return;					\
-	}								\
-} while (0)
+#define NEITHER_READ_NOR_WRITE()                                               \
+	do {                                                                   \
+		if (oldp != NULL || oldlenp != NULL || newp != NULL            \
+		    || newlen != 0) {                                          \
+			ret = EPERM;                                           \
+			goto label_return;                                     \
+		}                                                              \
+	} while (0)
 
 /* Verify that the space provided is enough. */
-#define VERIFY_READ(t)	do {						\
-	if (oldp == NULL || oldlenp == NULL || *oldlenp != sizeof(t)) {	\
-		*oldlenp = 0;						\
-		ret = EINVAL;						\
-		goto label_return;					\
-	}								\
-} while (0)
+#define VERIFY_READ(t)                                                         \
+	do {                                                                   \
+		if (oldp == NULL || oldlenp == NULL                            \
+		    || *oldlenp != sizeof(t)) {                                \
+			if (oldlenp != NULL) {                                 \
+				*oldlenp = 0;                                  \
+			}                                                      \
+			ret = EINVAL;                                          \
+			goto label_return;                                     \
+		}                                                              \
+	} while (0)
 
-#define READ(v, t)	do {						\
-	if (oldp != NULL && oldlenp != NULL) {				\
-		if (*oldlenp != sizeof(t)) {				\
-			size_t	copylen = (sizeof(t) <= *oldlenp)	\
-			    ? sizeof(t) : *oldlenp;			\
-			memcpy(oldp, (void *)&(v), copylen);		\
-			*oldlenp = copylen;				\
-			ret = EINVAL;					\
-			goto label_return;				\
-		}							\
-		*(t *)oldp = (v);					\
-	}								\
-} while (0)
+#define READ(v, t)                                                             \
+	do {                                                                   \
+		if (oldp != NULL && oldlenp != NULL) {                         \
+			if (*oldlenp != sizeof(t)) {                           \
+				size_t copylen = (sizeof(t) <= *oldlenp)       \
+				    ? sizeof(t)                                \
+				    : *oldlenp;                                \
+				memcpy(oldp, (void *)&(v), copylen);           \
+				*oldlenp = copylen;                            \
+				ret = EINVAL;                                  \
+				goto label_return;                             \
+			}                                                      \
+			*(t *)oldp = (v);                                      \
+		}                                                              \
+	} while (0)
 
-#define WRITE(v, t)	do {						\
-	if (newp != NULL) {						\
-		if (newlen != sizeof(t)) {				\
-			ret = EINVAL;					\
-			goto label_return;				\
-		}							\
-		(v) = *(t *)newp;					\
-	}								\
-} while (0)
+#define WRITE(v, t)                                                            \
+	do {                                                                   \
+		if (newp != NULL) {                                            \
+			if (newlen != sizeof(t)) {                             \
+				ret = EINVAL;                                  \
+				goto label_return;                             \
+			}                                                      \
+			(v) = *(t *)newp;                                      \
+		}                                                              \
+	} while (0)
 
-#define ASSURED_WRITE(v, t)	do {					\
-	if (newp == NULL || newlen != sizeof(t)) {			\
-		ret = EINVAL;						\
-		goto label_return;					\
-	}								\
-	(v) = *(t *)newp;						\
-} while (0)
+#define ASSURED_WRITE(v, t)                                                    \
+	do {                                                                   \
+		if (newp == NULL || newlen != sizeof(t)) {                     \
+			ret = EINVAL;                                          \
+			goto label_return;                                     \
+		}                                                              \
+		(v) = *(t *)newp;                                              \
+	} while (0)
 
-#define MIB_UNSIGNED(v, i) do {						\
-	if (mib[i] > UINT_MAX) {					\
-		ret = EFAULT;						\
-		goto label_return;					\
-	}								\
-	v = (unsigned)mib[i];						\
-} while (0)
+#define MIB_UNSIGNED(v, i)                                                     \
+	do {                                                                   \
+		if (mib[i] > UINT_MAX) {                                       \
+			ret = EFAULT;                                          \
+			goto label_return;                                     \
+		}                                                              \
+		v = (unsigned)mib[i];                                          \
+	} while (0)
 
 /*
  * There's a lot of code duplication in the following macros due to limitations
  * in how nested cpp macros are expanded.
  */
-#define CTL_RO_CLGEN(c, l, n, v, t)					\
-static int								\
-n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,	\
-    size_t *oldlenp, void *newp, size_t newlen) {			\
-	int ret;							\
-	t oldval;							\
-									\
-	if (!(c)) {							\
-		return ENOENT;						\
-	}								\
-	if (l) {							\
-		malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);		\
-	}								\
-	READONLY();							\
-	oldval = (v);							\
-	READ(oldval, t);						\
-									\
-	ret = 0;							\
-label_return:								\
-	if (l) {							\
-		malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);		\
-	}								\
-	return ret;							\
-}
+#define CTL_RO_CGEN(c, n, v, t)                                                \
+	static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,       \
+	    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {          \
+		int ret;                                                       \
+		t   oldval;                                                    \
+                                                                               \
+		if (!(c)) {                                                    \
+			return ENOENT;                                         \
+		}                                                              \
+		malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);                    \
+		READONLY();                                                    \
+		oldval = (v);                                                  \
+		READ(oldval, t);                                               \
+                                                                               \
+		ret = 0;                                                       \
+	label_return:                                                          \
+		malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);                  \
+		return ret;                                                    \
+	}
 
-#define CTL_RO_CGEN(c, n, v, t)						\
-static int								\
-n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,			\
-    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {		\
-	int ret;							\
-	t oldval;							\
-									\
-	if (!(c)) {							\
-		return ENOENT;						\
-	}								\
-	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);			\
-	READONLY();							\
-	oldval = (v);							\
-	READ(oldval, t);						\
-									\
-	ret = 0;							\
-label_return:								\
-	malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);			\
-	return ret;							\
-}
-
-#define CTL_RO_GEN(n, v, t)						\
-static int								\
-n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,	\
-    size_t *oldlenp, void *newp, size_t newlen) {			\
-	int ret;							\
-	t oldval;							\
-									\
-	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);			\
-	READONLY();							\
-	oldval = (v);							\
-	READ(oldval, t);						\
-									\
-	ret = 0;							\
-label_return:								\
-	malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);			\
-	return ret;							\
-}
+#define CTL_RO_GEN(n, v, t)                                                    \
+	static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,       \
+	    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {          \
+		int ret;                                                       \
+		t   oldval;                                                    \
+                                                                               \
+		malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);                    \
+		READONLY();                                                    \
+		oldval = (v);                                                  \
+		READ(oldval, t);                                               \
+                                                                               \
+		ret = 0;                                                       \
+	label_return:                                                          \
+		malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);                  \
+		return ret;                                                    \
+	}
 
 /*
  * ctl_mtx is not acquired, under the assumption that no pertinent data will
  * mutate during the call.
  */
-#define CTL_RO_NL_CGEN(c, n, v, t)					\
-static int								\
-n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,			\
-    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {		\
-	int ret;							\
-	t oldval;							\
-									\
-	if (!(c)) {							\
-		return ENOENT;						\
-	}								\
-	READONLY();							\
-	oldval = (v);							\
-	READ(oldval, t);						\
-									\
-	ret = 0;							\
-label_return:								\
-	return ret;							\
-}
+#define CTL_RO_NL_CGEN(c, n, v, t)                                             \
+	static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,       \
+	    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {          \
+		int ret;                                                       \
+		t   oldval;                                                    \
+                                                                               \
+		if (!(c)) {                                                    \
+			return ENOENT;                                         \
+		}                                                              \
+		READONLY();                                                    \
+		oldval = (v);                                                  \
+		READ(oldval, t);                                               \
+                                                                               \
+		ret = 0;                                                       \
+	label_return:                                                          \
+		return ret;                                                    \
+	}
 
-#define CTL_RO_NL_GEN(n, v, t)						\
-static int								\
-n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,			\
-    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {		\
-	int ret;							\
-	t oldval;							\
-									\
-	READONLY();							\
-	oldval = (v);							\
-	READ(oldval, t);						\
-									\
-	ret = 0;							\
-label_return:								\
-	return ret;							\
-}
+#define CTL_RO_NL_GEN(n, v, t)                                                 \
+	static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,       \
+	    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {          \
+		int ret;                                                       \
+		t   oldval;                                                    \
+                                                                               \
+		READONLY();                                                    \
+		oldval = (v);                                                  \
+		READ(oldval, t);                                               \
+                                                                               \
+		ret = 0;                                                       \
+	label_return:                                                          \
+		return ret;                                                    \
+	}
 
-#define CTL_RO_CONFIG_GEN(n, t)						\
-static int								\
-n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,			\
-    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {		\
-	int ret;							\
-	t oldval;							\
-									\
-	READONLY();							\
-	oldval = n;							\
-	READ(oldval, t);						\
-									\
-	ret = 0;							\
-label_return:								\
-	return ret;							\
-}
+#define CTL_RO_CONFIG_GEN(n, t)                                                \
+	static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,       \
+	    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {          \
+		int ret;                                                       \
+		t   oldval;                                                    \
+                                                                               \
+		READONLY();                                                    \
+		oldval = n;                                                    \
+		READ(oldval, t);                                               \
+                                                                               \
+		ret = 0;                                                       \
+	label_return:                                                          \
+		return ret;                                                    \
+	}
 
 /******************************************************************************/
 
 CTL_RO_NL_GEN(version, JEMALLOC_VERSION, const char *)
 
 static int
-epoch_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
-    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
+epoch_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen) {
+	int             ret;
 	UNUSED uint64_t newval;
 
 	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
@@ -1998,10 +2009,9 @@ label_return:
 }
 
 static int
-background_thread_ctl(tsd_t *tsd, const size_t *mib,
-    size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen) {
-	int ret;
+background_thread_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen) {
+	int  ret;
 	bool oldval;
 
 	if (!have_background_thread) {
@@ -2050,10 +2060,9 @@ label_return:
 }
 
 static int
-max_background_threads_ctl(tsd_t *tsd, const size_t *mib,
-    size_t miblen, void *oldp, size_t *oldlenp, void *newp,
-    size_t newlen) {
-	int ret;
+max_background_threads_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+	int    ret;
 	size_t oldval;
 
 	if (!have_background_thread) {
@@ -2079,7 +2088,7 @@ max_background_threads_ctl(tsd_t *tsd, const size_t *mib,
 			ret = 0;
 			goto label_return;
 		}
-		if (newval > opt_max_background_threads) {
+		if (newval > opt_max_background_threads || newval == 0) {
 			ret = EINVAL;
 			goto label_return;
 		}
@@ -2119,6 +2128,7 @@ CTL_RO_CONFIG_GEN(config_opt_safety_checks, bool)
 CTL_RO_CONFIG_GEN(config_prof, bool)
 CTL_RO_CONFIG_GEN(config_prof_libgcc, bool)
 CTL_RO_CONFIG_GEN(config_prof_libunwind, bool)
+CTL_RO_CONFIG_GEN(config_prof_frameptr, bool)
 CTL_RO_CONFIG_GEN(config_stats, bool)
 CTL_RO_CONFIG_GEN(config_utrace, bool)
 CTL_RO_CONFIG_GEN(config_xmalloc, bool)
@@ -2128,17 +2138,30 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool)
 CTL_RO_NL_GEN(opt_abort, opt_abort, bool)
 CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool)
 CTL_RO_NL_GEN(opt_cache_oblivious, opt_cache_oblivious, bool)
+CTL_RO_NL_GEN(
+    opt_debug_double_free_max_scan, opt_debug_double_free_max_scan, unsigned)
 CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool)
+CTL_RO_NL_GEN(opt_experimental_hpa_start_huge_if_thp_always,
+    opt_experimental_hpa_start_huge_if_thp_always, bool)
+CTL_RO_NL_GEN(opt_experimental_hpa_enforce_hugify,
+    opt_experimental_hpa_enforce_hugify, bool)
 CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool)
 
 /* HPA options. */
 CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool)
-CTL_RO_NL_GEN(opt_hpa_hugification_threshold,
-    opt_hpa_opts.hugification_threshold, size_t)
+CTL_RO_NL_GEN(
+    opt_hpa_hugification_threshold, opt_hpa_opts.hugification_threshold, size_t)
 CTL_RO_NL_GEN(opt_hpa_hugify_delay_ms, opt_hpa_opts.hugify_delay_ms, uint64_t)
-CTL_RO_NL_GEN(opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms,
-    uint64_t)
-
+CTL_RO_NL_GEN(opt_hpa_hugify_sync, opt_hpa_opts.hugify_sync, bool)
+CTL_RO_NL_GEN(
+    opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms, uint64_t)
+CTL_RO_NL_GEN(opt_experimental_hpa_max_purge_nhp,
+    opt_hpa_opts.experimental_max_purge_nhp, ssize_t)
+CTL_RO_NL_GEN(opt_hpa_purge_threshold, opt_hpa_opts.purge_threshold, size_t)
+CTL_RO_NL_GEN(
+    opt_hpa_min_purge_delay_ms, opt_hpa_opts.min_purge_delay_ms, uint64_t)
+CTL_RO_NL_GEN(opt_hpa_hugify_style,
+    hpa_hugify_style_names[opt_hpa_opts.hugify_style], const char *)
 /*
  * This will have to change before we publicly document this option; fxp_t and
  * its representation are internal implementation details.
@@ -2150,18 +2173,16 @@ CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_opts.slab_max_alloc, size_t)
 CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_opts.nshards, size_t)
 CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_opts.max_alloc, size_t)
 CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_opts.max_bytes, size_t)
-CTL_RO_NL_GEN(opt_hpa_sec_bytes_after_flush, opt_hpa_sec_opts.bytes_after_flush,
-    size_t)
-CTL_RO_NL_GEN(opt_hpa_sec_batch_fill_extra, opt_hpa_sec_opts.batch_fill_extra,
-    size_t)
-
-CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp],
-    const char *)
+CTL_RO_NL_GEN(
+    opt_hpa_sec_batch_fill_extra, opt_hpa_sec_opts.batch_fill_extra, size_t)
+CTL_RO_NL_GEN(opt_huge_arena_pac_thp, opt_huge_arena_pac_thp, bool)
+CTL_RO_NL_GEN(
+    opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *)
 CTL_RO_NL_GEN(opt_retain, opt_retain, bool)
 CTL_RO_NL_GEN(opt_dss, opt_dss, const char *)
 CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned)
-CTL_RO_NL_GEN(opt_percpu_arena, percpu_arena_mode_names[opt_percpu_arena],
-    const char *)
+CTL_RO_NL_GEN(
+    opt_percpu_arena, percpu_arena_mode_names[opt_percpu_arena], const char *)
 CTL_RO_NL_GEN(opt_mutex_max_spin, opt_mutex_max_spin, int64_t)
 CTL_RO_NL_GEN(opt_oversize_threshold, opt_oversize_threshold, size_t)
 CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool)
@@ -2178,53 +2199,72 @@ CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool)
 CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool)
 CTL_RO_NL_CGEN(config_enable_cxx, opt_experimental_infallible_new,
     opt_experimental_infallible_new, bool)
+CTL_RO_NL_GEN(opt_experimental_tcache_gc, opt_experimental_tcache_gc, bool)
 CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool)
 CTL_RO_NL_GEN(opt_tcache_max, opt_tcache_max, size_t)
-CTL_RO_NL_GEN(opt_tcache_nslots_small_min, opt_tcache_nslots_small_min,
-    unsigned)
-CTL_RO_NL_GEN(opt_tcache_nslots_small_max, opt_tcache_nslots_small_max,
-    unsigned)
+CTL_RO_NL_GEN(
+    opt_tcache_nslots_small_min, opt_tcache_nslots_small_min, unsigned)
+CTL_RO_NL_GEN(
+    opt_tcache_nslots_small_max, opt_tcache_nslots_small_max, unsigned)
 CTL_RO_NL_GEN(opt_tcache_nslots_large, opt_tcache_nslots_large, unsigned)
 CTL_RO_NL_GEN(opt_lg_tcache_nslots_mul, opt_lg_tcache_nslots_mul, ssize_t)
 CTL_RO_NL_GEN(opt_tcache_gc_incr_bytes, opt_tcache_gc_incr_bytes, size_t)
 CTL_RO_NL_GEN(opt_tcache_gc_delay_bytes, opt_tcache_gc_delay_bytes, size_t)
-CTL_RO_NL_GEN(opt_lg_tcache_flush_small_div, opt_lg_tcache_flush_small_div,
-    unsigned)
-CTL_RO_NL_GEN(opt_lg_tcache_flush_large_div, opt_lg_tcache_flush_large_div,
-    unsigned)
+CTL_RO_NL_GEN(
+    opt_lg_tcache_flush_small_div, opt_lg_tcache_flush_small_div, unsigned)
+CTL_RO_NL_GEN(
+    opt_lg_tcache_flush_large_div, opt_lg_tcache_flush_large_div, unsigned)
 CTL_RO_NL_GEN(opt_thp, thp_mode_names[opt_thp], const char *)
-CTL_RO_NL_GEN(opt_lg_extent_max_active_fit, opt_lg_extent_max_active_fit,
-    size_t)
+CTL_RO_NL_GEN(
+    opt_lg_extent_max_active_fit, opt_lg_extent_max_active_fit, size_t)
+CTL_RO_NL_GEN(
+    opt_process_madvise_max_batch, opt_process_madvise_max_batch, size_t)
 CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool)
 CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *)
 CTL_RO_NL_CGEN(config_prof, opt_prof_active, opt_prof_active, bool)
-CTL_RO_NL_CGEN(config_prof, opt_prof_thread_active_init,
-    opt_prof_thread_active_init, bool)
+CTL_RO_NL_CGEN(
+    config_prof, opt_prof_thread_active_init, opt_prof_thread_active_init, bool)
+CTL_RO_NL_CGEN(config_prof, opt_prof_bt_max, opt_prof_bt_max, unsigned)
 CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t)
 CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool)
+CTL_RO_NL_CGEN(
+    config_prof, opt_prof_pid_namespace, opt_prof_pid_namespace, bool)
 CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t)
 CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool)
 CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool)
 CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool)
 CTL_RO_NL_CGEN(config_prof, opt_prof_leak_error, opt_prof_leak_error, bool)
-CTL_RO_NL_CGEN(config_prof, opt_prof_recent_alloc_max,
-    opt_prof_recent_alloc_max, ssize_t)
+CTL_RO_NL_CGEN(
+    config_prof, opt_prof_recent_alloc_max, opt_prof_recent_alloc_max, ssize_t)
 CTL_RO_NL_CGEN(config_prof, opt_prof_stats, opt_prof_stats, bool)
-CTL_RO_NL_CGEN(config_prof, opt_prof_sys_thread_name, opt_prof_sys_thread_name,
-    bool)
+CTL_RO_NL_CGEN(
+    config_prof, opt_prof_sys_thread_name, opt_prof_sys_thread_name, bool)
 CTL_RO_NL_CGEN(config_prof, opt_prof_time_res,
     prof_time_res_mode_names[opt_prof_time_res], const char *)
-CTL_RO_NL_CGEN(config_uaf_detection, opt_lg_san_uaf_align,
-    opt_lg_san_uaf_align, ssize_t)
+CTL_RO_NL_CGEN(
+    config_uaf_detection, opt_lg_san_uaf_align, opt_lg_san_uaf_align, ssize_t)
 CTL_RO_NL_GEN(opt_zero_realloc,
     zero_realloc_mode_names[opt_zero_realloc_action], const char *)
+CTL_RO_NL_GEN(
+    opt_disable_large_size_classes, opt_disable_large_size_classes, bool)
+
+/* malloc_conf options */
+CTL_RO_NL_CGEN(opt_malloc_conf_symlink, opt_malloc_conf_symlink,
+    opt_malloc_conf_symlink, const char *)
+CTL_RO_NL_CGEN(opt_malloc_conf_env_var, opt_malloc_conf_env_var,
+    opt_malloc_conf_env_var, const char *)
+CTL_RO_NL_CGEN(
+    je_malloc_conf, opt_malloc_conf_global_var, je_malloc_conf, const char *)
+CTL_RO_NL_CGEN(je_malloc_conf_2_conf_harder,
+    opt_malloc_conf_global_var_2_conf_harder, je_malloc_conf_2_conf_harder,
+    const char *)
 
 /******************************************************************************/
 
 static int
-thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
-    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
+thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen) {
+	int      ret;
 	arena_t *oldarena;
 	unsigned newind, oldind;
 
@@ -2245,8 +2285,8 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
 			goto label_return;
 		}
 
-		if (have_percpu_arena &&
-		    PERCPU_ARENA_ENABLED(opt_percpu_arena)) {
+		if (have_percpu_arena
+		    && PERCPU_ARENA_ENABLED(opt_percpu_arena)) {
 			if (newind < percpu_arena_ind_limit(opt_percpu_arena)) {
 				/*
 				 * If perCPU arena is enabled, thread_arena
@@ -2280,14 +2320,83 @@ label_return:
 
 CTL_RO_NL_GEN(thread_allocated, tsd_thread_allocated_get(tsd), uint64_t)
 CTL_RO_NL_GEN(thread_allocatedp, tsd_thread_allocatedp_get(tsd), uint64_t *)
+
+static int
+thread_tcache_ncached_max_read_sizeclass_ctl(tsd_t *tsd, const size_t *mib,
+    size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+	int    ret;
+	size_t bin_size = 0;
+
+	/* Read the bin size from newp. */
+	if (newp == NULL) {
+		ret = EINVAL;
+		goto label_return;
+	}
+	WRITE(bin_size, size_t);
+
+	cache_bin_sz_t ncached_max = 0;
+	if (tcache_bin_ncached_max_read(tsd, bin_size, &ncached_max)) {
+		ret = EINVAL;
+		goto label_return;
+	}
+	size_t result = (size_t)ncached_max;
+	READ(result, size_t);
+	ret = 0;
+label_return:
+	return ret;
+}
+
+static int
+thread_tcache_ncached_max_write_ctl(tsd_t *tsd, const size_t *mib,
+    size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+	int ret;
+	WRITEONLY();
+	if (newp != NULL) {
+		if (!tcache_available(tsd)) {
+			ret = ENOENT;
+			goto label_return;
+		}
+		char *settings = NULL;
+		WRITE(settings, char *);
+		if (settings == NULL) {
+			ret = EINVAL;
+			goto label_return;
+		}
+		/* Get the length of the setting string safely. */
+		char *end = (char *)memchr(
+		    settings, '\0', CTL_MULTI_SETTING_MAX_LEN);
+		if (end == NULL) {
+			ret = EINVAL;
+			goto label_return;
+		}
+		/*
+		 * Exclude the last '\0' for len since it is not handled by
+		 * multi_setting_parse_next.
+		 */
+		size_t len = (uintptr_t)end - (uintptr_t)settings;
+		if (len == 0) {
+			ret = 0;
+			goto label_return;
+		}
+
+		if (tcache_bins_ncached_max_write(tsd, settings, len)) {
+			ret = EINVAL;
+			goto label_return;
+		}
+	}
+
+	ret = 0;
+label_return:
+	return ret;
+}
+
 CTL_RO_NL_GEN(thread_deallocated, tsd_thread_deallocated_get(tsd), uint64_t)
 CTL_RO_NL_GEN(thread_deallocatedp, tsd_thread_deallocatedp_get(tsd), uint64_t *)
 
 static int
-thread_tcache_enabled_ctl(tsd_t *tsd, const size_t *mib,
-    size_t miblen, void *oldp, size_t *oldlenp, void *newp,
-    size_t newlen) {
-	int ret;
+thread_tcache_enabled_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+	int  ret;
 	bool oldval;
 
 	oldval = tcache_enabled_get(tsd);
@@ -2306,9 +2415,41 @@ label_return:
 }
 
 static int
-thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib,
-    size_t miblen, void *oldp, size_t *oldlenp, void *newp,
-    size_t newlen) {
+thread_tcache_max_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen) {
+	int    ret;
+	size_t oldval;
+
+	/* pointer to tcache_t always exists even with tcache disabled. */
+	tcache_t *tcache = tsd_tcachep_get(tsd);
+	assert(tcache != NULL);
+	oldval = tcache_max_get(tcache->tcache_slow);
+	READ(oldval, size_t);
+
+	if (newp != NULL) {
+		if (newlen != sizeof(size_t)) {
+			ret = EINVAL;
+			goto label_return;
+		}
+		size_t new_tcache_max = oldval;
+		WRITE(new_tcache_max, size_t);
+		if (new_tcache_max > TCACHE_MAXCLASS_LIMIT) {
+			new_tcache_max = TCACHE_MAXCLASS_LIMIT;
+		}
+		new_tcache_max = sz_s2u(new_tcache_max);
+		if (new_tcache_max != oldval) {
+			thread_tcache_max_set(tsd, new_tcache_max);
+		}
+	}
+
+	ret = 0;
+label_return:
+	return ret;
+}
+
+static int
+thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
 	int ret;
 
 	if (!tcache_available(tsd)) {
@@ -2326,9 +2467,8 @@ label_return:
 }
 
 static int
-thread_peak_read_ctl(tsd_t *tsd, const size_t *mib,
-    size_t miblen, void *oldp, size_t *oldlenp, void *newp,
-    size_t newlen) {
+thread_peak_read_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen) {
 	int ret;
 	if (!config_stats) {
 		return ENOENT;
@@ -2343,9 +2483,8 @@ label_return:
 }
 
 static int
-thread_peak_reset_ctl(tsd_t *tsd, const size_t *mib,
-    size_t miblen, void *oldp, size_t *oldlenp, void *newp,
-    size_t newlen) {
+thread_peak_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen) {
 	int ret;
 	if (!config_stats) {
 		return ENOENT;
@@ -2358,9 +2497,8 @@ label_return:
 }
 
 static int
-thread_prof_name_ctl(tsd_t *tsd, const size_t *mib,
-    size_t miblen, void *oldp, size_t *oldlenp, void *newp,
-    size_t newlen) {
+thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen) {
 	int ret;
 
 	if (!config_prof || !opt_prof) {
@@ -2370,13 +2508,13 @@ thread_prof_name_ctl(tsd_t *tsd, const size_t *mib,
 	READ_XOR_WRITE();
 
 	if (newp != NULL) {
-		if (newlen != sizeof(const char *)) {
+		const char *newval = *(const char **)newp;
+		if (newlen != sizeof(const char *) || newval == NULL) {
 			ret = EINVAL;
 			goto label_return;
 		}
 
-		if ((ret = prof_thread_name_set(tsd, *(const char **)newp)) !=
-		    0) {
+		if ((ret = prof_thread_name_set(tsd, newval)) != 0) {
 			goto label_return;
 		}
 	} else {
@@ -2390,10 +2528,9 @@ label_return:
 }
 
 static int
-thread_prof_active_ctl(tsd_t *tsd, const size_t *mib,
-    size_t miblen, void *oldp, size_t *oldlenp, void *newp,
-    size_t newlen) {
-	int ret;
+thread_prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen) {
+	int  ret;
 	bool oldval;
 
 	if (!config_prof) {
@@ -2423,9 +2560,8 @@ label_return:
 }
 
 static int
-thread_idle_ctl(tsd_t *tsd, const size_t *mib,
-    size_t miblen, void *oldp, size_t *oldlenp, void *newp,
-    size_t newlen) {
+thread_idle_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen) {
 	int ret;
 
 	NEITHER_READ_NOR_WRITE();
@@ -2458,9 +2594,9 @@ label_return:
 /******************************************************************************/
 
 static int
-tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
-    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
+tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen) {
+	int      ret;
 	unsigned tcache_ind;
 
 	READONLY();
@@ -2477,9 +2613,9 @@ label_return:
 }
 
 static int
-tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
-    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
+tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen) {
+	int      ret;
 	unsigned tcache_ind;
 
 	WRITEONLY();
@@ -2492,9 +2628,9 @@ label_return:
 }
 
 static int
-tcache_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
-    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
+tcache_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen) {
+	int      ret;
 	unsigned tcache_ind;
 
 	WRITEONLY();
@@ -2511,10 +2647,10 @@ label_return:
 static int
 arena_i_initialized_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
     void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
-	tsdn_t *tsdn = tsd_tsdn(tsd);
+	int      ret;
+	tsdn_t  *tsdn = tsd_tsdn(tsd);
 	unsigned arena_ind;
-	bool initialized;
+	bool     initialized;
 
 	READONLY();
 	MIB_UNSIGNED(arena_ind, 1);
@@ -2542,7 +2678,7 @@ arena_i_decay(tsdn_t *tsdn, unsigned arena_ind, bool all) {
 		 */
 		if (arena_ind == MALLCTL_ARENAS_ALL || arena_ind == narenas) {
 			unsigned i;
-			VARIABLE_ARRAY(arena_t *, tarenas, narenas);
+			VARIABLE_ARRAY_UNSAFE(arena_t *, tarenas, narenas);
 
 			for (i = 0; i < narenas; i++) {
 				tarenas[i] = arena_get(tsdn, i, false);
@@ -2556,8 +2692,8 @@ arena_i_decay(tsdn_t *tsdn, unsigned arena_ind, bool all) {
 
 			for (i = 0; i < narenas; i++) {
 				if (tarenas[i] != NULL) {
-					arena_decay(tsdn, tarenas[i], false,
-					    all);
+					arena_decay(
+					    tsdn, tarenas[i], false, all);
 				}
 			}
 		} else {
@@ -2580,7 +2716,7 @@ arena_i_decay(tsdn_t *tsdn, unsigned arena_ind, bool all) {
 static int
 arena_i_decay_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
+	int      ret;
 	unsigned arena_ind;
 
 	NEITHER_READ_NOR_WRITE();
@@ -2595,7 +2731,7 @@ label_return:
 static int
 arena_i_purge_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
+	int      ret;
 	unsigned arena_ind;
 
 	NEITHER_READ_NOR_WRITE();
@@ -2661,12 +2797,12 @@ arena_reset_finish_background_thread(tsd_t *tsd, unsigned arena_ind) {
 static int
 arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
+	int      ret;
 	unsigned arena_ind;
 	arena_t *arena;
 
-	ret = arena_i_reset_destroy_helper(tsd, mib, miblen, oldp, oldlenp,
-	    newp, newlen, &arena_ind, &arena);
+	ret = arena_i_reset_destroy_helper(
+	    tsd, mib, miblen, oldp, oldlenp, newp, newlen, &arena_ind, &arena);
 	if (ret != 0) {
 		return ret;
 	}
@@ -2681,21 +2817,21 @@ arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
 static int
 arena_i_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
-	unsigned arena_ind;
-	arena_t *arena;
+	int          ret;
+	unsigned     arena_ind;
+	arena_t     *arena;
 	ctl_arena_t *ctl_darena, *ctl_arena;
 
 	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
 
-	ret = arena_i_reset_destroy_helper(tsd, mib, miblen, oldp, oldlenp,
-	    newp, newlen, &arena_ind, &arena);
+	ret = arena_i_reset_destroy_helper(
+	    tsd, mib, miblen, oldp, oldlenp, newp, newlen, &arena_ind, &arena);
 	if (ret != 0) {
 		goto label_return;
 	}
 
-	if (arena_nthreads_get(arena, false) != 0 || arena_nthreads_get(arena,
-	    true) != 0) {
+	if (arena_nthreads_get(arena, false) != 0
+	    || arena_nthreads_get(arena, true) != 0) {
 		ret = EFAULT;
 		goto label_return;
 	}
@@ -2726,17 +2862,16 @@ label_return:
 static int
 arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
+	int         ret;
 	const char *dss = NULL;
-	unsigned arena_ind;
-	dss_prec_t dss_prec_old = dss_prec_limit;
-	dss_prec_t dss_prec = dss_prec_limit;
+	unsigned    arena_ind;
+	dss_prec_t  dss_prec = dss_prec_limit;
 
 	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
 	WRITE(dss, const char *);
 	MIB_UNSIGNED(arena_ind, 1);
 	if (dss != NULL) {
-		int i;
+		int  i;
 		bool match = false;
 
 		for (i = 0; i < dss_prec_limit; i++) {
@@ -2757,18 +2892,20 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
 	 * Access via index narenas is deprecated, and scheduled for removal in
 	 * 6.0.0.
 	 */
-	if (arena_ind == MALLCTL_ARENAS_ALL || arena_ind ==
-	    ctl_arenas->narenas) {
-		if (dss_prec != dss_prec_limit &&
-		    extent_dss_prec_set(dss_prec)) {
+	dss_prec_t dss_prec_old;
+	if (arena_ind == MALLCTL_ARENAS_ALL
+	    || arena_ind == ctl_arenas->narenas) {
+		if (dss_prec != dss_prec_limit
+		    && extent_dss_prec_set(dss_prec)) {
 			ret = EFAULT;
 			goto label_return;
 		}
 		dss_prec_old = extent_dss_prec_get();
 	} else {
 		arena_t *arena = arena_get(tsd_tsdn(tsd), arena_ind, false);
-		if (arena == NULL || (dss_prec != dss_prec_limit &&
-		    arena_dss_prec_set(arena, dss_prec))) {
+		if (arena == NULL
+		    || (dss_prec != dss_prec_limit
+		        && arena_dss_prec_set(arena, dss_prec))) {
 			ret = EFAULT;
 			goto label_return;
 		}
@@ -2819,7 +2956,7 @@ label_return:
 static int
 arena_i_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen,
     void *oldp, size_t *oldlenp, void *newp, size_t newlen, bool dirty) {
-	int ret;
+	int      ret;
 	unsigned arena_ind;
 	arena_t *arena;
 
@@ -2840,20 +2977,9 @@ arena_i_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen,
 			ret = EINVAL;
 			goto label_return;
 		}
-		if (arena_is_huge(arena_ind) && *(ssize_t *)newp > 0) {
-			/*
-			 * By default the huge arena purges eagerly.  If it is
-			 * set to non-zero decay time afterwards, background
-			 * thread might be needed.
-			 */
-			if (background_thread_create(tsd, arena_ind)) {
-				ret = EFAULT;
-				goto label_return;
-			}
-		}
 
-		if (arena_decay_ms_set(tsd_tsdn(tsd), arena, state,
-		    *(ssize_t *)newp)) {
+		if (arena_decay_ms_set(
+		        tsd_tsdn(tsd), arena, state, *(ssize_t *)newp)) {
 			ret = EFAULT;
 			goto label_return;
 		}
@@ -2867,21 +2993,21 @@ label_return:
 static int
 arena_i_dirty_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
     void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
-	return arena_i_decay_ms_ctl_impl(tsd, mib, miblen, oldp, oldlenp, newp,
-	    newlen, true);
+	return arena_i_decay_ms_ctl_impl(
+	    tsd, mib, miblen, oldp, oldlenp, newp, newlen, true);
 }
 
 static int
 arena_i_muzzy_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
     void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
-	return arena_i_decay_ms_ctl_impl(tsd, mib, miblen, oldp, oldlenp, newp,
-	    newlen, false);
+	return arena_i_decay_ms_ctl_impl(
+	    tsd, mib, miblen, oldp, oldlenp, newp, newlen, false);
 }
 
 static int
 arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
     void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
+	int      ret;
 	unsigned arena_ind;
 	arena_t *arena;
 
@@ -2906,8 +3032,8 @@ arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
 				arena_config_t config = arena_config_default;
 				config.extent_hooks = new_extent_hooks;
 
-				arena = arena_init(tsd_tsdn(tsd), arena_ind,
-				    &config);
+				arena = arena_init(
+				    tsd_tsdn(tsd), arena_ind, &config);
 				if (arena == NULL) {
 					ret = EFAULT;
 					goto label_return;
@@ -2918,13 +3044,12 @@ arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
 				extent_hooks_t *new_extent_hooks
 				    JEMALLOC_CC_SILENCE_INIT(NULL);
 				WRITE(new_extent_hooks, extent_hooks_t *);
-				old_extent_hooks = arena_set_extent_hooks(tsd,
-				    arena, new_extent_hooks);
+				old_extent_hooks = arena_set_extent_hooks(
+				    tsd, arena, new_extent_hooks);
 				READ(old_extent_hooks, extent_hooks_t *);
 			} else {
-				old_extent_hooks =
-				    ehooks_get_extent_hooks_ptr(
-					arena_get_ehooks(arena));
+				old_extent_hooks = ehooks_get_extent_hooks_ptr(
+				    arena_get_ehooks(arena));
 				READ(old_extent_hooks, extent_hooks_t *);
 			}
 		}
@@ -2939,10 +3064,9 @@ label_return:
 }
 
 static int
-arena_i_retain_grow_limit_ctl(tsd_t *tsd, const size_t *mib,
-    size_t miblen, void *oldp, size_t *oldlenp, void *newp,
-    size_t newlen) {
-	int ret;
+arena_i_retain_grow_limit_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+	int      ret;
 	unsigned arena_ind;
 	arena_t *arena;
 
@@ -2953,14 +3077,14 @@ arena_i_retain_grow_limit_ctl(tsd_t *tsd, const size_t *mib,
 
 	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
 	MIB_UNSIGNED(arena_ind, 1);
-	if (arena_ind < narenas_total_get() && (arena =
-	    arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) {
+	if (arena_ind < narenas_total_get()
+	    && (arena = arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) {
 		size_t old_limit, new_limit;
 		if (newp != NULL) {
 			WRITE(new_limit, size_t);
 		}
-		bool err = arena_retain_grow_limit_get_set(tsd, arena,
-		    &old_limit, newp != NULL ? &new_limit : NULL);
+		bool err = arena_retain_grow_limit_get_set(
+		    tsd, arena, &old_limit, newp != NULL ? &new_limit : NULL);
 		if (!err) {
 			READ(old_limit, size_t);
 			ret = 0;
@@ -2975,9 +3099,63 @@ label_return:
 	return ret;
 }
 
+/*
+ * When writing, newp should point to a char array storing the name to be set.
+ * A name longer than ARENA_NAME_LEN will be arbitrarily cut. When reading,
+ * oldp should point to a char array whose length is no shorter than
+ * ARENA_NAME_LEN or the length of the name when it was set.
+ */
+static int
+arena_i_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen) {
+	int        ret;
+	unsigned   arena_ind;
+	char *name JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(NULL);
+
+	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
+	MIB_UNSIGNED(arena_ind, 1);
+	if (arena_ind == MALLCTL_ARENAS_ALL
+	    || arena_ind >= ctl_arenas->narenas) {
+		ret = EINVAL;
+		goto label_return;
+	}
+	arena_t *arena = arena_get(tsd_tsdn(tsd), arena_ind, false);
+	if (arena == NULL) {
+		ret = EFAULT;
+		goto label_return;
+	}
+
+	if (oldp != NULL && oldlenp != NULL) {
+		/*
+		 * Read the arena name.  When reading, the input oldp should
+		 * point to an array with a length no shorter than
+		 * ARENA_NAME_LEN or the length when it was set.
+		 */
+		if (*oldlenp != sizeof(char *)) {
+			ret = EINVAL;
+			goto label_return;
+		}
+		name = *(char **)oldp;
+		arena_name_get(arena, name);
+	}
+
+	if (newp != NULL) {
+		/* Write the arena name. */
+		WRITE(name, char *);
+		if (name == NULL) {
+			ret = EINVAL;
+			goto label_return;
+		}
+		arena_name_set(arena, name);
+	}
+	ret = 0;
+label_return:
+	malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
+	return ret;
+}
+
 static const ctl_named_node_t *
-arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen,
-    size_t i) {
+arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) {
 	const ctl_named_node_t *ret;
 
 	malloc_mutex_lock(tsdn, &ctl_mtx);
@@ -3002,9 +3180,9 @@ label_return:
 /******************************************************************************/
 
 static int
-arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
-    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
+arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen) {
+	int      ret;
 	unsigned narenas;
 
 	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
@@ -3019,14 +3197,13 @@ label_return:
 }
 
 static int
-arenas_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib,
-    size_t miblen, void *oldp, size_t *oldlenp, void *newp,
-    size_t newlen, bool dirty) {
+arenas_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen, bool dirty) {
 	int ret;
 
 	if (oldp != NULL && oldlenp != NULL) {
-		size_t oldval = (dirty ? arena_dirty_decay_ms_default_get() :
-		    arena_muzzy_decay_ms_default_get());
+		size_t oldval = (dirty ? arena_dirty_decay_ms_default_get()
+		                       : arena_muzzy_decay_ms_default_get());
 		READ(oldval, ssize_t);
 	}
 	if (newp != NULL) {
@@ -3034,8 +3211,9 @@ arenas_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib,
 			ret = EINVAL;
 			goto label_return;
 		}
-		if (dirty ? arena_dirty_decay_ms_default_set(*(ssize_t *)newp)
-		    : arena_muzzy_decay_ms_default_set(*(ssize_t *)newp)) {
+		if (dirty
+		        ? arena_dirty_decay_ms_default_set(*(ssize_t *)newp)
+		        : arena_muzzy_decay_ms_default_set(*(ssize_t *)newp)) {
 			ret = EFAULT;
 			goto label_return;
 		}
@@ -3049,51 +3227,51 @@ label_return:
 static int
 arenas_dirty_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
     void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
-	return arenas_decay_ms_ctl_impl(tsd, mib, miblen, oldp, oldlenp, newp,
-	    newlen, true);
+	return arenas_decay_ms_ctl_impl(
+	    tsd, mib, miblen, oldp, oldlenp, newp, newlen, true);
 }
 
 static int
 arenas_muzzy_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
     void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
-	return arenas_decay_ms_ctl_impl(tsd, mib, miblen, oldp, oldlenp, newp,
-	    newlen, false);
+	return arenas_decay_ms_ctl_impl(
+	    tsd, mib, miblen, oldp, oldlenp, newp, newlen, false);
 }
 
 CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t)
 CTL_RO_NL_GEN(arenas_page, PAGE, size_t)
-CTL_RO_NL_GEN(arenas_tcache_max, tcache_maxclass, size_t)
+CTL_RO_NL_GEN(arenas_hugepage, HUGEPAGE, size_t)
+CTL_RO_NL_GEN(arenas_tcache_max, global_do_not_change_tcache_maxclass, size_t)
 CTL_RO_NL_GEN(arenas_nbins, SC_NBINS, unsigned)
-CTL_RO_NL_GEN(arenas_nhbins, nhbins, unsigned)
+CTL_RO_NL_GEN(arenas_nhbins, global_do_not_change_tcache_nbins, unsigned)
 CTL_RO_NL_GEN(arenas_bin_i_size, bin_infos[mib[2]].reg_size, size_t)
 CTL_RO_NL_GEN(arenas_bin_i_nregs, bin_infos[mib[2]].nregs, uint32_t)
 CTL_RO_NL_GEN(arenas_bin_i_slab_size, bin_infos[mib[2]].slab_size, size_t)
 CTL_RO_NL_GEN(arenas_bin_i_nshards, bin_infos[mib[2]].n_shards, uint32_t)
 static const ctl_named_node_t *
-arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib,
-    size_t miblen, size_t i) {
-	if (i > SC_NBINS) {
+arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) {
+	if (i >= SC_NBINS) {
 		return NULL;
 	}
 	return super_arenas_bin_i_node;
 }
 
 CTL_RO_NL_GEN(arenas_nlextents, SC_NSIZES - SC_NBINS, unsigned)
-CTL_RO_NL_GEN(arenas_lextent_i_size, sz_index2size(SC_NBINS+(szind_t)mib[2]),
-    size_t)
+CTL_RO_NL_GEN(arenas_lextent_i_size,
+    sz_index2size_unsafe(SC_NBINS + (szind_t)mib[2]), size_t)
 static const ctl_named_node_t *
-arenas_lextent_i_index(tsdn_t *tsdn, const size_t *mib,
-    size_t miblen, size_t i) {
-	if (i > SC_NSIZES - SC_NBINS) {
+arenas_lextent_i_index(
+    tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) {
+	if (i >= SC_NSIZES - SC_NBINS) {
 		return NULL;
 	}
 	return super_arenas_lextent_i_node;
 }
 
 static int
-arenas_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
-    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
+arenas_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen) {
+	int      ret;
 	unsigned arena_ind;
 
 	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
@@ -3114,10 +3292,9 @@ label_return:
 }
 
 static int
-experimental_arenas_create_ext_ctl(tsd_t *tsd,
-    const size_t *mib, size_t miblen,
+experimental_arenas_create_ext_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
     void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
+	int      ret;
 	unsigned arena_ind;
 
 	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
@@ -3138,25 +3315,26 @@ label_return:
 }
 
 static int
-arenas_lookup_ctl(tsd_t *tsd, const size_t *mib,
-    size_t miblen, void *oldp, size_t *oldlenp, void *newp,
-    size_t newlen) {
-	int ret;
-	unsigned arena_ind;
-	void *ptr;
-	edata_t *edata;
-	arena_t *arena;
+arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen) {
+	int                   ret;
+	unsigned              arena_ind;
+	void                 *ptr;
+	emap_full_alloc_ctx_t alloc_ctx;
+	bool                  ptr_not_present;
+	arena_t              *arena;
 
 	ptr = NULL;
 	ret = EINVAL;
 	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
 	WRITE(ptr, void *);
-	edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr);
-	if (edata == NULL) {
+	ptr_not_present = emap_full_alloc_ctx_try_lookup(
+	    tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx);
+	if (ptr_not_present || alloc_ctx.edata == NULL) {
 		goto label_return;
 	}
 
-	arena = arena_get_from_edata(edata);
+	arena = arena_get_from_edata(alloc_ctx.edata);
 	if (arena == NULL) {
 		goto label_return;
 	}
@@ -3173,10 +3351,9 @@ label_return:
 /******************************************************************************/
 
 static int
-prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib,
-    size_t miblen, void *oldp, size_t *oldlenp, void *newp,
-    size_t newlen) {
-	int ret;
+prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+	int  ret;
 	bool oldval;
 
 	if (!config_prof) {
@@ -3192,11 +3369,11 @@ prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib,
 			ret = EINVAL;
 			goto label_return;
 		}
-		oldval = prof_thread_active_init_set(tsd_tsdn(tsd),
-		    *(bool *)newp);
+		oldval = prof_thread_active_init_set(
+		    tsd_tsdn(tsd), *(bool *)newp);
 	} else {
-		oldval = opt_prof ? prof_thread_active_init_get(tsd_tsdn(tsd)) :
-		    false;
+		oldval = opt_prof ? prof_thread_active_init_get(tsd_tsdn(tsd))
+		                  : false;
 	}
 	READ(oldval, bool);
 
@@ -3206,9 +3383,9 @@ label_return:
 }
 
 static int
-prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
-    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
+prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen) {
+	int  ret;
 	bool oldval;
 
 	if (!config_prof) {
@@ -3244,9 +3421,9 @@ label_return:
 }
 
 static int
-prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
-    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
+prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen) {
+	int         ret;
 	const char *filename = NULL;
 
 	if (!config_prof || !opt_prof) {
@@ -3267,9 +3444,9 @@ label_return:
 }
 
 static int
-prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
-    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
+prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen) {
+	int  ret;
 	bool oldval;
 
 	if (!config_prof) {
@@ -3297,9 +3474,9 @@ label_return:
 }
 
 static int
-prof_prefix_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
-    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
+prof_prefix_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen) {
+	int         ret;
 	const char *prefix = NULL;
 
 	if (!config_prof || !opt_prof) {
@@ -3317,9 +3494,9 @@ label_return:
 }
 
 static int
-prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
-    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
+prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen) {
+	int    ret;
 	size_t lg_sample = lg_prof_sample;
 
 	if (!config_prof || !opt_prof) {
@@ -3390,8 +3567,7 @@ experimental_hooks_prof_backtrace_ctl(tsd_t *tsd, const size_t *mib,
 		goto label_return;
 	}
 	if (oldp != NULL) {
-		prof_backtrace_hook_t old_hook =
-		    prof_backtrace_hook_get();
+		prof_backtrace_hook_t old_hook = prof_backtrace_hook_get();
 		READ(old_hook, prof_backtrace_hook_t);
 	}
 	if (newp != NULL) {
@@ -3413,8 +3589,8 @@ label_return:
 }
 
 static int
-experimental_hooks_prof_dump_ctl(tsd_t *tsd, const size_t *mib,
-    size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+experimental_hooks_prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
 	int ret;
 
 	if (oldp == NULL && newp == NULL) {
@@ -3422,8 +3598,7 @@ experimental_hooks_prof_dump_ctl(tsd_t *tsd, const size_t *mib,
 		goto label_return;
 	}
 	if (oldp != NULL) {
-		prof_dump_hook_t old_hook =
-		    prof_dump_hook_get();
+		prof_dump_hook_t old_hook = prof_dump_hook_get();
 		READ(old_hook, prof_dump_hook_t);
 	}
 	if (newp != NULL) {
@@ -3440,6 +3615,78 @@ label_return:
 	return ret;
 }
 
+static int
+experimental_hooks_prof_sample_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+	int ret;
+
+	if (oldp == NULL && newp == NULL) {
+		ret = EINVAL;
+		goto label_return;
+	}
+	if (oldp != NULL) {
+		prof_sample_hook_t old_hook = prof_sample_hook_get();
+		READ(old_hook, prof_sample_hook_t);
+	}
+	if (newp != NULL) {
+		if (!opt_prof) {
+			ret = ENOENT;
+			goto label_return;
+		}
+		prof_sample_hook_t new_hook JEMALLOC_CC_SILENCE_INIT(NULL);
+		WRITE(new_hook, prof_sample_hook_t);
+		prof_sample_hook_set(new_hook);
+	}
+	ret = 0;
+label_return:
+	return ret;
+}
+
+static int
+experimental_hooks_prof_sample_free_ctl(tsd_t *tsd, const size_t *mib,
+    size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+	int ret;
+
+	if (oldp == NULL && newp == NULL) {
+		ret = EINVAL;
+		goto label_return;
+	}
+	if (oldp != NULL) {
+		prof_sample_free_hook_t old_hook = prof_sample_free_hook_get();
+		READ(old_hook, prof_sample_free_hook_t);
+	}
+	if (newp != NULL) {
+		if (!opt_prof) {
+			ret = ENOENT;
+			goto label_return;
+		}
+		prof_sample_free_hook_t new_hook JEMALLOC_CC_SILENCE_INIT(NULL);
+		WRITE(new_hook, prof_sample_free_hook_t);
+		prof_sample_free_hook_set(new_hook);
+	}
+	ret = 0;
+label_return:
+	return ret;
+}
+
+static int
+experimental_hooks_thread_event_ctl(tsd_t *tsd, const size_t *mib,
+    size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+	int ret;
+
+	if (newp == NULL) {
+		ret = EINVAL;
+		goto label_return;
+	}
+
+	user_hook_object_t t_new = {NULL, 0, false};
+	WRITE(t_new, user_hook_object_t);
+	ret = te_register_user_handler(tsd_tsdn(tsd), &t_new);
+
+label_return:
+	return ret;
+}
+
 /* For integration test purpose only.  No plan to move out of experimental. */
 static int
 experimental_hooks_safety_check_abort_ctl(tsd_t *tsd, const size_t *mib,
@@ -3466,6 +3713,10 @@ label_return:
 CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats->allocated, size_t)
 CTL_RO_CGEN(config_stats, stats_active, ctl_stats->active, size_t)
 CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats->metadata, size_t)
+CTL_RO_CGEN(
+    config_stats, stats_metadata_edata, ctl_stats->metadata_edata, size_t)
+CTL_RO_CGEN(
+    config_stats, stats_metadata_rtree, ctl_stats->metadata_rtree, size_t)
 CTL_RO_CGEN(config_stats, stats_metadata_thp, ctl_stats->metadata_thp, size_t)
 CTL_RO_CGEN(config_stats, stats_resident, ctl_stats->resident, size_t)
 CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats->mapped, size_t)
@@ -3481,11 +3732,50 @@ CTL_RO_CGEN(config_stats, stats_background_thread_run_interval,
 CTL_RO_CGEN(config_stats, stats_zero_reallocs,
     atomic_load_zu(&zero_realloc_count, ATOMIC_RELAXED), size_t)
 
+/*
+ * approximate_stats.active returns a result that is informative itself,
+ * but the returned value SHOULD NOT be compared against other stats retrieved.
+ * For instance, approximate_stats.active should not be compared against
+ * any stats, e.g., stats.active or stats.resident, because there is no
+ * guarantee in the comparison results.  Results returned by stats.*, on the
+ * other hand, provides such guarantees, i.e., stats.active <= stats.resident,
+ * as long as epoch is called right before the queries.
+ */
+
+static int
+approximate_stats_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+	int    ret;
+	size_t approximate_nactive = 0;
+	size_t approximate_active_bytes = 0;
+
+	READONLY();
+
+	tsdn_t  *tsdn = tsd_tsdn(tsd);
+	unsigned n = narenas_total_get();
+
+	for (unsigned i = 0; i < n; i++) {
+		arena_t *arena = arena_get(tsdn, i, false);
+		if (!arena) {
+			continue;
+		}
+		/* Accumulate nactive pages from each arena's pa_shard */
+		approximate_nactive += pa_shard_nactive(&arena->pa_shard);
+	}
+
+	approximate_active_bytes = approximate_nactive << LG_PAGE;
+	READ(approximate_active_bytes, size_t);
+
+	ret = 0;
+label_return:
+	return ret;
+}
+
 CTL_RO_GEN(stats_arenas_i_dss, arenas_i(mib[2])->dss, const char *)
-CTL_RO_GEN(stats_arenas_i_dirty_decay_ms, arenas_i(mib[2])->dirty_decay_ms,
-    ssize_t)
-CTL_RO_GEN(stats_arenas_i_muzzy_decay_ms, arenas_i(mib[2])->muzzy_decay_ms,
-    ssize_t)
+CTL_RO_GEN(
+    stats_arenas_i_dirty_decay_ms, arenas_i(mib[2])->dirty_decay_ms, ssize_t)
+CTL_RO_GEN(
+    stats_arenas_i_muzzy_decay_ms, arenas_i(mib[2])->muzzy_decay_ms, ssize_t)
 CTL_RO_GEN(stats_arenas_i_nthreads, arenas_i(mib[2])->nthreads, unsigned)
 CTL_RO_GEN(stats_arenas_i_uptime,
     nstime_ns(&arenas_i(mib[2])->astats->astats.uptime), uint64_t)
@@ -3500,37 +3790,40 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_extent_avail,
     arenas_i(mib[2])->astats->astats.pa_shard_stats.edata_avail, size_t)
 
 CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_npurge,
-    locked_read_u64_unsynchronized(
-    &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_dirty.npurge),
+    locked_read_u64_unsynchronized(&arenas_i(mib[2])
+            ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.npurge),
     uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_nmadvise,
-    locked_read_u64_unsynchronized(
-    &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_dirty.nmadvise),
+    locked_read_u64_unsynchronized(&arenas_i(mib[2])
+            ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.nmadvise),
     uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_purged,
-    locked_read_u64_unsynchronized(
-    &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_dirty.purged),
+    locked_read_u64_unsynchronized(&arenas_i(mib[2])
+            ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.purged),
     uint64_t)
 
 CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_npurge,
-    locked_read_u64_unsynchronized(
-    &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.npurge),
+    locked_read_u64_unsynchronized(&arenas_i(mib[2])
+            ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.npurge),
     uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_nmadvise,
-    locked_read_u64_unsynchronized(
-    &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.nmadvise),
+    locked_read_u64_unsynchronized(&arenas_i(mib[2])
+            ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.nmadvise),
     uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_purged,
-    locked_read_u64_unsynchronized(
-    &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.purged),
+    locked_read_u64_unsynchronized(&arenas_i(mib[2])
+            ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.purged),
     uint64_t)
 
 CTL_RO_CGEN(config_stats, stats_arenas_i_base,
-    arenas_i(mib[2])->astats->astats.base,
-    size_t)
+    arenas_i(mib[2])->astats->astats.base, size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_internal,
     atomic_load_zu(&arenas_i(mib[2])->astats->astats.internal, ATOMIC_RELAXED),
     size_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_edata,
+    arenas_i(mib[2])->astats->astats.metadata_edata, size_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_rtree,
+    arenas_i(mib[2])->astats->astats.metadata_rtree, size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_thp,
     arenas_i(mib[2])->astats->astats.metadata_thp, size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_bytes,
@@ -3538,15 +3831,25 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_bytes,
 CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_stashed_bytes,
     arenas_i(mib[2])->astats->astats.tcache_stashed_bytes, size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_resident,
-    arenas_i(mib[2])->astats->astats.resident,
-    size_t)
+    arenas_i(mib[2])->astats->astats.resident, size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_abandoned_vm,
     atomic_load_zu(
-    &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.abandoned_vm,
-    ATOMIC_RELAXED), size_t)
+        &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.abandoned_vm,
+        ATOMIC_RELAXED),
+    size_t)
 
 CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_bytes,
-    arenas_i(mib[2])->astats->secstats.bytes, size_t)
+    arenas_i(mib[2])->astats->hpastats.secstats.bytes, size_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_hits,
+    arenas_i(mib[2])->astats->hpastats.secstats.total.nhits, size_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_misses,
+    arenas_i(mib[2])->astats->hpastats.secstats.total.nmisses, size_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_dalloc_flush,
+    arenas_i(mib[2])->astats->hpastats.secstats.total.ndalloc_flush, size_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_dalloc_noflush,
+    arenas_i(mib[2])->astats->hpastats.secstats.total.ndalloc_noflush, size_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_overfills,
+    arenas_i(mib[2])->astats->hpastats.secstats.total.noverfills, size_t)
 
 CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated,
     arenas_i(mib[2])->astats->allocated_small, size_t)
@@ -3578,55 +3881,55 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_large_nflushes,
     arenas_i(mib[2])->astats->astats.nflushes_large, uint64_t)
 
 /* Lock profiling related APIs below. */
-#define RO_MUTEX_CTL_GEN(n, l)						\
-CTL_RO_CGEN(config_stats, stats_##n##_num_ops,				\
-    l.n_lock_ops, uint64_t)						\
-CTL_RO_CGEN(config_stats, stats_##n##_num_wait,				\
-    l.n_wait_times, uint64_t)						\
-CTL_RO_CGEN(config_stats, stats_##n##_num_spin_acq,			\
-    l.n_spin_acquired, uint64_t)					\
-CTL_RO_CGEN(config_stats, stats_##n##_num_owner_switch,			\
-    l.n_owner_switches, uint64_t) 					\
-CTL_RO_CGEN(config_stats, stats_##n##_total_wait_time,			\
-    nstime_ns(&l.tot_wait_time), uint64_t)				\
-CTL_RO_CGEN(config_stats, stats_##n##_max_wait_time,			\
-    nstime_ns(&l.max_wait_time), uint64_t)				\
-CTL_RO_CGEN(config_stats, stats_##n##_max_num_thds,			\
-    l.max_n_thds, uint32_t)
+#define RO_MUTEX_CTL_GEN(n, l)                                                 \
+	CTL_RO_CGEN(config_stats, stats_##n##_num_ops, l.n_lock_ops, uint64_t) \
+	CTL_RO_CGEN(                                                           \
+	    config_stats, stats_##n##_num_wait, l.n_wait_times, uint64_t)      \
+	CTL_RO_CGEN(config_stats, stats_##n##_num_spin_acq, l.n_spin_acquired, \
+	    uint64_t)                                                          \
+	CTL_RO_CGEN(config_stats, stats_##n##_num_owner_switch,                \
+	    l.n_owner_switches, uint64_t)                                      \
+	CTL_RO_CGEN(config_stats, stats_##n##_total_wait_time,                 \
+	    nstime_ns(&l.tot_wait_time), uint64_t)                             \
+	CTL_RO_CGEN(config_stats, stats_##n##_max_wait_time,                   \
+	    nstime_ns(&l.max_wait_time), uint64_t)                             \
+	CTL_RO_CGEN(                                                           \
+	    config_stats, stats_##n##_max_num_thds, l.max_n_thds, uint32_t)
 
 /* Global mutexes. */
-#define OP(mtx)								\
-    RO_MUTEX_CTL_GEN(mutexes_##mtx,					\
-        ctl_stats->mutex_prof_data[global_prof_mutex_##mtx])
+#define OP(mtx)                                                                \
+	RO_MUTEX_CTL_GEN(mutexes_##mtx,                                        \
+	    ctl_stats->mutex_prof_data[global_prof_mutex_##mtx])
 MUTEX_PROF_GLOBAL_MUTEXES
 #undef OP
 
 /* Per arena mutexes */
-#define OP(mtx) RO_MUTEX_CTL_GEN(arenas_i_mutexes_##mtx,		\
-    arenas_i(mib[2])->astats->astats.mutex_prof_data[arena_prof_mutex_##mtx])
+#define OP(mtx)                                                                \
+	RO_MUTEX_CTL_GEN(arenas_i_mutexes_##mtx,                               \
+	    arenas_i(mib[2])                                                   \
+	        ->astats->astats.mutex_prof_data[arena_prof_mutex_##mtx])
 MUTEX_PROF_ARENA_MUTEXES
 #undef OP
 
 /* tcache bin mutex */
-RO_MUTEX_CTL_GEN(arenas_i_bins_j_mutex,
-    arenas_i(mib[2])->astats->bstats[mib[4]].mutex_data)
+RO_MUTEX_CTL_GEN(
+    arenas_i_bins_j_mutex, arenas_i(mib[2])->astats->bstats[mib[4]].mutex_data)
 #undef RO_MUTEX_CTL_GEN
 
 /* Resets all mutex stats, including global, arena and bin mutexes. */
 static int
-stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib,
-    size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen) {
+stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
 	if (!config_stats) {
 		return ENOENT;
 	}
 
 	tsdn_t *tsdn = tsd_tsdn(tsd);
 
-#define MUTEX_PROF_RESET(mtx)						\
-    malloc_mutex_lock(tsdn, &mtx);					\
-    malloc_mutex_prof_data_reset(tsdn, &mtx);				\
-    malloc_mutex_unlock(tsdn, &mtx);
+#define MUTEX_PROF_RESET(mtx)                                                  \
+	malloc_mutex_lock(tsdn, &mtx);                                         \
+	malloc_mutex_prof_data_reset(tsdn, &mtx);                              \
+	malloc_mutex_unlock(tsdn, &mtx);
 
 	/* Global mutexes: ctl and prof. */
 	MUTEX_PROF_RESET(ctl_mtx);
@@ -3693,9 +3996,9 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nonfull_slabs,
     arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.nonfull_slabs, size_t)
 
 static const ctl_named_node_t *
-stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib,
-    size_t miblen, size_t j) {
-	if (j > SC_NBINS) {
+stats_arenas_i_bins_j_index(
+    tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) {
+	if (j >= SC_NBINS) {
 		return NULL;
 	}
 	return super_stats_arenas_i_bins_j_node;
@@ -3703,117 +4006,173 @@ stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib,
 
 CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nmalloc,
     locked_read_u64_unsynchronized(
-    &arenas_i(mib[2])->astats->lstats[mib[4]].nmalloc), uint64_t)
+        &arenas_i(mib[2])->astats->lstats[mib[4]].nmalloc),
+    uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_ndalloc,
     locked_read_u64_unsynchronized(
-    &arenas_i(mib[2])->astats->lstats[mib[4]].ndalloc), uint64_t)
+        &arenas_i(mib[2])->astats->lstats[mib[4]].ndalloc),
+    uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nrequests,
     locked_read_u64_unsynchronized(
-    &arenas_i(mib[2])->astats->lstats[mib[4]].nrequests), uint64_t)
+        &arenas_i(mib[2])->astats->lstats[mib[4]].nrequests),
+    uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_curlextents,
     arenas_i(mib[2])->astats->lstats[mib[4]].curlextents, size_t)
 
 static const ctl_named_node_t *
-stats_arenas_i_lextents_j_index(tsdn_t *tsdn, const size_t *mib,
-    size_t miblen, size_t j) {
-	if (j > SC_NSIZES - SC_NBINS) {
+stats_arenas_i_lextents_j_index(
+    tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) {
+	if (j >= SC_NSIZES - SC_NBINS) {
 		return NULL;
 	}
 	return super_stats_arenas_i_lextents_j_node;
 }
 
 CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_ndirty,
-        arenas_i(mib[2])->astats->estats[mib[4]].ndirty, size_t);
+    arenas_i(mib[2])->astats->estats[mib[4]].ndirty, size_t);
 CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_nmuzzy,
-        arenas_i(mib[2])->astats->estats[mib[4]].nmuzzy, size_t);
+    arenas_i(mib[2])->astats->estats[mib[4]].nmuzzy, size_t);
 CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_nretained,
-        arenas_i(mib[2])->astats->estats[mib[4]].nretained, size_t);
+    arenas_i(mib[2])->astats->estats[mib[4]].nretained, size_t);
 CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_dirty_bytes,
-        arenas_i(mib[2])->astats->estats[mib[4]].dirty_bytes, size_t);
+    arenas_i(mib[2])->astats->estats[mib[4]].dirty_bytes, size_t);
 CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_muzzy_bytes,
-        arenas_i(mib[2])->astats->estats[mib[4]].muzzy_bytes, size_t);
+    arenas_i(mib[2])->astats->estats[mib[4]].muzzy_bytes, size_t);
 CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_retained_bytes,
-        arenas_i(mib[2])->astats->estats[mib[4]].retained_bytes, size_t);
+    arenas_i(mib[2])->astats->estats[mib[4]].retained_bytes, size_t);
 
 static const ctl_named_node_t *
-stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib,
-    size_t miblen, size_t j) {
+stats_arenas_i_extents_j_index(
+    tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) {
 	if (j >= SC_NPSIZES) {
 		return NULL;
 	}
 	return super_stats_arenas_i_extents_j_node;
 }
 
+CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npageslabs,
+    arenas_i(mib[2])->astats->hpastats.psset_stats.merged.npageslabs, size_t);
+CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nactive,
+    arenas_i(mib[2])->astats->hpastats.psset_stats.merged.nactive, size_t);
+CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_ndirty,
+    arenas_i(mib[2])->astats->hpastats.psset_stats.merged.ndirty, size_t);
+
+/* Nonhuge slabs */
+CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_slabs_npageslabs_nonhuge,
+    arenas_i(mib[2])->astats->hpastats.psset_stats.slabs[0].npageslabs, size_t);
+CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_slabs_nactive_nonhuge,
+    arenas_i(mib[2])->astats->hpastats.psset_stats.slabs[0].nactive, size_t);
+CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_slabs_ndirty_nonhuge,
+    arenas_i(mib[2])->astats->hpastats.psset_stats.slabs[0].ndirty, size_t);
+
+/* Huge slabs */
+CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_slabs_npageslabs_huge,
+    arenas_i(mib[2])->astats->hpastats.psset_stats.slabs[1].npageslabs, size_t);
+CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_slabs_nactive_huge,
+    arenas_i(mib[2])->astats->hpastats.psset_stats.slabs[1].nactive, size_t);
+CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_slabs_ndirty_huge,
+    arenas_i(mib[2])->astats->hpastats.psset_stats.slabs[1].ndirty, size_t);
+
 CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npurge_passes,
-    arenas_i(mib[2])->astats->hpastats.nonderived_stats.npurge_passes, uint64_t);
+    arenas_i(mib[2])->astats->hpastats.nonderived_stats.npurge_passes,
+    uint64_t);
 CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npurges,
     arenas_i(mib[2])->astats->hpastats.nonderived_stats.npurges, uint64_t);
 CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nhugifies,
     arenas_i(mib[2])->astats->hpastats.nonderived_stats.nhugifies, uint64_t);
+CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nhugify_failures,
+    arenas_i(mib[2])->astats->hpastats.nonderived_stats.nhugify_failures,
+    uint64_t);
 CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_ndehugifies,
     arenas_i(mib[2])->astats->hpastats.nonderived_stats.ndehugifies, uint64_t);
 
 /* Full, nonhuge */
-CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge,
+CTL_RO_CGEN(config_stats,
+    stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge,
     arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].npageslabs,
     size_t);
 CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge,
-    arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].nactive, size_t);
+    arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].nactive,
+    size_t);
 CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ndirty_nonhuge,
-    arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].ndirty, size_t);
+    arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].ndirty,
+    size_t);
 
 /* Full, huge */
 CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge,
     arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].npageslabs,
     size_t);
 CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_huge,
-    arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].nactive, size_t);
+    arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].nactive,
+    size_t);
 CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ndirty_huge,
-    arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].ndirty, size_t);
+    arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].ndirty,
+    size_t);
 
 /* Empty, nonhuge */
-CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_npageslabs_nonhuge,
+CTL_RO_CGEN(config_stats,
+    stats_arenas_i_hpa_shard_empty_slabs_npageslabs_nonhuge,
     arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[0].npageslabs,
     size_t);
 CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_nactive_nonhuge,
-    arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[0].nactive, size_t);
+    arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[0].nactive,
+    size_t);
 CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_ndirty_nonhuge,
-    arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[0].ndirty, size_t);
+    arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[0].ndirty,
+    size_t);
 
 /* Empty, huge */
 CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_npageslabs_huge,
     arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[1].npageslabs,
     size_t);
 CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_nactive_huge,
-    arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[1].nactive, size_t);
+    arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[1].nactive,
+    size_t);
 CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_ndirty_huge,
-    arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[1].ndirty, size_t);
+    arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[1].ndirty,
+    size_t);
 
 /* Nonfull, nonhuge */
-CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge,
-    arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0].npageslabs,
+CTL_RO_CGEN(config_stats,
+    stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge,
+    arenas_i(mib[2])
+        ->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0]
+        .npageslabs,
     size_t);
-CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge,
-    arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0].nactive,
+CTL_RO_CGEN(config_stats,
+    stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge,
+    arenas_i(mib[2])
+        ->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0]
+        .nactive,
     size_t);
-CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_nonhuge,
-    arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0].ndirty,
+CTL_RO_CGEN(config_stats,
+    stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_nonhuge,
+    arenas_i(mib[2])
+        ->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0]
+        .ndirty,
     size_t);
 
 /* Nonfull, huge */
-CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge,
-    arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1].npageslabs,
+CTL_RO_CGEN(config_stats,
+    stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge,
+    arenas_i(mib[2])
+        ->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1]
+        .npageslabs,
     size_t);
 CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge,
-    arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1].nactive,
+    arenas_i(mib[2])
+        ->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1]
+        .nactive,
     size_t);
 CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_huge,
-    arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1].ndirty,
+    arenas_i(mib[2])
+        ->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1]
+        .ndirty,
     size_t);
 
 static const ctl_named_node_t *
-stats_arenas_i_hpa_shard_nonfull_slabs_j_index(tsdn_t *tsdn, const size_t *mib,
-    size_t miblen, size_t j) {
+stats_arenas_i_hpa_shard_nonfull_slabs_j_index(
+    tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) {
 	if (j >= PSSET_NPSIZES) {
 		return NULL;
 	}
@@ -3831,8 +4190,7 @@ ctl_arenas_i_verify(size_t i) {
 }
 
 static const ctl_named_node_t *
-stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib,
-    size_t miblen, size_t i) {
+stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) {
 	const ctl_named_node_t *ret;
 
 	malloc_mutex_lock(tsdn, &ctl_mtx);
@@ -3851,7 +4209,7 @@ static int
 experimental_hooks_install_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
     void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
 	int ret;
-	if (oldp == NULL || oldlenp == NULL|| newp == NULL) {
+	if (oldp == NULL || oldlenp == NULL || newp == NULL) {
 		ret = EINVAL;
 		goto label_return;
 	}
@@ -3892,32 +4250,6 @@ label_return:
 	return ret;
 }
 
-static int
-experimental_thread_activity_callback_ctl(tsd_t *tsd, const size_t *mib,
-    size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
-
-	if (!config_stats) {
-		return ENOENT;
-	}
-
-	activity_callback_thunk_t t_old = tsd_activity_callback_thunk_get(tsd);
-	READ(t_old, activity_callback_thunk_t);
-
-	if (newp != NULL) {
-		/*
-		 * This initialization is unnecessary.  If it's omitted, though,
-		 * clang gets confused and warns on the subsequent use of t_new.
-		 */
-		activity_callback_thunk_t t_new = {NULL, NULL};
-		WRITE(t_new, activity_callback_thunk_t);
-		tsd_activity_callback_thunk_set(tsd, t_new);
-	}
-	ret = 0;
-label_return:
-	return ret;
-}
-
 /*
  * Output six memory utilization entries for an input pointer, the first one of
  * type (void *) and the remaining five of type size_t, describing the following
@@ -3986,8 +4318,8 @@ label_return:
  * motivation from C++.
  */
 static int
-experimental_utilization_query_ctl(tsd_t *tsd, const size_t *mib,
-    size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+experimental_utilization_query_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
 	int ret;
 
 	assert(sizeof(inspect_extent_util_stats_verbose_t)
@@ -4002,8 +4334,8 @@ experimental_utilization_query_ctl(tsd_t *tsd, const size_t *mib,
 
 	void *ptr = NULL;
 	WRITE(ptr, void *);
-	inspect_extent_util_stats_verbose_t *util_stats
-	    = (inspect_extent_util_stats_verbose_t *)oldp;
+	inspect_extent_util_stats_verbose_t *util_stats =
+	    (inspect_extent_util_stats_verbose_t *)oldp;
 	inspect_extent_util_stats_verbose_get(tsd_tsdn(tsd), ptr,
 	    &util_stats->nfree, &util_stats->nregs, &util_stats->size,
 	    &util_stats->bin_nfree, &util_stats->bin_nregs,
@@ -4125,7 +4457,7 @@ experimental_utilization_batch_query_ctl(tsd_t *tsd, const size_t *mib,
 		goto label_return;
 	}
 
-	void **ptrs = (void **)newp;
+	void                       **ptrs = (void **)newp;
 	inspect_extent_util_stats_t *util_stats =
 	    (inspect_extent_util_stats_t *)oldp;
 	size_t i;
@@ -4141,8 +4473,8 @@ label_return:
 }
 
 static const ctl_named_node_t *
-experimental_arenas_i_index(tsdn_t *tsdn, const size_t *mib,
-    size_t miblen, size_t i) {
+experimental_arenas_i_index(
+    tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) {
 	const ctl_named_node_t *ret;
 
 	malloc_mutex_lock(tsdn, &ctl_mtx);
@@ -4157,8 +4489,8 @@ label_return:
 }
 
 static int
-experimental_arenas_i_pactivep_ctl(tsd_t *tsd, const size_t *mib,
-    size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+experimental_arenas_i_pactivep_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
 	if (!config_stats) {
 		return ENOENT;
 	}
@@ -4168,16 +4500,16 @@ experimental_arenas_i_pactivep_ctl(tsd_t *tsd, const size_t *mib,
 
 	unsigned arena_ind;
 	arena_t *arena;
-	int ret;
-	size_t *pactivep;
+	int      ret;
+	size_t  *pactivep;
 
 	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
 	READONLY();
 	MIB_UNSIGNED(arena_ind, 2);
-	if (arena_ind < narenas_total_get() && (arena =
-	    arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) {
-#if defined(JEMALLOC_GCC_ATOMIC_ATOMICS) ||				\
-    defined(JEMALLOC_GCC_SYNC_ATOMICS) || defined(_MSC_VER)
+	if (arena_ind < narenas_total_get()
+	    && (arena = arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) {
+#if defined(JEMALLOC_GCC_ATOMIC_ATOMICS) || defined(JEMALLOC_GCC_SYNC_ATOMICS) \
+    || defined(_MSC_VER)
 		/* Expose the underlying counter for fast read. */
 		pactivep = (size_t *)&(arena->pa_shard.nactive.repr);
 		READ(pactivep, size_t *);
@@ -4229,7 +4561,7 @@ label_return:
 typedef struct write_cb_packet_s write_cb_packet_t;
 struct write_cb_packet_s {
 	write_cb_t *write_cb;
-	void *cbopaque;
+	void       *cbopaque;
 };
 
 static int
@@ -4248,8 +4580,8 @@ experimental_prof_recent_alloc_dump_ctl(tsd_t *tsd, const size_t *mib,
 	write_cb_packet_t write_cb_packet;
 	ASSURED_WRITE(write_cb_packet, write_cb_packet_t);
 
-	prof_recent_alloc_dump(tsd, write_cb_packet.write_cb,
-	    write_cb_packet.cbopaque);
+	prof_recent_alloc_dump(
+	    tsd, write_cb_packet.write_cb, write_cb_packet.cbopaque);
 
 	ret = 0;
 
@@ -4262,12 +4594,12 @@ struct batch_alloc_packet_s {
 	void **ptrs;
 	size_t num;
 	size_t size;
-	int flags;
+	int    flags;
 };
 
 static int
-experimental_batch_alloc_ctl(tsd_t *tsd, const size_t *mib,
-    size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+experimental_batch_alloc_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
 	int ret;
 
 	VERIFY_READ(size_t);
@@ -4288,8 +4620,8 @@ label_return:
 static int
 prof_stats_bins_i_live_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
     void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
-	unsigned binind;
+	int          ret;
+	unsigned     binind;
 	prof_stats_t stats;
 
 	if (!(config_prof && opt_prof && opt_prof_stats)) {
@@ -4314,8 +4646,8 @@ label_return:
 static int
 prof_stats_bins_i_accum_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
     void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
-	unsigned binind;
+	int          ret;
+	unsigned     binind;
 	prof_stats_t stats;
 
 	if (!(config_prof && opt_prof && opt_prof_stats)) {
@@ -4338,8 +4670,8 @@ label_return:
 }
 
 static const ctl_named_node_t *
-prof_stats_bins_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen,
-    size_t i) {
+prof_stats_bins_i_index(
+    tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) {
 	if (!(config_prof && opt_prof && opt_prof_stats)) {
 		return NULL;
 	}
@@ -4352,8 +4684,8 @@ prof_stats_bins_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen,
 static int
 prof_stats_lextents_i_live_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
     void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
-	unsigned lextent_ind;
+	int          ret;
+	unsigned     lextent_ind;
 	prof_stats_t stats;
 
 	if (!(config_prof && opt_prof && opt_prof_stats)) {
@@ -4378,8 +4710,8 @@ label_return:
 static int
 prof_stats_lextents_i_accum_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
     void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
-	int ret;
-	unsigned lextent_ind;
+	int          ret;
+	unsigned     lextent_ind;
 	prof_stats_t stats;
 
 	if (!(config_prof && opt_prof && opt_prof_stats)) {
@@ -4402,8 +4734,8 @@ label_return:
 }
 
 static const ctl_named_node_t *
-prof_stats_lextents_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen,
-    size_t i) {
+prof_stats_lextents_i_index(
+    tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) {
 	if (!(config_prof && opt_prof && opt_prof_stats)) {
 		return NULL;
 	}
diff --git a/src/decay.c b/src/decay.c
index d801b2bc..7bbce2a6 100644
--- a/src/decay.c
+++ b/src/decay.c
@@ -4,9 +4,8 @@
 #include "jemalloc/internal/decay.h"
 
 static const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = {
-#define STEP(step, h, x, y)			\
-		h,
-		SMOOTHSTEP
+#define STEP(step, h, x, y) h,
+    SMOOTHSTEP
 #undef STEP
 };
 
@@ -14,15 +13,16 @@ static const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = {
  * Generate a new deadline that is uniformly random within the next epoch after
  * the current one.
  */
-void
+static void
 decay_deadline_init(decay_t *decay) {
 	nstime_copy(&decay->deadline, &decay->epoch);
 	nstime_add(&decay->deadline, &decay->interval);
 	if (decay_ms_read(decay) > 0) {
 		nstime_t jitter;
 
-		nstime_init(&jitter, prng_range_u64(&decay->jitter_state,
-		    nstime_ns(&decay->interval)));
+		nstime_init(&jitter,
+		    prng_range_u64(
+		        &decay->jitter_state, nstime_ns(&decay->interval)));
 		nstime_add(&decay->deadline, &jitter);
 	}
 }
@@ -31,8 +31,8 @@ void
 decay_reinit(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms) {
 	atomic_store_zd(&decay->time_ms, decay_ms, ATOMIC_RELAXED);
 	if (decay_ms > 0) {
-		nstime_init(&decay->interval, (uint64_t)decay_ms *
-		    KQU(1000000));
+		nstime_init(
+		    &decay->interval, (uint64_t)decay_ms * KQU(1000000));
 		nstime_idivide(&decay->interval, SMOOTHSTEP_NSTEPS);
 	}
 
@@ -52,7 +52,7 @@ decay_init(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms) {
 		decay->ceil_npages = 0;
 	}
 	if (malloc_mutex_init(&decay->mtx, "decay", WITNESS_RANK_DECAY,
-	    malloc_mutex_rank_exclusive)) {
+	        malloc_mutex_rank_exclusive)) {
 		return true;
 	}
 	decay->purging = false;
@@ -65,8 +65,8 @@ decay_ms_valid(ssize_t decay_ms) {
 	if (decay_ms < -1) {
 		return false;
 	}
-	if (decay_ms == -1 || (uint64_t)decay_ms <= NSTIME_SEC_MAX *
-	    KQU(1000)) {
+	if (decay_ms == -1
+	    || (uint64_t)decay_ms <= NSTIME_SEC_MAX * KQU(1000)) {
 		return true;
 	}
 	return false;
@@ -74,8 +74,8 @@ decay_ms_valid(ssize_t decay_ms) {
 
 static void
 decay_maybe_update_time(decay_t *decay, nstime_t *new_time) {
-	if (unlikely(!nstime_monotonic() && nstime_compare(&decay->epoch,
-	    new_time) > 0)) {
+	if (unlikely(!nstime_monotonic()
+	        && nstime_compare(&decay->epoch, new_time) > 0)) {
 		/*
 		 * Time went backwards.  Move the epoch back in time and
 		 * generate a new deadline, with the expectation that time
@@ -115,11 +115,11 @@ decay_backlog_npages_limit(const decay_t *decay) {
  * placed as the newest record.
  */
 static void
-decay_backlog_update(decay_t *decay, uint64_t nadvance_u64,
-    size_t current_npages) {
+decay_backlog_update(
+    decay_t *decay, uint64_t nadvance_u64, size_t current_npages) {
 	if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) {
-		memset(decay->backlog, 0, (SMOOTHSTEP_NSTEPS-1) *
-		    sizeof(size_t));
+		memset(decay->backlog, 0,
+		    (SMOOTHSTEP_NSTEPS - 1) * sizeof(size_t));
 	} else {
 		size_t nadvance_z = (size_t)nadvance_u64;
 
@@ -128,14 +128,15 @@ decay_backlog_update(decay_t *decay, uint64_t nadvance_u64,
 		memmove(decay->backlog, &decay->backlog[nadvance_z],
 		    (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t));
 		if (nadvance_z > 1) {
-			memset(&decay->backlog[SMOOTHSTEP_NSTEPS -
-			    nadvance_z], 0, (nadvance_z-1) * sizeof(size_t));
+			memset(&decay->backlog[SMOOTHSTEP_NSTEPS - nadvance_z],
+			    0, (nadvance_z - 1) * sizeof(size_t));
 		}
 	}
 
-	size_t npages_delta = (current_npages > decay->nunpurged) ?
-	    current_npages - decay->nunpurged : 0;
-	decay->backlog[SMOOTHSTEP_NSTEPS-1] = npages_delta;
+	size_t npages_delta = (current_npages > decay->nunpurged)
+	    ? current_npages - decay->nunpurged
+	    : 0;
+	decay->backlog[SMOOTHSTEP_NSTEPS - 1] = npages_delta;
 
 	if (config_debug) {
 		if (current_npages > decay->ceil_npages) {
@@ -157,6 +158,7 @@ decay_deadline_reached(const decay_t *decay, const nstime_t *time) {
 uint64_t
 decay_npages_purge_in(decay_t *decay, nstime_t *time, size_t npages_new) {
 	uint64_t decay_interval_ns = decay_epoch_duration_ns(decay);
+	assert(decay_interval_ns != 0);
 	size_t n_epoch = (size_t)(nstime_ns(time) / decay_interval_ns);
 
 	uint64_t npages_purge;
@@ -164,18 +166,17 @@ decay_npages_purge_in(decay_t *decay, nstime_t *time, size_t npages_new) {
 		npages_purge = npages_new;
 	} else {
 		uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1];
-		assert(h_steps_max >=
-		    h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
-		npages_purge = npages_new * (h_steps_max -
-		    h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
+		assert(h_steps_max >= h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
+		npages_purge = npages_new
+		    * (h_steps_max - h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
 		npages_purge >>= SMOOTHSTEP_BFP;
 	}
 	return npages_purge;
 }
 
 bool
-decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time,
-    size_t npages_current) {
+decay_maybe_advance_epoch(
+    decay_t *decay, nstime_t *new_time, size_t npages_current) {
 	/* Handle possible non-monotonicity of time. */
 	decay_maybe_update_time(decay, new_time);
 
@@ -201,8 +202,9 @@ decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time,
 	decay_backlog_update(decay, nadvance_u64, npages_current);
 
 	decay->npages_limit = decay_backlog_npages_limit(decay);
-	decay->nunpurged = (decay->npages_limit > npages_current) ?
-	    decay->npages_limit : npages_current;
+	decay->nunpurged = (decay->npages_limit > npages_current)
+	    ? decay->npages_limit
+	    : npages_current;
 
 	return true;
 }
@@ -225,21 +227,21 @@ decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time,
  */
 static inline size_t
 decay_npurge_after_interval(decay_t *decay, size_t interval) {
-	size_t i;
+	size_t   i;
 	uint64_t sum = 0;
 	for (i = 0; i < interval; i++) {
 		sum += decay->backlog[i] * h_steps[i];
 	}
 	for (; i < SMOOTHSTEP_NSTEPS; i++) {
-		sum += decay->backlog[i] *
-		    (h_steps[i] - h_steps[i - interval]);
+		sum += decay->backlog[i] * (h_steps[i] - h_steps[i - interval]);
 	}
 
 	return (size_t)(sum >> SMOOTHSTEP_BFP);
 }
 
-uint64_t decay_ns_until_purge(decay_t *decay, size_t npages_current,
-    uint64_t npages_threshold) {
+uint64_t
+decay_ns_until_purge(
+    decay_t *decay, size_t npages_current, uint64_t npages_threshold) {
 	if (!decay_gradually(decay)) {
 		return DECAY_UNBOUNDED_TIME_TO_PURGE;
 	}
@@ -277,7 +279,7 @@ uint64_t decay_ns_until_purge(decay_t *decay, size_t npages_current,
 	}
 
 	unsigned n_search = 0;
-	size_t target, npurge;
+	size_t   target, npurge;
 	while ((npurge_lb + npages_threshold < npurge_ub) && (lb + 2 < ub)) {
 		target = (lb + ub) / 2;
 		npurge = decay_npurge_after_interval(decay, target);
diff --git a/src/ecache.c b/src/ecache.c
index a242227d..20fcee9e 100644
--- a/src/ecache.c
+++ b/src/ecache.c
@@ -7,7 +7,7 @@ bool
 ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state, unsigned ind,
     bool delay_coalesce) {
 	if (malloc_mutex_init(&ecache->mtx, "extents", WITNESS_RANK_EXTENTS,
-	    malloc_mutex_rank_exclusive)) {
+	        malloc_mutex_rank_exclusive)) {
 		return true;
 	}
 	ecache->state = state;
diff --git a/src/edata.c b/src/edata.c
index 82b6f565..d71d1679 100644
--- a/src/edata.c
+++ b/src/edata.c
@@ -1,6 +1,5 @@
 #include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/jemalloc_internal_includes.h"
 
-ph_gen(, edata_avail, edata_t, avail_link,
-    edata_esnead_comp)
-ph_gen(, edata_heap, edata_t, heap_link, edata_snad_comp)
+ph_gen(, edata_avail, edata_t, avail_link, edata_esnead_comp)
+    ph_gen(, edata_heap, edata_t, heap_link, edata_snad_comp)
diff --git a/src/edata_cache.c b/src/edata_cache.c
index 6bc1848c..3ac8273a 100644
--- a/src/edata_cache.c
+++ b/src/edata_cache.c
@@ -11,7 +11,7 @@ edata_cache_init(edata_cache_t *edata_cache, base_t *base) {
 	 */
 	atomic_store_zu(&edata_cache->count, 0, ATOMIC_RELAXED);
 	if (malloc_mutex_init(&edata_cache->mtx, "edata_cache",
-	    WITNESS_RANK_EDATA_CACHE, malloc_mutex_rank_exclusive)) {
+	        WITNESS_RANK_EDATA_CACHE, malloc_mutex_rank_exclusive)) {
 		return true;
 	}
 	edata_cache->base = base;
@@ -63,8 +63,7 @@ edata_cache_fast_init(edata_cache_fast_t *ecs, edata_cache_t *fallback) {
 }
 
 static void
-edata_cache_fast_try_fill_from_fallback(tsdn_t *tsdn,
-    edata_cache_fast_t *ecs) {
+edata_cache_fast_try_fill_from_fallback(tsdn_t *tsdn, edata_cache_fast_t *ecs) {
 	edata_t *edata;
 	malloc_mutex_lock(tsdn, &ecs->fallback->mtx);
 	for (int i = 0; i < EDATA_CACHE_FAST_FILL; i++) {
@@ -80,8 +79,8 @@ edata_cache_fast_try_fill_from_fallback(tsdn_t *tsdn,
 
 edata_t *
 edata_cache_fast_get(tsdn_t *tsdn, edata_cache_fast_t *ecs) {
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_EDATA_CACHE, 0);
+	witness_assert_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_EDATA_CACHE, 0);
 
 	if (ecs->disabled) {
 		assert(edata_list_inactive_first(&ecs->list) == NULL);
@@ -118,7 +117,7 @@ edata_cache_fast_flush_all(tsdn_t *tsdn, edata_cache_fast_t *ecs) {
 	 * flush and disable pathways.
 	 */
 	edata_t *edata;
-	size_t nflushed = 0;
+	size_t   nflushed = 0;
 	malloc_mutex_lock(tsdn, &ecs->fallback->mtx);
 	while ((edata = edata_list_inactive_first(&ecs->list)) != NULL) {
 		edata_list_inactive_remove(&ecs->list, edata);
@@ -131,8 +130,8 @@ edata_cache_fast_flush_all(tsdn_t *tsdn, edata_cache_fast_t *ecs) {
 
 void
 edata_cache_fast_put(tsdn_t *tsdn, edata_cache_fast_t *ecs, edata_t *edata) {
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_EDATA_CACHE, 0);
+	witness_assert_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_EDATA_CACHE, 0);
 
 	if (ecs->disabled) {
 		assert(edata_list_inactive_first(&ecs->list) == NULL);
diff --git a/src/ehooks.c b/src/ehooks.c
index 383e9de6..d7abb960 100644
--- a/src/ehooks.c
+++ b/src/ehooks.c
@@ -27,9 +27,10 @@ extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
 	assert(alignment != 0);
 
 	/* "primary" dss. */
-	if (have_dss && dss_prec == dss_prec_primary && (ret =
-	    extent_alloc_dss(tsdn, arena, new_addr, size, alignment, zero,
-	    commit)) != NULL) {
+	if (have_dss && dss_prec == dss_prec_primary
+	    && (ret = extent_alloc_dss(
+	            tsdn, arena, new_addr, size, alignment, zero, commit))
+	        != NULL) {
 		return ret;
 	}
 	/* mmap. */
@@ -38,9 +39,10 @@ extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
 		return ret;
 	}
 	/* "secondary" dss. */
-	if (have_dss && dss_prec == dss_prec_secondary && (ret =
-	    extent_alloc_dss(tsdn, arena, new_addr, size, alignment, zero,
-	    commit)) != NULL) {
+	if (have_dss && dss_prec == dss_prec_secondary
+	    && (ret = extent_alloc_dss(
+	            tsdn, arena, new_addr, size, alignment, zero, commit))
+	        != NULL) {
 		return ret;
 	}
 
@@ -53,11 +55,12 @@ ehooks_default_alloc_impl(tsdn_t *tsdn, void *new_addr, size_t size,
     size_t alignment, bool *zero, bool *commit, unsigned arena_ind) {
 	arena_t *arena = arena_get(tsdn, arena_ind, false);
 	/* NULL arena indicates arena_create. */
-	assert(arena != NULL || alignment == HUGEPAGE);
-	dss_prec_t dss = (arena == NULL) ? dss_prec_disabled :
-	    (dss_prec_t)atomic_load_u(&arena->dss_prec, ATOMIC_RELAXED);
-	void *ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment,
-	    zero, commit, dss);
+	assert(arena != NULL || alignment == BASE_BLOCK_MIN_ALIGN);
+	dss_prec_t dss = (arena == NULL)
+	    ? dss_prec_disabled
+	    : (dss_prec_t)atomic_load_u(&arena->dss_prec, ATOMIC_RELAXED);
+	void      *ret = extent_alloc_core(
+            tsdn, arena, new_addr, size, alignment, zero, commit, dss);
 	if (have_madvise_huge && ret) {
 		pages_set_thp_state(ret, size);
 	}
@@ -100,8 +103,8 @@ ehooks_default_destroy(extent_hooks_t *extent_hooks, void *addr, size_t size,
 
 bool
 ehooks_default_commit_impl(void *addr, size_t offset, size_t length) {
-	return pages_commit((void *)((uintptr_t)addr + (uintptr_t)offset),
-	    length);
+	return pages_commit(
+	    (void *)((byte_t *)addr + (uintptr_t)offset), length);
 }
 
 static bool
@@ -112,8 +115,8 @@ ehooks_default_commit(extent_hooks_t *extent_hooks, void *addr, size_t size,
 
 bool
 ehooks_default_decommit_impl(void *addr, size_t offset, size_t length) {
-	return pages_decommit((void *)((uintptr_t)addr + (uintptr_t)offset),
-	    length);
+	return pages_decommit(
+	    (void *)((byte_t *)addr + (uintptr_t)offset), length);
 }
 
 static bool
@@ -125,8 +128,8 @@ ehooks_default_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size,
 #ifdef PAGES_CAN_PURGE_LAZY
 bool
 ehooks_default_purge_lazy_impl(void *addr, size_t offset, size_t length) {
-	return pages_purge_lazy((void *)((uintptr_t)addr + (uintptr_t)offset),
-	    length);
+	return pages_purge_lazy(
+	    (void *)((byte_t *)addr + (uintptr_t)offset), length);
 }
 
 static bool
@@ -143,8 +146,8 @@ ehooks_default_purge_lazy(extent_hooks_t *extent_hooks, void *addr, size_t size,
 #ifdef PAGES_CAN_PURGE_FORCED
 bool
 ehooks_default_purge_forced_impl(void *addr, size_t offset, size_t length) {
-	return pages_purge_forced((void *)((uintptr_t)addr +
-	    (uintptr_t)offset), length);
+	return pages_purge_forced(
+	    (void *)((byte_t *)addr + (uintptr_t)offset), length);
 }
 
 static bool
@@ -159,7 +162,7 @@ ehooks_default_purge_forced(extent_hooks_t *extent_hooks, void *addr,
 #endif
 
 bool
-ehooks_default_split_impl() {
+ehooks_default_split_impl(void) {
 	if (!maps_coalesce) {
 		/*
 		 * Without retain, only whole regions can be purged (required by
@@ -201,11 +204,11 @@ ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, void *addr_b) {
 		return true;
 	}
 	if (config_debug) {
-		edata_t *a = emap_edata_lookup(tsdn, &arena_emap_global,
-		    addr_a);
-		bool head_a = edata_is_head_get(a);
-		edata_t *b = emap_edata_lookup(tsdn, &arena_emap_global,
-		    addr_b);
+		edata_t *a = emap_edata_lookup(
+		    tsdn, &arena_emap_global, addr_a);
+		bool     head_a = edata_is_head_get(a);
+		edata_t *b = emap_edata_lookup(
+		    tsdn, &arena_emap_global, addr_b);
 		bool head_b = edata_is_head_get(b);
 		emap_assert_mapped(tsdn, &arena_emap_global, a);
 		emap_assert_mapped(tsdn, &arena_emap_global, b);
@@ -254,22 +257,17 @@ ehooks_default_unguard_impl(void *guard1, void *guard2) {
 	pages_unmark_guards(guard1, guard2);
 }
 
-const extent_hooks_t ehooks_default_extent_hooks = {
-	ehooks_default_alloc,
-	ehooks_default_dalloc,
-	ehooks_default_destroy,
-	ehooks_default_commit,
-	ehooks_default_decommit,
+const extent_hooks_t ehooks_default_extent_hooks = {ehooks_default_alloc,
+    ehooks_default_dalloc, ehooks_default_destroy, ehooks_default_commit,
+    ehooks_default_decommit,
 #ifdef PAGES_CAN_PURGE_LAZY
-	ehooks_default_purge_lazy,
+    ehooks_default_purge_lazy,
 #else
-	NULL,
+    NULL,
 #endif
 #ifdef PAGES_CAN_PURGE_FORCED
-	ehooks_default_purge_forced,
+    ehooks_default_purge_forced,
 #else
-	NULL,
+    NULL,
 #endif
-	ehooks_default_split,
-	ehooks_default_merge
-};
+    ehooks_default_split, ehooks_default_merge};
diff --git a/src/emap.c b/src/emap.c
index 9cc95a72..54bfabab 100644
--- a/src/emap.c
+++ b/src/emap.c
@@ -16,10 +16,10 @@ emap_init(emap_t *emap, base_t *base, bool zeroed) {
 }
 
 void
-emap_update_edata_state(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
-    extent_state_t state) {
-	witness_assert_positive_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE);
+emap_update_edata_state(
+    tsdn_t *tsdn, emap_t *emap, edata_t *edata, extent_state_t state) {
+	witness_assert_positive_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE);
 
 	edata_state_set(edata, state);
 
@@ -28,10 +28,11 @@ emap_update_edata_state(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
 	    rtree_ctx, (uintptr_t)edata_base_get(edata), /* dependent */ true,
 	    /* init_missing */ false);
 	assert(elm1 != NULL);
-	rtree_leaf_elm_t *elm2 = edata_size_get(edata) == PAGE ? NULL :
-	    rtree_leaf_elm_lookup(tsdn, &emap->rtree, rtree_ctx,
-	    (uintptr_t)edata_last_get(edata), /* dependent */ true,
-	    /* init_missing */ false);
+	rtree_leaf_elm_t *elm2 = edata_size_get(edata) == PAGE
+	    ? NULL
+	    : rtree_leaf_elm_lookup(tsdn, &emap->rtree, rtree_ctx,
+	          (uintptr_t)edata_last_get(edata), /* dependent */ true,
+	          /* init_missing */ false);
 
 	rtree_leaf_elm_state_update(tsdn, &emap->rtree, elm1, elm2, state);
 
@@ -42,17 +43,17 @@ static inline edata_t *
 emap_try_acquire_edata_neighbor_impl(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
     extent_pai_t pai, extent_state_t expected_state, bool forward,
     bool expanding) {
-	witness_assert_positive_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE);
+	witness_assert_positive_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE);
 	assert(!edata_guarded_get(edata));
 	assert(!expanding || forward);
 	assert(!edata_state_in_transition(expected_state));
-	assert(expected_state == extent_state_dirty ||
-	       expected_state == extent_state_muzzy ||
-	       expected_state == extent_state_retained);
+	assert(expected_state == extent_state_dirty
+	    || expected_state == extent_state_muzzy
+	    || expected_state == extent_state_retained);
 
-	void *neighbor_addr = forward ? edata_past_get(edata) :
-	    edata_before_get(edata);
+	void *neighbor_addr = forward ? edata_past_get(edata)
+	                              : edata_before_get(edata);
 	/*
 	 * This is subtle; the rtree code asserts that its input pointer is
 	 * non-NULL, and this is a useful thing to check.  But it's possible
@@ -73,10 +74,10 @@ emap_try_acquire_edata_neighbor_impl(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
 		return NULL;
 	}
 
-	rtree_contents_t neighbor_contents = rtree_leaf_elm_read(tsdn,
-	    &emap->rtree, elm, /* dependent */ true);
+	rtree_contents_t neighbor_contents = rtree_leaf_elm_read(
+	    tsdn, &emap->rtree, elm, /* dependent */ false);
 	if (!extent_can_acquire_neighbor(edata, neighbor_contents, pai,
-	    expected_state, forward, expanding)) {
+	        expected_state, forward, expanding)) {
 		return NULL;
 	}
 
@@ -109,8 +110,8 @@ emap_try_acquire_edata_neighbor_expand(tsdn_t *tsdn, emap_t *emap,
 }
 
 void
-emap_release_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
-    extent_state_t new_state) {
+emap_release_edata(
+    tsdn_t *tsdn, emap_t *emap, edata_t *edata, extent_state_t new_state) {
 	assert(emap_edata_in_transition(tsdn, emap, edata));
 	assert(emap_edata_is_acquired(tsdn, emap, edata));
 
@@ -145,8 +146,8 @@ emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a,
 	contents.edata = edata;
 	contents.metadata.szind = szind;
 	contents.metadata.slab = slab;
-	contents.metadata.is_head = (edata == NULL) ? false :
-	    edata_is_head_get(edata);
+	contents.metadata.is_head = (edata == NULL) ? false
+	                                            : edata_is_head_get(edata);
 	contents.metadata.state = (edata == NULL) ? 0 : edata_state_get(edata);
 	rtree_leaf_elm_write(tsdn, &emap->rtree, elm_a, contents);
 	if (elm_b != NULL) {
@@ -155,29 +156,33 @@ emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a,
 }
 
 bool
-emap_register_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
-    szind_t szind, bool slab) {
+emap_register_boundary(
+    tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, bool slab) {
 	assert(edata_state_get(edata) == extent_state_active);
 	EMAP_DECLARE_RTREE_CTX;
 
 	rtree_leaf_elm_t *elm_a, *elm_b;
-	bool err = emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, edata,
-	    false, true, &elm_a, &elm_b);
+	bool              err = emap_rtree_leaf_elms_lookup(
+            tsdn, emap, rtree_ctx, edata, false, true, &elm_a, &elm_b);
 	if (err) {
 		return true;
 	}
 	assert(rtree_leaf_elm_read(tsdn, &emap->rtree, elm_a,
-	    /* dependent */ false).edata == NULL);
+	           /* dependent */ false)
+	           .edata
+	    == NULL);
 	assert(rtree_leaf_elm_read(tsdn, &emap->rtree, elm_b,
-	    /* dependent */ false).edata == NULL);
+	           /* dependent */ false)
+	           .edata
+	    == NULL);
 	emap_rtree_write_acquired(tsdn, emap, elm_a, elm_b, edata, szind, slab);
 	return false;
 }
 
 /* Invoked *after* emap_register_boundary. */
 void
-emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
-    szind_t szind) {
+emap_register_interior(
+    tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind) {
 	EMAP_DECLARE_RTREE_CTX;
 
 	assert(edata_slab_get(edata));
@@ -226,10 +231,10 @@ emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata) {
 	EMAP_DECLARE_RTREE_CTX;
 	rtree_leaf_elm_t *elm_a, *elm_b;
 
-	emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, edata,
-	    true, false, &elm_a, &elm_b);
-	emap_rtree_write_acquired(tsdn, emap, elm_a, elm_b, NULL, SC_NSIZES,
-	    false);
+	emap_rtree_leaf_elms_lookup(
+	    tsdn, emap, rtree_ctx, edata, true, false, &elm_a, &elm_b);
+	emap_rtree_write_acquired(
+	    tsdn, emap, elm_a, elm_b, NULL, SC_NSIZES, false);
 }
 
 void
@@ -245,8 +250,8 @@ emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata) {
 }
 
 void
-emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind,
-    bool slab) {
+emap_remap(
+    tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, bool slab) {
 	EMAP_DECLARE_RTREE_CTX;
 
 	if (szind != SC_NSIZES) {
@@ -274,8 +279,8 @@ emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind,
 		if (slab && edata_size_get(edata) > PAGE) {
 			uintptr_t key = (uintptr_t)edata_past_get(edata)
 			    - (uintptr_t)PAGE;
-			rtree_write(tsdn, &emap->rtree, rtree_ctx, key,
-			    contents);
+			rtree_write(
+			    tsdn, &emap->rtree, rtree_ctx, key, contents);
 		}
 	}
 }
@@ -344,29 +349,29 @@ emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare,
 	clear_contents.metadata.state = (extent_state_t)0;
 
 	if (prepare->lead_elm_b != NULL) {
-		rtree_leaf_elm_write(tsdn, &emap->rtree,
-		    prepare->lead_elm_b, clear_contents);
+		rtree_leaf_elm_write(
+		    tsdn, &emap->rtree, prepare->lead_elm_b, clear_contents);
 	}
 
 	rtree_leaf_elm_t *merged_b;
 	if (prepare->trail_elm_b != NULL) {
-		rtree_leaf_elm_write(tsdn, &emap->rtree,
-		    prepare->trail_elm_a, clear_contents);
+		rtree_leaf_elm_write(
+		    tsdn, &emap->rtree, prepare->trail_elm_a, clear_contents);
 		merged_b = prepare->trail_elm_b;
 	} else {
 		merged_b = prepare->trail_elm_a;
 	}
 
-	emap_rtree_write_acquired(tsdn, emap, prepare->lead_elm_a, merged_b,
-	    lead, SC_NSIZES, false);
+	emap_rtree_write_acquired(
+	    tsdn, emap, prepare->lead_elm_a, merged_b, lead, SC_NSIZES, false);
 }
 
 void
 emap_do_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) {
 	EMAP_DECLARE_RTREE_CTX;
 
-	rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, rtree_ctx,
-	    (uintptr_t)edata_base_get(edata));
+	rtree_contents_t contents = rtree_read(
+	    tsdn, &emap->rtree, rtree_ctx, (uintptr_t)edata_base_get(edata));
 	assert(contents.edata == edata);
 	assert(contents.metadata.is_head == edata_is_head_get(edata));
 	assert(contents.metadata.state == edata_state_get(edata));
@@ -375,12 +380,12 @@ emap_do_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) {
 void
 emap_do_assert_not_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) {
 	emap_full_alloc_ctx_t context1 = {0};
-	emap_full_alloc_ctx_try_lookup(tsdn, emap, edata_base_get(edata),
-	    &context1);
+	emap_full_alloc_ctx_try_lookup(
+	    tsdn, emap, edata_base_get(edata), &context1);
 	assert(context1.edata == NULL);
 
 	emap_full_alloc_ctx_t context2 = {0};
-	emap_full_alloc_ctx_try_lookup(tsdn, emap, edata_last_get(edata),
-	    &context2);
+	emap_full_alloc_ctx_try_lookup(
+	    tsdn, emap, edata_last_get(edata), &context2);
 	assert(context2.edata == NULL);
 }
diff --git a/src/eset.c b/src/eset.c
index 6f8f335e..4a427d78 100644
--- a/src/eset.c
+++ b/src/eset.c
@@ -48,32 +48,32 @@ eset_nbytes_get(eset_t *eset, pszind_t pind) {
 
 static void
 eset_stats_add(eset_t *eset, pszind_t pind, size_t sz) {
-	size_t cur = atomic_load_zu(&eset->bin_stats[pind].nextents,
-	    ATOMIC_RELAXED);
-	atomic_store_zu(&eset->bin_stats[pind].nextents, cur + 1,
-	    ATOMIC_RELAXED);
+	size_t cur = atomic_load_zu(
+	    &eset->bin_stats[pind].nextents, ATOMIC_RELAXED);
+	atomic_store_zu(
+	    &eset->bin_stats[pind].nextents, cur + 1, ATOMIC_RELAXED);
 	cur = atomic_load_zu(&eset->bin_stats[pind].nbytes, ATOMIC_RELAXED);
-	atomic_store_zu(&eset->bin_stats[pind].nbytes, cur + sz,
-	    ATOMIC_RELAXED);
+	atomic_store_zu(
+	    &eset->bin_stats[pind].nbytes, cur + sz, ATOMIC_RELAXED);
 }
 
 static void
 eset_stats_sub(eset_t *eset, pszind_t pind, size_t sz) {
-	size_t cur = atomic_load_zu(&eset->bin_stats[pind].nextents,
-	    ATOMIC_RELAXED);
-	atomic_store_zu(&eset->bin_stats[pind].nextents, cur - 1,
-	    ATOMIC_RELAXED);
+	size_t cur = atomic_load_zu(
+	    &eset->bin_stats[pind].nextents, ATOMIC_RELAXED);
+	atomic_store_zu(
+	    &eset->bin_stats[pind].nextents, cur - 1, ATOMIC_RELAXED);
 	cur = atomic_load_zu(&eset->bin_stats[pind].nbytes, ATOMIC_RELAXED);
-	atomic_store_zu(&eset->bin_stats[pind].nbytes, cur - sz,
-	    ATOMIC_RELAXED);
+	atomic_store_zu(
+	    &eset->bin_stats[pind].nbytes, cur - sz, ATOMIC_RELAXED);
 }
 
 void
 eset_insert(eset_t *eset, edata_t *edata) {
 	assert(edata_state_get(edata) == eset->state);
 
-	size_t size = edata_size_get(edata);
-	size_t psz = sz_psz_quantize_floor(size);
+	size_t   size = edata_size_get(edata);
+	size_t   psz = sz_psz_quantize_floor(size);
 	pszind_t pind = sz_psz2ind(psz);
 
 	edata_cmp_summary_t edata_cmp_summary = edata_cmp_summary_get(edata);
@@ -86,8 +86,9 @@ eset_insert(eset_t *eset, edata_t *edata) {
 		 * There's already a min element; update the summary if we're
 		 * about to insert a lower one.
 		 */
-		if (edata_cmp_summary_comp(edata_cmp_summary,
-		    eset->bins[pind].heap_min) < 0) {
+		if (edata_cmp_summary_comp(
+		        edata_cmp_summary, eset->bins[pind].heap_min)
+		    < 0) {
 			eset->bins[pind].heap_min = edata_cmp_summary;
 		}
 	}
@@ -104,19 +105,18 @@ eset_insert(eset_t *eset, edata_t *edata) {
 	 * don't need an atomic fetch-add; we can get by with a load followed by
 	 * a store.
 	 */
-	size_t cur_eset_npages =
-	    atomic_load_zu(&eset->npages, ATOMIC_RELAXED);
-	atomic_store_zu(&eset->npages, cur_eset_npages + npages,
-	    ATOMIC_RELAXED);
+	size_t cur_eset_npages = atomic_load_zu(&eset->npages, ATOMIC_RELAXED);
+	atomic_store_zu(
+	    &eset->npages, cur_eset_npages + npages, ATOMIC_RELAXED);
 }
 
 void
 eset_remove(eset_t *eset, edata_t *edata) {
-	assert(edata_state_get(edata) == eset->state ||
-	    edata_state_in_transition(edata_state_get(edata)));
+	assert(edata_state_get(edata) == eset->state
+	    || edata_state_in_transition(edata_state_get(edata)));
 
-	size_t size = edata_size_get(edata);
-	size_t psz = sz_psz_quantize_floor(size);
+	size_t   size = edata_size_get(edata);
+	size_t   psz = sz_psz_quantize_floor(size);
 	pszind_t pind = sz_psz2ind(psz);
 	if (config_stats) {
 		eset_stats_sub(eset, pind, size);
@@ -136,8 +136,9 @@ eset_remove(eset_t *eset, edata_t *edata) {
 		 * summaries of the removed element and the min element should
 		 * compare equal.
 		 */
-		if (edata_cmp_summary_comp(edata_cmp_summary,
-		    eset->bins[pind].heap_min) == 0) {
+		if (edata_cmp_summary_comp(
+		        edata_cmp_summary, eset->bins[pind].heap_min)
+		    == 0) {
 			eset->bins[pind].heap_min = edata_cmp_summary_get(
 			    edata_heap_first(&eset->bins[pind].heap));
 		}
@@ -148,11 +149,77 @@ eset_remove(eset_t *eset, edata_t *edata) {
 	 * As in eset_insert, we hold eset->mtx and so don't need atomic
 	 * operations for updating eset->npages.
 	 */
-	size_t cur_extents_npages =
-	    atomic_load_zu(&eset->npages, ATOMIC_RELAXED);
+	size_t cur_extents_npages = atomic_load_zu(
+	    &eset->npages, ATOMIC_RELAXED);
 	assert(cur_extents_npages >= npages);
-	atomic_store_zu(&eset->npages,
-	    cur_extents_npages - (size >> LG_PAGE), ATOMIC_RELAXED);
+	atomic_store_zu(&eset->npages, cur_extents_npages - (size >> LG_PAGE),
+	    ATOMIC_RELAXED);
+}
+
+static edata_t *
+eset_enumerate_alignment_search(
+    eset_t *eset, size_t size, pszind_t bin_ind, size_t alignment) {
+	if (edata_heap_empty(&eset->bins[bin_ind].heap)) {
+		return NULL;
+	}
+
+	edata_t                      *edata = NULL;
+	edata_heap_enumerate_helper_t helper;
+	edata_heap_enumerate_prepare(&eset->bins[bin_ind].heap, &helper,
+	    ESET_ENUMERATE_MAX_NUM, sizeof(helper.bfs_queue) / sizeof(void *));
+	while ((edata = edata_heap_enumerate_next(
+	            &eset->bins[bin_ind].heap, &helper))
+	    != NULL) {
+		uintptr_t base = (uintptr_t)edata_base_get(edata);
+		size_t    candidate_size = edata_size_get(edata);
+		if (candidate_size < size) {
+			continue;
+		}
+
+		uintptr_t next_align = ALIGNMENT_CEILING(
+		    (uintptr_t)base, PAGE_CEILING(alignment));
+		if (base > next_align || base + candidate_size <= next_align) {
+			/* Overflow or not crossing the next alignment. */
+			continue;
+		}
+
+		size_t leadsize = next_align - base;
+		if (candidate_size - leadsize >= size) {
+			return edata;
+		}
+	}
+
+	return NULL;
+}
+
+static edata_t *
+eset_enumerate_search(eset_t *eset, size_t size, pszind_t bin_ind,
+    bool exact_only, edata_cmp_summary_t *ret_summ) {
+	if (edata_heap_empty(&eset->bins[bin_ind].heap)) {
+		return NULL;
+	}
+
+	edata_t                      *ret = NULL, *edata = NULL;
+	edata_heap_enumerate_helper_t helper;
+	edata_heap_enumerate_prepare(&eset->bins[bin_ind].heap, &helper,
+	    ESET_ENUMERATE_MAX_NUM, sizeof(helper.bfs_queue) / sizeof(void *));
+	while ((edata = edata_heap_enumerate_next(
+	            &eset->bins[bin_ind].heap, &helper))
+	    != NULL) {
+		if ((!exact_only && edata_size_get(edata) >= size)
+		    || (exact_only && edata_size_get(edata) == size)) {
+			edata_cmp_summary_t temp_summ = edata_cmp_summary_get(
+			    edata);
+			if (ret == NULL
+			    || edata_cmp_summary_comp(temp_summ, *ret_summ)
+			        < 0) {
+				ret = edata;
+				*ret_summ = temp_summ;
+			}
+		}
+	}
+
+	return ret;
 }
 
 /*
@@ -160,24 +227,35 @@ eset_remove(eset_t *eset, edata_t *edata) {
  * requirement.  For each size, try only the first extent in the heap.
  */
 static edata_t *
-eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size,
-    size_t alignment) {
-        pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(min_size));
-        pszind_t pind_max = sz_psz2ind(sz_psz_quantize_ceil(max_size));
+eset_fit_alignment(
+    eset_t *eset, size_t min_size, size_t max_size, size_t alignment) {
+	pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(min_size));
+	pszind_t pind_max = sz_psz2ind(sz_psz_quantize_ceil(max_size));
+
+	/* See comments in eset_first_fit for why we enumerate search below. */
+	pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(min_size));
+	if (sz_large_size_classes_disabled() && pind != pind_prev) {
+		edata_t *ret = NULL;
+		ret = eset_enumerate_alignment_search(
+		    eset, min_size, pind_prev, alignment);
+		if (ret != NULL) {
+			return ret;
+		}
+	}
 
 	for (pszind_t i =
-	    (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)pind);
-	    i < pind_max;
-	    i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) {
+	         (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)pind);
+	     i < pind_max;
+	     i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) {
 		assert(i < SC_NPSIZES);
 		assert(!edata_heap_empty(&eset->bins[i].heap));
-		edata_t *edata = edata_heap_first(&eset->bins[i].heap);
+		edata_t  *edata = edata_heap_first(&eset->bins[i].heap);
 		uintptr_t base = (uintptr_t)edata_base_get(edata);
-		size_t candidate_size = edata_size_get(edata);
+		size_t    candidate_size = edata_size_get(edata);
 		assert(candidate_size >= min_size);
 
-		uintptr_t next_align = ALIGNMENT_CEILING((uintptr_t)base,
-		    PAGE_CEILING(alignment));
+		uintptr_t next_align = ALIGNMENT_CEILING(
+		    (uintptr_t)base, PAGE_CEILING(alignment));
 		if (base > next_align || base + candidate_size <= next_align) {
 			/* Overflow or not crossing the next alignment. */
 			continue;
@@ -203,22 +281,58 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size,
  * for others.
  */
 static edata_t *
-eset_first_fit(eset_t *eset, size_t size, bool exact_only,
-    unsigned lg_max_fit) {
-	edata_t *ret = NULL;
+eset_first_fit(
+    eset_t *eset, size_t size, bool exact_only, unsigned lg_max_fit) {
+	edata_t                     *ret = NULL;
 	edata_cmp_summary_t ret_summ JEMALLOC_CC_SILENCE_INIT({0});
 
 	pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size));
 
 	if (exact_only) {
-		return edata_heap_empty(&eset->bins[pind].heap) ? NULL :
-		    edata_heap_first(&eset->bins[pind].heap);
+		if (sz_large_size_classes_disabled()) {
+			pszind_t pind_prev = sz_psz2ind(
+			    sz_psz_quantize_floor(size));
+			return eset_enumerate_search(eset, size, pind_prev,
+			    /* exact_only */ true, &ret_summ);
+		} else {
+			return edata_heap_empty(&eset->bins[pind].heap)
+			    ? NULL
+			    : edata_heap_first(&eset->bins[pind].heap);
+		}
+	}
+
+	/*
+	 * Each element in the eset->bins is a heap corresponding to a size
+	 * class.  When sz_large_size_classes_disabled() is false, all heaps after
+	 * pind (including pind itself) will surely satisfy the rquests while
+	 * heaps before pind cannot satisfy the request because usize is
+	 * calculated based on size classes then.  However, when
+	 * sz_large_size_classes_disabled() is true, usize is calculated by
+	 * ceiling user requested size to the closest multiple of PAGE.  This
+	 * means in the heap before pind, i.e., pind_prev, there may exist
+	 * extents able to satisfy the request and we should enumerate the heap
+	 * when pind_prev != pind.
+	 *
+	 * For example, when PAGE=4KB and the user requested size is 1MB + 4KB,
+	 * usize would be 1.25MB when sz_large_size_classes_disabled() is false.
+	 * pind points to the heap containing extents ranging in
+	 * [1.25MB, 1.5MB).  Thus, searching starting from pind will not miss
+	 * any candidates.  When sz_large_size_classes_disabled() is true, the
+	 * usize would be 1MB + 4KB and pind still points to the same heap.
+	 * In this case, the heap pind_prev points to, which contains extents
+	 * in the range [1MB, 1.25MB), may contain candidates satisfying the
+	 * usize and thus should be enumerated.
+	 */
+	pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(size));
+	if (sz_large_size_classes_disabled() && pind != pind_prev) {
+		ret = eset_enumerate_search(eset, size, pind_prev,
+		    /* exact_only */ false, &ret_summ);
 	}
 
 	for (pszind_t i =
-	    (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)pind);
-	    i < ESET_NPSIZES;
-	    i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) {
+	         (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)pind);
+	     i < ESET_NPSIZES;
+	     i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) {
 		assert(!edata_heap_empty(&eset->bins[i].heap));
 		if (lg_max_fit == SC_PTR_BITS) {
 			/*
@@ -231,8 +345,9 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only,
 		if ((sz_pind2sz(i) >> lg_max_fit) > size) {
 			break;
 		}
-		if (ret == NULL || edata_cmp_summary_comp(
-		    eset->bins[i].heap_min, ret_summ) < 0) {
+		if (ret == NULL
+		    || edata_cmp_summary_comp(eset->bins[i].heap_min, ret_summ)
+		        < 0) {
 			/*
 			 * We grab the edata as early as possible, even though
 			 * we might change it later.  Practically, a large
@@ -243,9 +358,10 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only,
 			edata_t *edata = edata_heap_first(&eset->bins[i].heap);
 			assert(edata_size_get(edata) >= size);
 			assert(ret == NULL || edata_snad_comp(edata, ret) < 0);
-			assert(ret == NULL || edata_cmp_summary_comp(
-			    eset->bins[i].heap_min,
-			    edata_cmp_summary_get(edata)) == 0);
+			assert(ret == NULL
+			    || edata_cmp_summary_comp(eset->bins[i].heap_min,
+			           edata_cmp_summary_get(edata))
+			        == 0);
 			ret = edata;
 			ret_summ = eset->bins[i].heap_min;
 		}
diff --git a/src/exp_grow.c b/src/exp_grow.c
index 386471f4..955823a1 100644
--- a/src/exp_grow.c
+++ b/src/exp_grow.c
@@ -3,6 +3,12 @@
 
 void
 exp_grow_init(exp_grow_t *exp_grow) {
-	exp_grow->next = sz_psz2ind(HUGEPAGE);
+	/*
+	 * Enforce a minimal of 2M grow, which is convenient for the huge page
+	 * use cases.  Avoid using HUGEPAGE as the value though, because on some
+	 * platforms it can be very large (e.g. 512M on aarch64 w/ 64K pages).
+	 */
+	const size_t min_grow = (size_t)2 << 20;
+	exp_grow->next = sz_psz2ind(min_grow);
 	exp_grow->limit = sz_psz2ind(SC_LARGE_MAXCLASS);
 }
diff --git a/src/extent.c b/src/extent.c
index cf3d1f31..118c8785 100644
--- a/src/extent.c
+++ b/src/extent.c
@@ -12,6 +12,14 @@
 /* Data. */
 
 size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT;
+/* This option is intended for kernel tuning, not app tuning. */
+size_t opt_process_madvise_max_batch =
+#ifdef JEMALLOC_HAVE_PROCESS_MADVISE
+    PROCESS_MADVISE_MAX_BATCH_DEFAULT;
+#else
+    0
+#endif
+;
 
 static bool extent_commit_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
     size_t offset, size_t length, bool growing_retained);
@@ -21,8 +29,8 @@ static bool extent_purge_forced_impl(tsdn_t *tsdn, ehooks_t *ehooks,
     edata_t *edata, size_t offset, size_t length, bool growing_retained);
 static edata_t *extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
     edata_t *edata, size_t size_a, size_t size_b, bool holding_core_locks);
-static bool extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
-    edata_t *a, edata_t *b, bool holding_core_locks);
+static bool     extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
+        edata_t *a, edata_t *b, bool holding_core_locks);
 
 /* Used exclusively for gdump triggering. */
 static atomic_zu_t curpages;
@@ -34,7 +42,7 @@ static atomic_zu_t highpages;
  * definition.
  */
 
-static void extent_deregister(tsdn_t *tsdn, pac_t *pac, edata_t *edata);
+static void     extent_deregister(tsdn_t *tsdn, pac_t *pac, edata_t *edata);
 static edata_t *extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
     ecache_t *ecache, edata_t *expand_edata, size_t usize, size_t alignment,
     bool zero, bool *commit, bool growing_retained, bool guarded);
@@ -43,6 +51,8 @@ static edata_t *extent_try_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
 static edata_t *extent_alloc_retained(tsdn_t *tsdn, pac_t *pac,
     ehooks_t *ehooks, edata_t *expand_edata, size_t size, size_t alignment,
     bool zero, bool *commit, bool guarded);
+static bool     extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks,
+        edata_t *edata, size_t offset, size_t length);
 
 /******************************************************************************/
 
@@ -63,8 +73,8 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
 	emap_update_edata_state(tsdn, pac->emap, edata, extent_state_active);
 
 	bool coalesced;
-	edata = extent_try_coalesce(tsdn, pac, ehooks, ecache,
-	    edata, &coalesced);
+	edata = extent_try_coalesce(
+	    tsdn, pac, ehooks, ecache, edata, &coalesced);
 	emap_update_edata_state(tsdn, pac->emap, edata, ecache->state);
 
 	if (!coalesced) {
@@ -80,10 +90,10 @@ ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache,
     bool guarded) {
 	assert(size != 0);
 	assert(alignment != 0);
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
+	witness_assert_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0);
 
-	bool commit = true;
+	bool     commit = true;
 	edata_t *edata = extent_recycle(tsdn, pac, ehooks, ecache, expand_edata,
 	    size, alignment, zero, &commit, false, guarded);
 	assert(edata == NULL || edata_pai_get(edata) == EXTENT_PAI_PAC);
@@ -97,10 +107,10 @@ ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache,
     bool guarded) {
 	assert(size != 0);
 	assert(alignment != 0);
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
+	witness_assert_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0);
 
-	bool commit = true;
+	bool     commit = true;
 	edata_t *edata = extent_alloc_retained(tsdn, pac, ehooks, expand_edata,
 	    size, alignment, zero, &commit, guarded);
 	if (edata == NULL) {
@@ -121,10 +131,11 @@ ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache,
 			 */
 			return NULL;
 		}
-		void *new_addr = (expand_edata == NULL) ? NULL :
-		    edata_past_get(expand_edata);
-		edata = extent_alloc_wrapper(tsdn, pac, ehooks, new_addr,
-		    size, alignment, zero, &commit,
+		void *new_addr = (expand_edata == NULL)
+		    ? NULL
+		    : edata_past_get(expand_edata);
+		edata = extent_alloc_wrapper(tsdn, pac, ehooks, new_addr, size,
+		    alignment, zero, &commit,
 		    /* growing_retained */ false);
 	}
 
@@ -138,8 +149,8 @@ ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache,
 	assert(edata_base_get(edata) != NULL);
 	assert(edata_size_get(edata) != 0);
 	assert(edata_pai_get(edata) == EXTENT_PAI_PAC);
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
+	witness_assert_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0);
 
 	edata_addr_set(edata, edata_base_get(edata));
 	edata_zeroed_set(edata, false);
@@ -148,8 +159,8 @@ ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache,
 }
 
 edata_t *
-ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
-    ecache_t *ecache, size_t npages_min) {
+ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache,
+    size_t npages_min) {
 	malloc_mutex_lock(tsdn, &ecache->mtx);
 
 	/*
@@ -184,8 +195,8 @@ ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
 			break;
 		}
 		/* Try to coalesce. */
-		if (extent_try_delayed_coalesce(tsdn, pac, ehooks, ecache,
-		    edata)) {
+		if (extent_try_delayed_coalesce(
+		        tsdn, pac, ehooks, ecache, edata)) {
 			break;
 		}
 		/*
@@ -199,16 +210,17 @@ ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
 	 * concurrent operations.
 	 */
 	switch (ecache->state) {
-	case extent_state_active:
-		not_reached();
 	case extent_state_dirty:
 	case extent_state_muzzy:
-		emap_update_edata_state(tsdn, pac->emap, edata,
-		    extent_state_active);
+		emap_update_edata_state(
+		    tsdn, pac->emap, edata, extent_state_active);
 		break;
 	case extent_state_retained:
 		extent_deregister(tsdn, pac, edata);
 		break;
+	case extent_state_active:
+	case extent_state_transition:
+	case extent_state_merging:
 	default:
 		not_reached();
 	}
@@ -227,16 +239,16 @@ extents_abandon_vm(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache,
     edata_t *edata, bool growing_retained) {
 	size_t sz = edata_size_get(edata);
 	if (config_stats) {
-		atomic_fetch_add_zu(&pac->stats->abandoned_vm, sz,
-		    ATOMIC_RELAXED);
+		atomic_fetch_add_zu(
+		    &pac->stats->abandoned_vm, sz, ATOMIC_RELAXED);
 	}
 	/*
 	 * Leak extent after making sure its pages have already been purged, so
 	 * that this is only a virtual memory leak.
 	 */
 	if (ecache->state == extent_state_dirty) {
-		if (extent_purge_lazy_impl(tsdn, ehooks, edata, 0, sz,
-		    growing_retained)) {
+		if (extent_purge_lazy_impl(
+		        tsdn, ehooks, edata, 0, sz, growing_retained)) {
 			extent_purge_forced_impl(tsdn, ehooks, edata, 0,
 			    edata_size_get(edata), growing_retained);
 		}
@@ -245,20 +257,20 @@ extents_abandon_vm(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache,
 }
 
 static void
-extent_deactivate_locked_impl(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache,
-    edata_t *edata) {
+extent_deactivate_locked_impl(
+    tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, edata_t *edata) {
 	malloc_mutex_assert_owner(tsdn, &ecache->mtx);
 	assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache));
 
 	emap_update_edata_state(tsdn, pac->emap, edata, ecache->state);
-	eset_t *eset = edata_guarded_get(edata) ? &ecache->guarded_eset :
-	    &ecache->eset;
+	eset_t *eset = edata_guarded_get(edata) ? &ecache->guarded_eset
+	                                        : &ecache->eset;
 	eset_insert(eset, edata);
 }
 
 static void
-extent_deactivate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache,
-    edata_t *edata) {
+extent_deactivate_locked(
+    tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, edata_t *edata) {
 	assert(edata_state_get(edata) == extent_state_active);
 	extent_deactivate_locked_impl(tsdn, pac, ecache, edata);
 }
@@ -271,11 +283,11 @@ extent_deactivate_check_state_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache,
 }
 
 static void
-extent_activate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, eset_t *eset,
-    edata_t *edata) {
+extent_activate_locked(
+    tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, eset_t *eset, edata_t *edata) {
 	assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache));
-	assert(edata_state_get(edata) == ecache->state ||
-	    edata_state_get(edata) == extent_state_merging);
+	assert(edata_state_get(edata) == ecache->state
+	    || edata_state_get(edata) == extent_state_merging);
 
 	eset_remove(eset, edata);
 	emap_update_edata_state(tsdn, pac->emap, edata, extent_state_active);
@@ -285,16 +297,18 @@ void
 extent_gdump_add(tsdn_t *tsdn, const edata_t *edata) {
 	cassert(config_prof);
 	/* prof_gdump() requirement. */
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
+	witness_assert_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0);
 
 	if (opt_prof && edata_state_get(edata) == extent_state_active) {
 		size_t nadd = edata_size_get(edata) >> LG_PAGE;
-		size_t cur = atomic_fetch_add_zu(&curpages, nadd,
-		    ATOMIC_RELAXED) + nadd;
+		size_t cur = atomic_fetch_add_zu(
+		                 &curpages, nadd, ATOMIC_RELAXED)
+		    + nadd;
 		size_t high = atomic_load_zu(&highpages, ATOMIC_RELAXED);
-		while (cur > high && !atomic_compare_exchange_weak_zu(
-		    &highpages, &high, cur, ATOMIC_RELAXED, ATOMIC_RELAXED)) {
+		while (cur > high
+		    && !atomic_compare_exchange_weak_zu(&highpages, &high, cur,
+		        ATOMIC_RELAXED, ATOMIC_RELAXED)) {
 			/*
 			 * Don't refresh cur, because it may have decreased
 			 * since this thread lost the highpages update race.
@@ -326,7 +340,7 @@ extent_register_impl(tsdn_t *tsdn, pac_t *pac, edata_t *edata, bool gdump_add) {
 	 * prevents other threads from accessing the edata.
 	 */
 	if (emap_register_boundary(tsdn, pac->emap, edata, SC_NSIZES,
-	    /* slab */ false)) {
+	        /* slab */ false)) {
 		return true;
 	}
 
@@ -357,8 +371,7 @@ extent_reregister(tsdn_t *tsdn, pac_t *pac, edata_t *edata) {
  * Removes all pointers to the given extent from the global rtree.
  */
 static void
-extent_deregister_impl(tsdn_t *tsdn, pac_t *pac, edata_t *edata,
-    bool gdump) {
+extent_deregister_impl(tsdn_t *tsdn, pac_t *pac, edata_t *edata, bool gdump) {
 	emap_deregister_boundary(tsdn, pac->emap, edata);
 
 	if (config_prof && gdump) {
@@ -372,8 +385,7 @@ extent_deregister(tsdn_t *tsdn, pac_t *pac, edata_t *edata) {
 }
 
 static void
-extent_deregister_no_gdump_sub(tsdn_t *tsdn, pac_t *pac,
-    edata_t *edata) {
+extent_deregister_no_gdump_sub(tsdn_t *tsdn, pac_t *pac, edata_t *edata) {
 	extent_deregister_impl(tsdn, pac, edata, false);
 }
 
@@ -400,15 +412,16 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
 	}
 
 	edata_t *edata;
-	eset_t *eset = guarded ? &ecache->guarded_eset : &ecache->eset;
+	eset_t  *eset = guarded ? &ecache->guarded_eset : &ecache->eset;
 	if (expand_edata != NULL) {
 		edata = emap_try_acquire_edata_neighbor_expand(tsdn, pac->emap,
 		    expand_edata, EXTENT_PAI_PAC, ecache->state);
 		if (edata != NULL) {
+			/* NOLINTNEXTLINE(readability-suspicious-call-argument) */
 			extent_assert_can_expand(expand_edata, edata);
 			if (edata_size_get(edata) < size) {
-				emap_release_edata(tsdn, pac->emap, edata,
-				    ecache->state);
+				emap_release_edata(
+				    tsdn, pac->emap, edata, ecache->state);
 				edata = NULL;
 			}
 		}
@@ -423,7 +436,8 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
 		 * put a cap on how big an extent we can split for a request.
 		 */
 		unsigned lg_max_fit = ecache->delay_coalesce
-		    ? (unsigned)opt_lg_extent_max_active_fit : SC_PTR_BITS;
+		    ? (unsigned)opt_lg_extent_max_active_fit
+		    : SC_PTR_BITS;
 
 		/*
 		 * If split and merge are not allowed (Windows w/o retain), try
@@ -434,8 +448,7 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
 		 * allocations.
 		 */
 		bool exact_only = (!maps_coalesce && !opt_retain) || guarded;
-		edata = eset_fit(eset, size, alignment, exact_only,
-		    lg_max_fit);
+		edata = eset_fit(eset, size, alignment, exact_only, lg_max_fit);
 	}
 	if (edata == NULL) {
 		return NULL;
@@ -477,10 +490,11 @@ extent_split_interior(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
     /* The result of splitting, in case of success. */
     edata_t **edata, edata_t **lead, edata_t **trail,
     /* The mess to clean up, in case of error. */
-    edata_t **to_leak, edata_t **to_salvage,
-    edata_t *expand_edata, size_t size, size_t alignment) {
+    edata_t **to_leak, edata_t **to_salvage, edata_t *expand_edata, size_t size,
+    size_t alignment) {
 	size_t leadsize = ALIGNMENT_CEILING((uintptr_t)edata_base_get(*edata),
-	    PAGE_CEILING(alignment)) - (uintptr_t)edata_base_get(*edata);
+	                      PAGE_CEILING(alignment))
+	    - (uintptr_t)edata_base_get(*edata);
 	assert(expand_edata == NULL || leadsize == 0);
 	if (edata_size_get(*edata) < leadsize + size) {
 		return extent_split_interior_cant_alloc;
@@ -535,14 +549,14 @@ extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
 	assert(!edata_guarded_get(edata) || size == edata_size_get(edata));
 	malloc_mutex_assert_owner(tsdn, &ecache->mtx);
 
-	edata_t *lead;
-	edata_t *trail;
-	edata_t *to_leak JEMALLOC_CC_SILENCE_INIT(NULL);
+	edata_t            *lead;
+	edata_t            *trail;
+	edata_t *to_leak    JEMALLOC_CC_SILENCE_INIT(NULL);
 	edata_t *to_salvage JEMALLOC_CC_SILENCE_INIT(NULL);
 
-	extent_split_interior_result_t result = extent_split_interior(
-	    tsdn, pac, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage,
-	    expand_edata, size, alignment);
+	extent_split_interior_result_t result = extent_split_interior(tsdn, pac,
+	    ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, expand_edata,
+	    size, alignment);
 
 	if (!maps_coalesce && result != extent_split_interior_ok
 	    && !opt_retain) {
@@ -603,8 +617,8 @@ extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache,
 
 	malloc_mutex_lock(tsdn, &ecache->mtx);
 
-	edata_t *edata = extent_recycle_extract(tsdn, pac, ehooks, ecache,
-	    expand_edata, size, alignment, guarded);
+	edata_t *edata = extent_recycle_extract(
+	    tsdn, pac, ehooks, ecache, expand_edata, size, alignment, guarded);
 	if (edata == NULL) {
 		malloc_mutex_unlock(tsdn, &ecache->mtx);
 		return NULL;
@@ -618,8 +632,8 @@ extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache,
 	}
 
 	assert(edata_state_get(edata) == extent_state_active);
-	if (extent_commit_zero(tsdn, ehooks, edata, *commit, zero,
-	    growing_retained)) {
+	if (extent_commit_zero(
+	        tsdn, ehooks, edata, *commit, zero, growing_retained)) {
 		extent_record(tsdn, pac, ehooks, ecache, edata);
 		return NULL;
 	}
@@ -635,14 +649,65 @@ extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache,
 	return edata;
 }
 
+static void
+extent_handle_huge_arena_thp(tsdn_t *tsdn, pac_thp_t *pac_thp,
+    edata_cache_t *edata_cache, void *addr, size_t size) {
+	assert(opt_huge_arena_pac_thp);
+	assert(opt_metadata_thp != metadata_thp_disabled);
+	/*
+	 * With rounding up the given memory region [addr, addr + size) to
+	 * the huge page region that it crosses boundaries with,
+	 * essentially we're aligning the start addr down and the end addr
+	 * up to the nearest HUGEPAGE boundaries. The memory overhead can
+	 * be within the range of [0, 2 * (HUGEPAGE - 1)].
+	 */
+	void *huge_addr = HUGEPAGE_ADDR2BASE(addr);
+	void *huge_end = HUGEPAGE_ADDR2BASE(
+	    (void *)((byte_t *)addr + (uintptr_t)(size + HUGEPAGE - 1)));
+	assert((uintptr_t)huge_end > (uintptr_t)huge_addr);
+
+	size_t huge_size = (uintptr_t)huge_end - (uintptr_t)huge_addr;
+	assert(
+	    huge_size <= (size + ((HUGEPAGE - 1) << 1)) && huge_size >= size);
+
+	if (opt_metadata_thp == metadata_thp_always
+	    || pac_thp->auto_thp_switched) {
+		pages_huge(huge_addr, huge_size);
+	} else {
+		assert(opt_metadata_thp == metadata_thp_auto);
+		edata_t *edata = edata_cache_get(tsdn, edata_cache);
+
+		malloc_mutex_lock(tsdn, &pac_thp->lock);
+		/* Can happen if the switch is turned on during edata retrieval. */
+		if (pac_thp->auto_thp_switched) {
+			malloc_mutex_unlock(tsdn, &pac_thp->lock);
+			pages_huge(huge_addr, huge_size);
+			if (edata != NULL) {
+				edata_cache_put(tsdn, edata_cache, edata);
+			}
+		} else {
+			if (edata != NULL) {
+				edata_addr_set(edata, huge_addr);
+				edata_size_set(edata, huge_size);
+				edata_list_active_append(
+				    &pac_thp->thp_lazy_list, edata);
+				atomic_fetch_add_u(
+				    &pac_thp->n_thp_lazy, 1, ATOMIC_RELAXED);
+			}
+			malloc_mutex_unlock(tsdn, &pac_thp->lock);
+		}
+		malloc_mutex_assert_not_owner(tsdn, &pac_thp->lock);
+	}
+}
+
 /*
  * If virtual memory is retained, create increasingly larger extents from which
  * to split requested extents in order to limit the total number of disjoint
  * virtual memory ranges retained by each shard.
  */
 static edata_t *
-extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
-    size_t size, size_t alignment, bool zero, bool *commit) {
+extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size,
+    size_t alignment, bool zero, bool *commit) {
 	malloc_mutex_assert_owner(tsdn, &pac->grow_mtx);
 
 	size_t alloc_size_min = size + PAGE_CEILING(alignment) - PAGE;
@@ -654,10 +719,10 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
 	 * Find the next extent size in the series that would be large enough to
 	 * satisfy this request.
 	 */
-	size_t alloc_size;
+	size_t   alloc_size;
 	pszind_t exp_grow_skip;
-	bool err = exp_grow_size_prepare(&pac->exp_grow, alloc_size_min,
-	    &alloc_size, &exp_grow_skip);
+	bool     err = exp_grow_size_prepare(
+            &pac->exp_grow, alloc_size_min, &alloc_size, &exp_grow_skip);
 	if (err) {
 		goto label_err;
 	}
@@ -669,18 +734,18 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
 	bool zeroed = false;
 	bool committed = false;
 
-	void *ptr = ehooks_alloc(tsdn, ehooks, NULL, alloc_size, PAGE, &zeroed,
-	    &committed);
+	void *ptr = ehooks_alloc(
+	    tsdn, ehooks, NULL, alloc_size, PAGE, &zeroed, &committed);
 
 	if (ptr == NULL) {
 		edata_cache_put(tsdn, pac->edata_cache, edata);
 		goto label_err;
 	}
 
-	edata_init(edata, ecache_ind_get(&pac->ecache_retained), ptr,
-	    alloc_size, false, SC_NSIZES, extent_sn_next(pac),
-	    extent_state_active, zeroed, committed, EXTENT_PAI_PAC,
-	    EXTENT_IS_HEAD);
+	unsigned ind = ecache_ind_get(&pac->ecache_retained);
+	edata_init(edata, ind, ptr, alloc_size, false, SC_NSIZES,
+	    extent_sn_next(pac), extent_state_active, zeroed, committed,
+	    EXTENT_PAI_PAC, EXTENT_IS_HEAD);
 
 	if (extent_register_no_gdump_add(tsdn, pac, edata)) {
 		edata_cache_put(tsdn, pac->edata_cache, edata);
@@ -691,23 +756,23 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
 		*commit = true;
 	}
 
-	edata_t *lead;
-	edata_t *trail;
-	edata_t *to_leak JEMALLOC_CC_SILENCE_INIT(NULL);
+	edata_t            *lead;
+	edata_t            *trail;
+	edata_t *to_leak    JEMALLOC_CC_SILENCE_INIT(NULL);
 	edata_t *to_salvage JEMALLOC_CC_SILENCE_INIT(NULL);
 
-	extent_split_interior_result_t result = extent_split_interior(tsdn,
-	    pac, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, NULL,
-	    size, alignment);
+	extent_split_interior_result_t result = extent_split_interior(tsdn, pac,
+	    ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, NULL, size,
+	    alignment);
 
 	if (result == extent_split_interior_ok) {
 		if (lead != NULL) {
-			extent_record(tsdn, pac, ehooks, &pac->ecache_retained,
-			    lead);
+			extent_record(
+			    tsdn, pac, ehooks, &pac->ecache_retained, lead);
 		}
 		if (trail != NULL) {
-			extent_record(tsdn, pac, ehooks, &pac->ecache_retained,
-			    trail);
+			extent_record(
+			    tsdn, pac, ehooks, &pac->ecache_retained, trail);
 		}
 	} else {
 		/*
@@ -731,16 +796,16 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
 	}
 
 	if (*commit && !edata_committed_get(edata)) {
-		if (extent_commit_impl(tsdn, ehooks, edata, 0,
-		    edata_size_get(edata), true)) {
-			extent_record(tsdn, pac, ehooks,
-			    &pac->ecache_retained, edata);
+		if (extent_commit_impl(
+		        tsdn, ehooks, edata, 0, edata_size_get(edata), true)) {
+			extent_record(
+			    tsdn, pac, ehooks, &pac->ecache_retained, edata);
 			goto label_err;
 		}
 		/* A successful commit should return zeroed memory. */
 		if (config_debug) {
-			void *addr = edata_addr_get(edata);
-			size_t *p = (size_t *)(uintptr_t)addr;
+			void   *addr = edata_addr_get(edata);
+			size_t *p = (size_t *)addr;
 			/* Check the first page only. */
 			for (size_t i = 0; i < PAGE / sizeof(size_t); i++) {
 				assert(p[i] == 0);
@@ -756,13 +821,23 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
 	exp_grow_size_commit(&pac->exp_grow, exp_grow_skip);
 	malloc_mutex_unlock(tsdn, &pac->grow_mtx);
 
+	if (huge_arena_pac_thp.thp_madvise) {
+		/* Avoid using HUGEPAGE when the grow size is less than HUGEPAGE. */
+		if (ind != 0 && ind == huge_arena_ind
+		    && ehooks_are_default(ehooks)
+		    && likely(alloc_size >= HUGEPAGE)) {
+			extent_handle_huge_arena_thp(tsdn, &huge_arena_pac_thp,
+			    pac->edata_cache, ptr, alloc_size);
+		}
+	}
+
 	if (config_prof) {
 		/* Adjust gdump stats now that extent is final size. */
 		extent_gdump_add(tsdn, edata);
 	}
 	if (zero && !edata_zeroed_get(edata)) {
-		ehooks_zero(tsdn, ehooks, edata_base_get(edata),
-		    edata_size_get(edata));
+		ehooks_zero(
+		    tsdn, ehooks, edata_base_get(edata), edata_size_get(edata));
 	}
 	return edata;
 label_err:
@@ -788,8 +863,8 @@ extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
 			extent_gdump_add(tsdn, edata);
 		}
 	} else if (opt_retain && expand_edata == NULL && !guarded) {
-		edata = extent_grow_retained(tsdn, pac, ehooks, size,
-		    alignment, zero, commit);
+		edata = extent_grow_retained(
+		    tsdn, pac, ehooks, size, alignment, zero, commit);
 		/* extent_grow_retained() always releases pac->grow_mtx. */
 	} else {
 		malloc_mutex_unlock(tsdn, &pac->grow_mtx);
@@ -805,12 +880,12 @@ extent_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache,
 	extent_assert_can_coalesce(inner, outer);
 	eset_remove(&ecache->eset, outer);
 
-	bool err = extent_merge_impl(tsdn, pac, ehooks,
-	    forward ? inner : outer, forward ? outer : inner,
+	bool err = extent_merge_impl(tsdn, pac, ehooks, forward ? inner : outer,
+	    forward ? outer : inner,
 	    /* holding_core_locks */ true);
 	if (err) {
-		extent_deactivate_check_state_locked(tsdn, pac, ecache, outer,
-		    extent_state_merging);
+		extent_deactivate_check_state_locked(
+		    tsdn, pac, ecache, outer, extent_state_merging);
 	}
 
 	return err;
@@ -818,8 +893,10 @@ extent_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache,
 
 static edata_t *
 extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
-    ecache_t *ecache, edata_t *edata, bool *coalesced) {
+    ecache_t *ecache, edata_t *edata, size_t max_size, bool *coalesced) {
 	assert(!edata_guarded_get(edata));
+	assert(coalesced != NULL);
+	*coalesced = false;
 	/*
 	 * We avoid checking / locking inactive neighbors for large size
 	 * classes, since they are eagerly coalesced on deallocation which can
@@ -836,31 +913,47 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
 		/* Try to coalesce forward. */
 		edata_t *next = emap_try_acquire_edata_neighbor(tsdn, pac->emap,
 		    edata, EXTENT_PAI_PAC, ecache->state, /* forward */ true);
+		size_t   max_next_neighbor = max_size > edata_size_get(edata)
+		      ? max_size - edata_size_get(edata)
+		      : 0;
 		if (next != NULL) {
-			if (!extent_coalesce(tsdn, pac, ehooks, ecache, edata,
-			    next, true)) {
-				if (ecache->delay_coalesce) {
-					/* Do minimal coalescing. */
-					*coalesced = true;
-					return edata;
+			if (edata_size_get(next) > max_next_neighbor) {
+				emap_release_edata(
+				    tsdn, pac->emap, next, ecache->state);
+			} else {
+				if (!extent_coalesce(tsdn, pac, ehooks, ecache,
+				        edata, next, true)) {
+					if (ecache->delay_coalesce) {
+						/* Do minimal coalescing. */
+						*coalesced = true;
+						return edata;
+					}
+					again = true;
 				}
-				again = true;
 			}
 		}
 
 		/* Try to coalesce backward. */
 		edata_t *prev = emap_try_acquire_edata_neighbor(tsdn, pac->emap,
 		    edata, EXTENT_PAI_PAC, ecache->state, /* forward */ false);
+		size_t   max_prev_neighbor = max_size > edata_size_get(edata)
+		      ? max_size - edata_size_get(edata)
+		      : 0;
 		if (prev != NULL) {
-			if (!extent_coalesce(tsdn, pac, ehooks, ecache, edata,
-			    prev, false)) {
-				edata = prev;
-				if (ecache->delay_coalesce) {
-					/* Do minimal coalescing. */
-					*coalesced = true;
-					return edata;
+			if (edata_size_get(prev) > max_prev_neighbor) {
+				emap_release_edata(
+				    tsdn, pac->emap, prev, ecache->state);
+			} else {
+				if (!extent_coalesce(tsdn, pac, ehooks, ecache,
+				        edata, prev, false)) {
+					edata = prev;
+					if (ecache->delay_coalesce) {
+						/* Do minimal coalescing. */
+						*coalesced = true;
+						return edata;
+					}
+					again = true;
 				}
-				again = true;
 			}
 		}
 	} while (again);
@@ -874,36 +967,33 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
 static edata_t *
 extent_try_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
     ecache_t *ecache, edata_t *edata, bool *coalesced) {
-	return extent_try_coalesce_impl(tsdn, pac, ehooks, ecache, edata,
-	    coalesced);
+	return extent_try_coalesce_impl(
+	    tsdn, pac, ehooks, ecache, edata, SC_LARGE_MAXCLASS, coalesced);
 }
 
 static edata_t *
 extent_try_coalesce_large(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
-    ecache_t *ecache, edata_t *edata, bool *coalesced) {
-	return extent_try_coalesce_impl(tsdn, pac, ehooks, ecache, edata,
-	    coalesced);
+    ecache_t *ecache, edata_t *edata, size_t max_size, bool *coalesced) {
+	return extent_try_coalesce_impl(
+	    tsdn, pac, ehooks, ecache, edata, max_size, coalesced);
 }
 
 /* Purge a single extent to retained / unmapped directly. */
 static void
-extent_maximally_purge(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
-    edata_t *edata) {
+extent_maximally_purge(
+    tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) {
 	size_t extent_size = edata_size_get(edata);
 	extent_dalloc_wrapper(tsdn, pac, ehooks, edata);
 	if (config_stats) {
 		/* Update stats accordingly. */
 		LOCKEDINT_MTX_LOCK(tsdn, *pac->stats_mtx);
-		locked_inc_u64(tsdn,
-		    LOCKEDINT_MTX(*pac->stats_mtx),
+		locked_inc_u64(tsdn, LOCKEDINT_MTX(*pac->stats_mtx),
 		    &pac->stats->decay_dirty.nmadvise, 1);
-		locked_inc_u64(tsdn,
-		    LOCKEDINT_MTX(*pac->stats_mtx),
-		    &pac->stats->decay_dirty.purged,
-		    extent_size >> LG_PAGE);
+		locked_inc_u64(tsdn, LOCKEDINT_MTX(*pac->stats_mtx),
+		    &pac->stats->decay_dirty.purged, extent_size >> LG_PAGE);
 		LOCKEDINT_MTX_UNLOCK(tsdn, *pac->stats_mtx);
-		atomic_fetch_sub_zu(&pac->stats->pac_mapped, extent_size,
-		    ATOMIC_RELAXED);
+		atomic_fetch_sub_zu(
+		    &pac->stats->pac_mapped, extent_size, ATOMIC_RELAXED);
 	}
 }
 
@@ -914,9 +1004,9 @@ extent_maximally_purge(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
 void
 extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache,
     edata_t *edata) {
-	assert((ecache->state != extent_state_dirty &&
-	    ecache->state != extent_state_muzzy) ||
-	    !edata_zeroed_get(edata));
+	assert((ecache->state != extent_state_dirty
+	           && ecache->state != extent_state_muzzy)
+	    || !edata_zeroed_get(edata));
 
 	malloc_mutex_lock(tsdn, &ecache->mtx);
 
@@ -926,19 +1016,49 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache,
 		goto label_skip_coalesce;
 	}
 	if (!ecache->delay_coalesce) {
-		edata = extent_try_coalesce(tsdn, pac, ehooks, ecache, edata,
-		    NULL);
+		bool coalesced_unused;
+		edata = extent_try_coalesce(
+		    tsdn, pac, ehooks, ecache, edata, &coalesced_unused);
 	} else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) {
 		assert(ecache == &pac->ecache_dirty);
 		/* Always coalesce large extents eagerly. */
-		bool coalesced;
+		/**
+		* Maximum size limit (max_size) for large extents waiting to be coalesced
+		* in dirty ecache.
+		*
+		* When set to a non-zero value, this parameter restricts the maximum size
+		* of large extents after coalescing. If the combined size of two extents
+		* would exceed this threshold, the coalescing operation is skipped.
+		*
+		* This improves dirty ecache reuse efficiency by:
+		* - Maintaining appropriately sized extents that match common allocation requests
+		* - Limiting large extent coalescence to prevent overly large extents that are
+		*   less likely to be reused efficiently
+		* - Setting lg_max_coalesce for large extent merging scenarios, similar to how
+		*   lg_max_fit is used during extent reuse
+		*
+		* Note that during extent decay/purge operations, no coalescing restrictions
+		* are applied to dirty ecache despite the delay_coalesce setting. This ensures
+		* that while improving dirty ecache reuse efficiency, we don't compromise
+		* the final coalescing that happens during the transition from dirty ecache
+		* to muzzy/retained ecache states.
+		*/
+		unsigned lg_max_coalesce = (unsigned)
+		    opt_lg_extent_max_active_fit;
+		size_t edata_size = edata_size_get(edata);
+		size_t max_size = (SC_LARGE_MAXCLASS >> lg_max_coalesce)
+		        > edata_size
+		    ? (edata_size << lg_max_coalesce)
+		    : SC_LARGE_MAXCLASS;
+		bool   coalesced;
 		do {
 			assert(edata_state_get(edata) == extent_state_active);
 			edata = extent_try_coalesce_large(tsdn, pac, ehooks,
-			    ecache, edata, &coalesced);
+			    ecache, edata, max_size, &coalesced);
 		} while (coalesced);
-		if (edata_size_get(edata) >=
-		    atomic_load_zu(&pac->oversize_threshold, ATOMIC_RELAXED)
+		if (edata_size_get(edata) >= atomic_load_zu(
+		        &pac->oversize_threshold, ATOMIC_RELAXED)
+		    && !background_thread_enabled()
 		    && extent_may_force_decay(pac)) {
 			/* Shortcut to purge the oversize extent eagerly. */
 			malloc_mutex_unlock(tsdn, &ecache->mtx);
@@ -953,10 +1073,9 @@ label_skip_coalesce:
 }
 
 void
-extent_dalloc_gap(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
-    edata_t *edata) {
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
+extent_dalloc_gap(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) {
+	witness_assert_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0);
 
 	if (extent_register(tsdn, pac, edata)) {
 		edata_cache_put(tsdn, pac->edata_cache, edata);
@@ -966,14 +1085,14 @@ extent_dalloc_gap(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
 }
 
 static bool
-extent_dalloc_wrapper_try(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
-    edata_t *edata) {
+extent_dalloc_wrapper_try(
+    tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) {
 	bool err;
 
 	assert(edata_base_get(edata) != NULL);
 	assert(edata_size_get(edata) != 0);
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
+	witness_assert_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0);
 
 	edata_addr_set(edata, edata_base_get(edata));
 
@@ -989,8 +1108,8 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
 }
 
 edata_t *
-extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
-    void *new_addr, size_t size, size_t alignment, bool zero, bool *commit,
+extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void *new_addr,
+    size_t size, size_t alignment, bool zero, bool *commit,
     bool growing_retained) {
 	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
 	    WITNESS_RANK_CORE, growing_retained ? 1 : 0);
@@ -1000,14 +1119,14 @@ extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
 		return NULL;
 	}
 	size_t palignment = ALIGNMENT_CEILING(alignment, PAGE);
-	void *addr = ehooks_alloc(tsdn, ehooks, new_addr, size, palignment,
-	    &zero, commit);
+	void  *addr = ehooks_alloc(
+            tsdn, ehooks, new_addr, size, palignment, &zero, commit);
 	if (addr == NULL) {
 		edata_cache_put(tsdn, pac->edata_cache, edata);
 		return NULL;
 	}
-	edata_init(edata, ecache_ind_get(&pac->ecache_dirty), addr,
-	    size, /* slab */ false, SC_NSIZES, extent_sn_next(pac),
+	edata_init(edata, ecache_ind_get(&pac->ecache_dirty), addr, size,
+	    /* slab */ false, SC_NSIZES, extent_sn_next(pac),
 	    extent_state_active, zero, *commit, EXTENT_PAI_PAC,
 	    opt_retain ? EXTENT_IS_HEAD : EXTENT_NOT_HEAD);
 	/*
@@ -1024,20 +1143,43 @@ extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
 	return edata;
 }
 
+static void
+extent_dalloc_wrapper_finish(
+    tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) {
+	if (config_prof) {
+		extent_gdump_sub(tsdn, edata);
+	}
+	extent_record(tsdn, pac, ehooks, &pac->ecache_retained, edata);
+}
+
 void
-extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
-    edata_t *edata) {
+extent_dalloc_wrapper_purged(
+    tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) {
 	assert(edata_pai_get(edata) == EXTENT_PAI_PAC);
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
+	witness_assert_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0);
+
+	/* Verify that will not go down the dalloc / munmap route. */
+	assert(ehooks_dalloc_will_fail(ehooks));
+
+	edata_zeroed_set(edata, true);
+	extent_dalloc_wrapper_finish(tsdn, pac, ehooks, edata);
+}
+
+void
+extent_dalloc_wrapper(
+    tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) {
+	assert(edata_pai_get(edata) == EXTENT_PAI_PAC);
+	witness_assert_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0);
 
 	/* Avoid calling the default extent_dalloc unless have to. */
 	if (!ehooks_dalloc_will_fail(ehooks)) {
 		/* Remove guard pages for dalloc / unmap. */
 		if (edata_guarded_get(edata)) {
 			assert(ehooks_are_default(ehooks));
-			san_unguard_pages_two_sided(tsdn, ehooks, edata,
-			    pac->emap);
+			san_unguard_pages_two_sided(
+			    tsdn, ehooks, edata, pac->emap);
 		}
 		/*
 		 * Deregister first to avoid a race with other allocating
@@ -1054,38 +1196,34 @@ extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
 	bool zeroed;
 	if (!edata_committed_get(edata)) {
 		zeroed = true;
-	} else if (!extent_decommit_wrapper(tsdn, ehooks, edata, 0,
-	    edata_size_get(edata))) {
+	} else if (!extent_decommit_wrapper(
+	               tsdn, ehooks, edata, 0, edata_size_get(edata))) {
 		zeroed = true;
 	} else if (!ehooks_purge_forced(tsdn, ehooks, edata_base_get(edata),
-	    edata_size_get(edata), 0, edata_size_get(edata))) {
+	               edata_size_get(edata), 0, edata_size_get(edata))) {
 		zeroed = true;
-	} else if (edata_state_get(edata) == extent_state_muzzy ||
-	    !ehooks_purge_lazy(tsdn, ehooks, edata_base_get(edata),
-	    edata_size_get(edata), 0, edata_size_get(edata))) {
+	} else if (edata_state_get(edata) == extent_state_muzzy
+	    || !ehooks_purge_lazy(tsdn, ehooks, edata_base_get(edata),
+	        edata_size_get(edata), 0, edata_size_get(edata))) {
 		zeroed = false;
 	} else {
 		zeroed = false;
 	}
 	edata_zeroed_set(edata, zeroed);
 
-	if (config_prof) {
-		extent_gdump_sub(tsdn, edata);
-	}
-
-	extent_record(tsdn, pac, ehooks, &pac->ecache_retained, edata);
+	extent_dalloc_wrapper_finish(tsdn, pac, ehooks, edata);
 }
 
 void
-extent_destroy_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
-    edata_t *edata) {
+extent_destroy_wrapper(
+    tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) {
 	assert(edata_base_get(edata) != NULL);
 	assert(edata_size_get(edata) != 0);
 	extent_state_t state = edata_state_get(edata);
 	assert(state == extent_state_retained || state == extent_state_active);
 	assert(emap_edata_is_acquired(tsdn, pac->emap, edata));
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
+	witness_assert_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0);
 
 	if (edata_guarded_get(edata)) {
 		assert(opt_retain);
@@ -1111,18 +1249,11 @@ extent_commit_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
 	return err;
 }
 
-bool
-extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
-    size_t offset, size_t length) {
-	return extent_commit_impl(tsdn, ehooks, edata, offset, length,
-	    /* growing_retained */ false);
-}
-
-bool
+static bool
 extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
     size_t offset, size_t length) {
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
+	witness_assert_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0);
 	bool err = ehooks_decommit(tsdn, ehooks, edata_base_get(edata),
 	    edata_size_get(edata), offset, length);
 	edata_committed_set(edata, edata_committed_get(edata) && err);
@@ -1142,8 +1273,8 @@ extent_purge_lazy_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
 bool
 extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
     size_t offset, size_t length) {
-	return extent_purge_lazy_impl(tsdn, ehooks, edata, offset,
-	    length, false);
+	return extent_purge_lazy_impl(
+	    tsdn, ehooks, edata, offset, length, false);
 }
 
 static bool
@@ -1159,8 +1290,8 @@ extent_purge_forced_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
 bool
 extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
     size_t offset, size_t length) {
-	return extent_purge_forced_impl(tsdn, ehooks, edata, offset, length,
-	    false);
+	return extent_purge_forced_impl(
+	    tsdn, ehooks, edata, offset, length, false);
 }
 
 /*
@@ -1171,16 +1302,16 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
  * and returns the trail (except in case of error).
  */
 static edata_t *
-extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
-    edata_t *edata, size_t size_a, size_t size_b, bool holding_core_locks) {
+extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata,
+    size_t size_a, size_t size_b, bool holding_core_locks) {
 	assert(edata_size_get(edata) == size_a + size_b);
 	/* Only the shrink path may split w/o holding core locks. */
 	if (holding_core_locks) {
 		witness_assert_positive_depth_to_rank(
 		    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE);
 	} else {
-		witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-		    WITNESS_RANK_CORE, 0);
+		witness_assert_depth_to_rank(
+		    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0);
 	}
 
 	if (ehooks_split_will_fail(ehooks)) {
@@ -1193,13 +1324,13 @@ extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
 	}
 
 	edata_init(trail, edata_arena_ind_get(edata),
-	    (void *)((uintptr_t)edata_base_get(edata) + size_a), size_b,
+	    (void *)((byte_t *)edata_base_get(edata) + size_a), size_b,
 	    /* slab */ false, SC_NSIZES, edata_sn_get(edata),
 	    edata_state_get(edata), edata_zeroed_get(edata),
 	    edata_committed_get(edata), EXTENT_PAI_PAC, EXTENT_NOT_HEAD);
 	emap_prepare_t prepare;
-	bool err = emap_split_prepare(tsdn, pac->emap, &prepare, edata,
-	    size_a, trail, size_b);
+	bool           err = emap_split_prepare(
+            tsdn, pac->emap, &prepare, edata, size_a, trail, size_b);
 	if (err) {
 		goto label_error_b;
 	}
@@ -1221,8 +1352,8 @@ extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
 	}
 
 	edata_size_set(edata, size_a);
-	emap_split_commit(tsdn, pac->emap, &prepare, edata, size_a, trail,
-	    size_b);
+	emap_split_commit(
+	    tsdn, pac->emap, &prepare, edata, size_a, trail, size_b);
 
 	return trail;
 label_error_b:
@@ -1234,8 +1365,8 @@ label_error_a:
 edata_t *
 extent_split_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata,
     size_t size_a, size_t size_b, bool holding_core_locks) {
-	return extent_split_impl(tsdn, pac, ehooks, edata, size_a, size_b,
-	    holding_core_locks);
+	return extent_split_impl(
+	    tsdn, pac, ehooks, edata, size_a, size_b, holding_core_locks);
 }
 
 static bool
@@ -1246,8 +1377,8 @@ extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a,
 		witness_assert_positive_depth_to_rank(
 		    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE);
 	} else {
-		witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-		    WITNESS_RANK_CORE, 0);
+		witness_assert_depth_to_rank(
+		    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0);
 	}
 
 	assert(edata_base_get(a) < edata_base_get(b));
@@ -1272,12 +1403,13 @@ extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a,
 	emap_prepare_t prepare;
 	emap_merge_prepare(tsdn, pac->emap, &prepare, a, b);
 
-	assert(edata_state_get(a) == extent_state_active ||
-	    edata_state_get(a) == extent_state_merging);
+	assert(edata_state_get(a) == extent_state_active
+	    || edata_state_get(a) == extent_state_merging);
 	edata_state_set(a, extent_state_active);
 	edata_size_set(a, edata_size_get(a) + edata_size_get(b));
-	edata_sn_set(a, (edata_sn_get(a) < edata_sn_get(b)) ?
-	    edata_sn_get(a) : edata_sn_get(b));
+	edata_sn_set(a,
+	    (edata_sn_get(a) < edata_sn_get(b)) ? edata_sn_get(a)
+	                                        : edata_sn_get(b));
 	edata_zeroed_set(a, edata_zeroed_get(a) && edata_zeroed_get(b));
 
 	emap_merge_commit(tsdn, pac->emap, &prepare, a, b);
@@ -1288,26 +1420,26 @@ extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a,
 }
 
 bool
-extent_merge_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
-    edata_t *a, edata_t *b) {
+extent_merge_wrapper(
+    tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, edata_t *b) {
 	return extent_merge_impl(tsdn, pac, ehooks, a, b,
 	    /* holding_core_locks */ false);
 }
 
 bool
-extent_commit_zero(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
-    bool commit, bool zero, bool growing_retained) {
+extent_commit_zero(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, bool commit,
+    bool zero, bool growing_retained) {
 	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
 	    WITNESS_RANK_CORE, growing_retained ? 1 : 0);
 
 	if (commit && !edata_committed_get(edata)) {
 		if (extent_commit_impl(tsdn, ehooks, edata, 0,
-		    edata_size_get(edata), growing_retained)) {
+		        edata_size_get(edata), growing_retained)) {
 			return true;
 		}
 	}
 	if (zero && !edata_zeroed_get(edata)) {
-		void *addr = edata_base_get(edata);
+		void  *addr = edata_base_get(edata);
 		size_t size = edata_size_get(edata);
 		ehooks_zero(tsdn, ehooks, addr, size);
 	}
diff --git a/src/extent_dss.c b/src/extent_dss.c
index 9a35bacf..c7c34207 100644
--- a/src/extent_dss.c
+++ b/src/extent_dss.c
@@ -8,14 +8,13 @@
 /******************************************************************************/
 /* Data. */
 
-const char	*opt_dss = DSS_DEFAULT;
+/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
+#define SBRK_INVALID ((void *)-1)
 
-const char	*dss_prec_names[] = {
-	"disabled",
-	"primary",
-	"secondary",
-	"N/A"
-};
+const char *opt_dss = DSS_DEFAULT;
+
+const char *const dss_prec_names[] = {
+    "disabled", "primary", "secondary", "N/A"};
 
 /*
  * Current dss precedence default, used when creating new arenas.  NB: This is
@@ -23,17 +22,16 @@ const char	*dss_prec_names[] = {
  * guarantee that sizeof(dss_prec_t) is the same as sizeof(unsigned), and we use
  * atomic operations to synchronize the setting.
  */
-static atomic_u_t	dss_prec_default = ATOMIC_INIT(
-    (unsigned)DSS_PREC_DEFAULT);
+static atomic_u_t dss_prec_default = ATOMIC_INIT((unsigned)DSS_PREC_DEFAULT);
 
 /* Base address of the DSS. */
-static void		*dss_base;
+static void *dss_base;
 /* Atomic boolean indicating whether a thread is currently extending DSS. */
-static atomic_b_t	dss_extending;
+static atomic_b_t dss_extending;
 /* Atomic boolean indicating whether the DSS is exhausted. */
-static atomic_b_t	dss_exhausted;
+static atomic_b_t dss_exhausted;
 /* Atomic current upper limit on DSS addresses. */
-static atomic_p_t	dss_max;
+static atomic_p_t dss_max;
 
 /******************************************************************************/
 
@@ -73,7 +71,7 @@ extent_dss_extending_start(void) {
 	while (true) {
 		bool expected = false;
 		if (atomic_compare_exchange_weak_b(&dss_extending, &expected,
-		    true, ATOMIC_ACQ_REL, ATOMIC_RELAXED)) {
+		        true, ATOMIC_ACQ_REL, ATOMIC_RELAXED)) {
 			break;
 		}
 		spin_adaptive(&spinner);
@@ -94,7 +92,7 @@ extent_dss_max_update(void *new_addr) {
 	 * up to date.
 	 */
 	void *max_cur = extent_dss_sbrk(0);
-	if (max_cur == (void *)-1) {
+	if (max_cur == SBRK_INVALID) {
 		return NULL;
 	}
 	atomic_store_p(&dss_max, max_cur, ATOMIC_RELEASE);
@@ -140,51 +138,54 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
 				goto label_oom;
 			}
 
-			bool head_state = opt_retain ? EXTENT_IS_HEAD :
-			    EXTENT_NOT_HEAD;
+			bool head_state = opt_retain ? EXTENT_IS_HEAD
+			                             : EXTENT_NOT_HEAD;
 			/*
 			 * Compute how much page-aligned gap space (if any) is
 			 * necessary to satisfy alignment.  This space can be
 			 * recycled for later use.
 			 */
-			void *gap_addr_page = (void *)(PAGE_CEILING(
-			    (uintptr_t)max_cur));
-			void *ret = (void *)ALIGNMENT_CEILING(
-			    (uintptr_t)gap_addr_page, alignment);
-			size_t gap_size_page = (uintptr_t)ret -
-			    (uintptr_t)gap_addr_page;
+			void *gap_addr_page = ALIGNMENT_ADDR2CEILING(
+			    max_cur, PAGE);
+			void *ret = ALIGNMENT_ADDR2CEILING(
+			    gap_addr_page, alignment);
+			size_t gap_size_page = (uintptr_t)ret
+			    - (uintptr_t)gap_addr_page;
 			if (gap_size_page != 0) {
 				edata_init(gap, arena_ind_get(arena),
-				    gap_addr_page, gap_size_page, false,
-				    SC_NSIZES, extent_sn_next(
-					&arena->pa_shard.pac),
-				    extent_state_active, false, true,
-				    EXTENT_PAI_PAC, head_state);
+				    gap_addr_page, gap_size_page,
+				    /* slab */ false,
+				    /* szind */ SC_NSIZES,
+				    extent_sn_next(&arena->pa_shard.pac),
+				    extent_state_active,
+				    /* zeroed */ false,
+				    /* committed */ true,
+				    /* pai */ EXTENT_PAI_PAC, head_state);
 			}
 			/*
 			 * Compute the address just past the end of the desired
 			 * allocation space.
 			 */
-			void *dss_next = (void *)((uintptr_t)ret + size);
-			if ((uintptr_t)ret < (uintptr_t)max_cur ||
-			    (uintptr_t)dss_next < (uintptr_t)max_cur) {
+			void *dss_next = (void *)((byte_t *)ret + size);
+			if ((uintptr_t)ret < (uintptr_t)max_cur
+			    || (uintptr_t)dss_next < (uintptr_t)max_cur) {
 				goto label_oom; /* Wrap-around. */
 			}
 			/* Compute the increment, including subpage bytes. */
-			void *gap_addr_subpage = max_cur;
-			size_t gap_size_subpage = (uintptr_t)ret -
-			    (uintptr_t)gap_addr_subpage;
+			void  *gap_addr_subpage = max_cur;
+			size_t gap_size_subpage = (uintptr_t)ret
+			    - (uintptr_t)gap_addr_subpage;
 			intptr_t incr = gap_size_subpage + size;
 
-			assert((uintptr_t)max_cur + incr == (uintptr_t)ret +
-			    size);
+			assert(
+			    (uintptr_t)max_cur + incr == (uintptr_t)ret + size);
 
 			/* Try to allocate. */
 			void *dss_prev = extent_dss_sbrk(incr);
 			if (dss_prev == max_cur) {
 				/* Success. */
-				atomic_store_p(&dss_max, dss_next,
-				    ATOMIC_RELEASE);
+				atomic_store_p(
+				    &dss_max, dss_next, ATOMIC_RELEASE);
 				extent_dss_extending_finish();
 
 				if (gap_size_page != 0) {
@@ -200,17 +201,23 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
 					*commit = pages_decommit(ret, size);
 				}
 				if (*zero && *commit) {
-					edata_t edata = {0};
+					edata_t   edata = {0};
 					ehooks_t *ehooks = arena_get_ehooks(
 					    arena);
 
-					edata_init(&edata,
-					    arena_ind_get(arena), ret, size,
-					    size, false, SC_NSIZES,
-					    extent_state_active, false, true,
-					    EXTENT_PAI_PAC, head_state);
+					edata_init(&edata, arena_ind_get(arena),
+					    ret, size,
+					    /* slab */ false,
+					    /* szind */ SC_NSIZES,
+					    extent_sn_next(
+					        &arena->pa_shard.pac),
+					    extent_state_active,
+					    /* zeroed */ false,
+					    /* committed */ true,
+					    /* pai */ EXTENT_PAI_PAC,
+					    head_state);
 					if (extent_purge_forced_wrapper(tsdn,
-					    ehooks, &edata, 0, size)) {
+					        ehooks, &edata, 0, size)) {
 						memset(ret, 0, size);
 					}
 				}
@@ -220,10 +227,10 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
 			 * Failure, whether due to OOM or a race with a raw
 			 * sbrk() call from outside the allocator.
 			 */
-			if (dss_prev == (void *)-1) {
+			if (dss_prev == SBRK_INVALID) {
 				/* OOM. */
-				atomic_store_b(&dss_exhausted, true,
-				    ATOMIC_RELEASE);
+				atomic_store_b(
+				    &dss_exhausted, true, ATOMIC_RELEASE);
 				goto label_oom;
 			}
 		}
@@ -236,16 +243,16 @@ label_oom:
 
 static bool
 extent_in_dss_helper(void *addr, void *max) {
-	return ((uintptr_t)addr >= (uintptr_t)dss_base && (uintptr_t)addr <
-	    (uintptr_t)max);
+	return ((uintptr_t)addr >= (uintptr_t)dss_base
+	    && (uintptr_t)addr < (uintptr_t)max);
 }
 
 bool
 extent_in_dss(void *addr) {
 	cassert(have_dss);
 
-	return extent_in_dss_helper(addr, atomic_load_p(&dss_max,
-	    ATOMIC_ACQUIRE));
+	return extent_in_dss_helper(
+	    addr, atomic_load_p(&dss_max, ATOMIC_ACQUIRE));
 }
 
 bool
@@ -254,14 +261,14 @@ extent_dss_mergeable(void *addr_a, void *addr_b) {
 
 	cassert(have_dss);
 
-	if ((uintptr_t)addr_a < (uintptr_t)dss_base && (uintptr_t)addr_b <
-	    (uintptr_t)dss_base) {
+	if ((uintptr_t)addr_a < (uintptr_t)dss_base
+	    && (uintptr_t)addr_b < (uintptr_t)dss_base) {
 		return true;
 	}
 
 	max = atomic_load_p(&dss_max, ATOMIC_ACQUIRE);
-	return (extent_in_dss_helper(addr_a, max) ==
-	    extent_in_dss_helper(addr_b, max));
+	return (extent_in_dss_helper(addr_a, max)
+	    == extent_in_dss_helper(addr_b, max));
 }
 
 void
@@ -270,7 +277,8 @@ extent_dss_boot(void) {
 
 	dss_base = extent_dss_sbrk(0);
 	atomic_store_b(&dss_extending, false, ATOMIC_RELAXED);
-	atomic_store_b(&dss_exhausted, dss_base == (void *)-1, ATOMIC_RELAXED);
+	atomic_store_b(
+	    &dss_exhausted, dss_base == SBRK_INVALID, ATOMIC_RELAXED);
 	atomic_store_p(&dss_max, dss_base, ATOMIC_RELAXED);
 }
 
diff --git a/src/extent_mmap.c b/src/extent_mmap.c
index 5f0ee2d2..d39bddc6 100644
--- a/src/extent_mmap.c
+++ b/src/extent_mmap.c
@@ -7,7 +7,7 @@
 /******************************************************************************/
 /* Data. */
 
-bool	opt_retain =
+bool opt_retain =
 #ifdef JEMALLOC_RETAIN
     true
 #else
@@ -18,8 +18,8 @@ bool	opt_retain =
 /******************************************************************************/
 
 void *
-extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero,
-    bool *commit) {
+extent_alloc_mmap(
+    void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) {
 	assert(alignment == ALIGNMENT_CEILING(alignment, PAGE));
 	void *ret = pages_map(new_addr, size, alignment, commit);
 	if (ret == NULL) {
diff --git a/src/fxp.c b/src/fxp.c
index 96585f0a..faeab207 100644
--- a/src/fxp.c
+++ b/src/fxp.c
@@ -83,8 +83,8 @@ fxp_parse(fxp_t *result, const char *str, char **end) {
 	}
 
 	assert(fractional_part < frac_div);
-	uint32_t fractional_repr = (uint32_t)(
-	    (fractional_part << 16) / frac_div);
+	uint32_t fractional_repr = (uint32_t)((fractional_part << 16)
+	    / frac_div);
 
 	/* Success! */
 	*result = (integer_part << 16) + fractional_repr;
@@ -99,7 +99,7 @@ fxp_print(fxp_t a, char buf[FXP_BUF_SIZE]) {
 	uint32_t integer_part = fxp_round_down(a);
 	uint32_t fractional_part = (a & ((1U << 16) - 1));
 
-	int leading_fraction_zeros = 0;
+	int      leading_fraction_zeros = 0;
 	uint64_t fraction_digits = fractional_part;
 	for (int i = 0; i < FXP_FRACTIONAL_PART_DIGITS; i++) {
 		if (fraction_digits < (1U << 16)
@@ -113,12 +113,12 @@ fxp_print(fxp_t a, char buf[FXP_BUF_SIZE]) {
 		fraction_digits /= 10;
 	}
 
-	size_t printed = malloc_snprintf(buf, FXP_BUF_SIZE, "%"FMTu32".",
-	    integer_part);
+	size_t printed = malloc_snprintf(
+	    buf, FXP_BUF_SIZE, "%" FMTu32 ".", integer_part);
 	for (int i = 0; i < leading_fraction_zeros; i++) {
 		buf[printed] = '0';
 		printed++;
 	}
-	malloc_snprintf(&buf[printed], FXP_BUF_SIZE - printed, "%"FMTu64,
-	    fraction_digits);
+	malloc_snprintf(
+	    &buf[printed], FXP_BUF_SIZE - printed, "%" FMTu64, fraction_digits);
 }
diff --git a/src/hook.c b/src/hook.c
index 493edbbe..4270ad60 100644
--- a/src/hook.c
+++ b/src/hook.c
@@ -9,19 +9,19 @@
 typedef struct hooks_internal_s hooks_internal_t;
 struct hooks_internal_s {
 	hooks_t hooks;
-	bool in_use;
+	bool    in_use;
 };
 
 seq_define(hooks_internal_t, hooks)
 
-static atomic_u_t nhooks = ATOMIC_INIT(0);
-static seq_hooks_t hooks[HOOK_MAX];
+    static atomic_u_t nhooks = ATOMIC_INIT(0);
+static seq_hooks_t    hooks[HOOK_MAX];
 static malloc_mutex_t hooks_mu;
 
 bool
-hook_boot() {
-	return malloc_mutex_init(&hooks_mu, "hooks", WITNESS_RANK_HOOK,
-	    malloc_mutex_rank_exclusive);
+hook_boot(void) {
+	return malloc_mutex_init(
+	    &hooks_mu, "hooks", WITNESS_RANK_HOOK, malloc_mutex_rank_exclusive);
 }
 
 static void *
@@ -84,23 +84,21 @@ hook_remove(tsdn_t *tsdn, void *opaque) {
 	malloc_mutex_unlock(tsdn, &hooks_mu);
 }
 
-#define FOR_EACH_HOOK_BEGIN(hooks_internal_ptr)				\
-for (int for_each_hook_counter = 0;					\
-    for_each_hook_counter < HOOK_MAX;					\
-    for_each_hook_counter++) {						\
-	bool for_each_hook_success = seq_try_load_hooks(		\
-	    (hooks_internal_ptr), &hooks[for_each_hook_counter]);	\
-	if (!for_each_hook_success) {					\
-		continue;						\
-	}								\
-	if (!(hooks_internal_ptr)->in_use) {				\
-		continue;						\
-	}
-#define FOR_EACH_HOOK_END						\
-}
+#define FOR_EACH_HOOK_BEGIN(hooks_internal_ptr)                                \
+	for (int for_each_hook_counter = 0; for_each_hook_counter < HOOK_MAX;  \
+	     for_each_hook_counter++) {                                        \
+		bool for_each_hook_success = seq_try_load_hooks(               \
+		    (hooks_internal_ptr), &hooks[for_each_hook_counter]);      \
+		if (!for_each_hook_success) {                                  \
+			continue;                                              \
+		}                                                              \
+		if (!(hooks_internal_ptr)->in_use) {                           \
+			continue;                                              \
+		}
+#define FOR_EACH_HOOK_END }
 
 static bool *
-hook_reentrantp() {
+hook_reentrantp(void) {
 	/*
 	 * We prevent user reentrancy within hooks.  This is basically just a
 	 * thread-local bool that triggers an early-exit.
@@ -129,26 +127,25 @@ hook_reentrantp() {
 	 * untouched.
 	 */
 	static bool in_hook_global = true;
-	tsdn_t *tsdn = tsdn_fetch();
-	bool *in_hook = tsdn_in_hookp_get(tsdn);
-	if (in_hook!= NULL) {
+	tsdn_t     *tsdn = tsdn_fetch();
+	bool       *in_hook = tsdn_in_hookp_get(tsdn);
+	if (in_hook != NULL) {
 		return in_hook;
 	}
 	return &in_hook_global;
 }
 
-#define HOOK_PROLOGUE							\
-	if (likely(atomic_load_u(&nhooks, ATOMIC_RELAXED) == 0)) {	\
-		return;							\
-	}								\
-	bool *in_hook = hook_reentrantp();				\
-	if (*in_hook) {							\
-		return;							\
-	}								\
+#define HOOK_PROLOGUE                                                          \
+	if (likely(atomic_load_u(&nhooks, ATOMIC_RELAXED) == 0)) {             \
+		return;                                                        \
+	}                                                                      \
+	bool *in_hook = hook_reentrantp();                                     \
+	if (*in_hook) {                                                        \
+		return;                                                        \
+	}                                                                      \
 	*in_hook = true;
 
-#define HOOK_EPILOGUE							\
-	*in_hook = false;
+#define HOOK_EPILOGUE *in_hook = false;
 
 void
 hook_invoke_alloc(hook_alloc_t type, void *result, uintptr_t result_raw,
@@ -157,10 +154,10 @@ hook_invoke_alloc(hook_alloc_t type, void *result, uintptr_t result_raw,
 
 	hooks_internal_t hook;
 	FOR_EACH_HOOK_BEGIN(&hook)
-		hook_alloc h = hook.hooks.alloc_hook;
-		if (h != NULL) {
-			h(hook.hooks.extra, type, result, result_raw, args_raw);
-		}
+	hook_alloc h = hook.hooks.alloc_hook;
+	if (h != NULL) {
+		h(hook.hooks.extra, type, result, result_raw, args_raw);
+	}
 	FOR_EACH_HOOK_END
 
 	HOOK_EPILOGUE
@@ -171,10 +168,10 @@ hook_invoke_dalloc(hook_dalloc_t type, void *address, uintptr_t args_raw[3]) {
 	HOOK_PROLOGUE
 	hooks_internal_t hook;
 	FOR_EACH_HOOK_BEGIN(&hook)
-		hook_dalloc h = hook.hooks.dalloc_hook;
-		if (h != NULL) {
-			h(hook.hooks.extra, type, address, args_raw);
-		}
+	hook_dalloc h = hook.hooks.dalloc_hook;
+	if (h != NULL) {
+		h(hook.hooks.extra, type, address, args_raw);
+	}
 	FOR_EACH_HOOK_END
 	HOOK_EPILOGUE
 }
@@ -185,11 +182,11 @@ hook_invoke_expand(hook_expand_t type, void *address, size_t old_usize,
 	HOOK_PROLOGUE
 	hooks_internal_t hook;
 	FOR_EACH_HOOK_BEGIN(&hook)
-		hook_expand h = hook.hooks.expand_hook;
-		if (h != NULL) {
-			h(hook.hooks.extra, type, address, old_usize, new_usize,
-			    result_raw, args_raw);
-		}
+	hook_expand h = hook.hooks.expand_hook;
+	if (h != NULL) {
+		h(hook.hooks.extra, type, address, old_usize, new_usize,
+		    result_raw, args_raw);
+	}
 	FOR_EACH_HOOK_END
 	HOOK_EPILOGUE
 }
diff --git a/src/hpa.c b/src/hpa.c
index 7e2aeba0..7e5b5f72 100644
--- a/src/hpa.c
+++ b/src/hpa.c
@@ -2,29 +2,38 @@
 #include "jemalloc/internal/jemalloc_internal_includes.h"
 
 #include "jemalloc/internal/hpa.h"
+#include "jemalloc/internal/hpa_utils.h"
 
 #include "jemalloc/internal/fb.h"
 #include "jemalloc/internal/witness.h"
-
-#define HPA_EDEN_SIZE (128 * HUGEPAGE)
+#include "jemalloc/internal/jemalloc_probe.h"
 
 static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size,
     size_t alignment, bool zero, bool guarded, bool frequent_reuse,
     bool *deferred_work_generated);
-static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size,
-    size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated);
-static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated);
-static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    size_t old_size, size_t new_size, bool *deferred_work_generated);
-static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    bool *deferred_work_generated);
-static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self,
-    edata_list_active_t *list, bool *deferred_work_generated);
+static bool     hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata,
+        size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated);
+static bool     hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
+        size_t old_size, size_t new_size, bool *deferred_work_generated);
+static void     hpa_dalloc(
+        tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated);
 static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self);
 
+static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self,
+    edata_list_active_t *list, bool *deferred_work_generated);
+
+const char *const hpa_hugify_style_names[] = {"auto", "none", "eager", "lazy"};
+
+bool opt_experimental_hpa_start_huge_if_thp_always = true;
+bool opt_experimental_hpa_enforce_hugify = false;
+
 bool
-hpa_supported() {
+hpa_hugepage_size_exceeds_limit(void) {
+	return HUGEPAGE > HUGEPAGE_MAX_EXPECTED_SIZE;
+}
+
+bool
+hpa_supported(void) {
 #ifdef _WIN32
 	/*
 	 * At least until the API and implementation is somewhat settled, we
@@ -50,6 +59,10 @@ hpa_supported() {
 	if (HUGEPAGE_PAGES == 1) {
 		return false;
 	}
+	/* As mentioned in pages.h, do not support If HUGEPAGE is too large. */
+	if (hpa_hugepage_size_exceeds_limit()) {
+		return false;
+	}
 	return true;
 }
 
@@ -59,125 +72,9 @@ hpa_do_consistency_checks(hpa_shard_t *shard) {
 }
 
 bool
-hpa_central_init(hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks) {
-	/* malloc_conf processing should have filtered out these cases. */
-	assert(hpa_supported());
-	bool err;
-	err = malloc_mutex_init(&central->grow_mtx, "hpa_central_grow",
-	    WITNESS_RANK_HPA_CENTRAL_GROW, malloc_mutex_rank_exclusive);
-	if (err) {
-		return true;
-	}
-	err = malloc_mutex_init(&central->mtx, "hpa_central",
-	    WITNESS_RANK_HPA_CENTRAL, malloc_mutex_rank_exclusive);
-	if (err) {
-		return true;
-	}
-	central->base = base;
-	central->eden = NULL;
-	central->eden_len = 0;
-	central->age_counter = 0;
-	central->hooks = *hooks;
-	return false;
-}
-
-static hpdata_t *
-hpa_alloc_ps(tsdn_t *tsdn, hpa_central_t *central) {
-	return (hpdata_t *)base_alloc(tsdn, central->base, sizeof(hpdata_t),
-	    CACHELINE);
-}
-
-hpdata_t *
-hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size,
-    bool *oom) {
-	/* Don't yet support big allocations; these should get filtered out. */
-	assert(size <= HUGEPAGE);
-	/*
-	 * Should only try to extract from the central allocator if the local
-	 * shard is exhausted.  We should hold the grow_mtx on that shard.
-	 */
-	witness_assert_positive_depth_to_rank(
-	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_HPA_SHARD_GROW);
-
-	malloc_mutex_lock(tsdn, &central->grow_mtx);
-	*oom = false;
-
-	hpdata_t *ps = NULL;
-
-	/* Is eden a perfect fit? */
-	if (central->eden != NULL && central->eden_len == HUGEPAGE) {
-		ps = hpa_alloc_ps(tsdn, central);
-		if (ps == NULL) {
-			*oom = true;
-			malloc_mutex_unlock(tsdn, &central->grow_mtx);
-			return NULL;
-		}
-		hpdata_init(ps, central->eden, central->age_counter++);
-		central->eden = NULL;
-		central->eden_len = 0;
-		malloc_mutex_unlock(tsdn, &central->grow_mtx);
-		return ps;
-	}
-
-	/*
-	 * We're about to try to allocate from eden by splitting.  If eden is
-	 * NULL, we have to allocate it too.  Otherwise, we just have to
-	 * allocate an edata_t for the new psset.
-	 */
-	if (central->eden == NULL) {
-		/*
-		 * During development, we're primarily concerned with systems
-		 * with overcommit.  Eventually, we should be more careful here.
-		 */
-		bool commit = true;
-		/* Allocate address space, bailing if we fail. */
-		void *new_eden = pages_map(NULL, HPA_EDEN_SIZE, HUGEPAGE,
-		    &commit);
-		if (new_eden == NULL) {
-			*oom = true;
-			malloc_mutex_unlock(tsdn, &central->grow_mtx);
-			return NULL;
-		}
-		ps = hpa_alloc_ps(tsdn, central);
-		if (ps == NULL) {
-			pages_unmap(new_eden, HPA_EDEN_SIZE);
-			*oom = true;
-			malloc_mutex_unlock(tsdn, &central->grow_mtx);
-			return NULL;
-		}
-		central->eden = new_eden;
-		central->eden_len = HPA_EDEN_SIZE;
-	} else {
-		/* Eden is already nonempty; only need an edata for ps. */
-		ps = hpa_alloc_ps(tsdn, central);
-		if (ps == NULL) {
-			*oom = true;
-			malloc_mutex_unlock(tsdn, &central->grow_mtx);
-			return NULL;
-		}
-	}
-	assert(ps != NULL);
-	assert(central->eden != NULL);
-	assert(central->eden_len > HUGEPAGE);
-	assert(central->eden_len % HUGEPAGE == 0);
-	assert(HUGEPAGE_ADDR2BASE(central->eden) == central->eden);
-
-	hpdata_init(ps, central->eden, central->age_counter++);
-
-	char *eden_char = (char *)central->eden;
-	eden_char += HUGEPAGE;
-	central->eden = (void *)eden_char;
-	central->eden_len -= HUGEPAGE;
-
-	malloc_mutex_unlock(tsdn, &central->grow_mtx);
-
-	return ps;
-}
-
-bool
-hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap,
-    base_t *base, edata_cache_t *edata_cache, unsigned ind,
-    const hpa_shard_opts_t *opts) {
+hpa_shard_init(tsdn_t *tsdn, hpa_shard_t *shard, hpa_central_t *central,
+    emap_t *emap, base_t *base, edata_cache_t *edata_cache, unsigned ind,
+    const hpa_shard_opts_t *opts, const sec_opts_t *sec_opts) {
 	/* malloc_conf processing should have filtered out these cases. */
 	assert(hpa_supported());
 	bool err;
@@ -205,10 +102,12 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap,
 
 	shard->npending_purge = 0;
 	nstime_init_zero(&shard->last_purge);
+	nstime_init_zero(&shard->last_time_work_attempted);
 
 	shard->stats.npurge_passes = 0;
 	shard->stats.npurges = 0;
 	shard->stats.nhugifies = 0;
+	shard->stats.nhugify_failures = 0;
 	shard->stats.ndehugifies = 0;
 
 	/*
@@ -217,13 +116,16 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap,
 	 * operating on corrupted data.
 	 */
 	shard->pai.alloc = &hpa_alloc;
-	shard->pai.alloc_batch = &hpa_alloc_batch;
 	shard->pai.expand = &hpa_expand;
 	shard->pai.shrink = &hpa_shrink;
 	shard->pai.dalloc = &hpa_dalloc;
-	shard->pai.dalloc_batch = &hpa_dalloc_batch;
 	shard->pai.time_until_deferred_work = &hpa_time_until_deferred_work;
 
+	err = sec_init(tsdn, &shard->sec, base, sec_opts);
+	if (err) {
+		return true;
+	}
+
 	hpa_do_consistency_checks(shard);
 
 	return false;
@@ -232,28 +134,30 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap,
 /*
  * Note that the stats functions here follow the usual stats naming conventions;
  * "merge" obtains the stats from some live object of instance, while "accum"
- * only combines the stats from one stats objet to another.  Hence the lack of
+ * only combines the stats from one stats object to another.  Hence the lack of
  * locking here.
  */
 static void
-hpa_shard_nonderived_stats_accum(hpa_shard_nonderived_stats_t *dst,
-    hpa_shard_nonderived_stats_t *src) {
+hpa_shard_nonderived_stats_accum(
+    hpa_shard_nonderived_stats_t *dst, hpa_shard_nonderived_stats_t *src) {
 	dst->npurge_passes += src->npurge_passes;
 	dst->npurges += src->npurges;
 	dst->nhugifies += src->nhugifies;
+	dst->nhugify_failures += src->nhugify_failures;
 	dst->ndehugifies += src->ndehugifies;
 }
 
 void
 hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src) {
 	psset_stats_accum(&dst->psset_stats, &src->psset_stats);
-	hpa_shard_nonderived_stats_accum(&dst->nonderived_stats,
-	    &src->nonderived_stats);
+	hpa_shard_nonderived_stats_accum(
+	    &dst->nonderived_stats, &src->nonderived_stats);
+	sec_stats_accum(&dst->secstats, &src->secstats);
 }
 
 void
-hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard,
-    hpa_shard_stats_t *dst) {
+hpa_shard_stats_merge(
+    tsdn_t *tsdn, hpa_shard_t *shard, hpa_shard_stats_t *dst) {
 	hpa_do_consistency_checks(shard);
 
 	malloc_mutex_lock(tsdn, &shard->grow_mtx);
@@ -262,6 +166,36 @@ hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard,
 	hpa_shard_nonderived_stats_accum(&dst->nonderived_stats, &shard->stats);
 	malloc_mutex_unlock(tsdn, &shard->mtx);
 	malloc_mutex_unlock(tsdn, &shard->grow_mtx);
+
+	sec_stats_merge(tsdn, &shard->sec, &dst->secstats);
+}
+
+static bool
+hpa_is_hugify_eager(hpa_shard_t *shard) {
+	return shard->opts.hugify_style == hpa_hugify_style_eager;
+}
+
+static bool
+hpa_is_hugify_lazy(hpa_shard_t *shard) {
+	/* When hugify_sync==true we also set/unset HG bit manually */
+	return shard->opts.hugify_style == hpa_hugify_style_lazy
+	    || shard->opts.hugify_sync;
+}
+
+static bool
+hpa_is_hugify_none(hpa_shard_t *shard) {
+	return shard->opts.hugify_style == hpa_hugify_style_none;
+}
+
+/*
+ * Experimentation has shown that when we are purging only HUGEPAGE ranges and
+ * hugifying eagerly (or thp enabled=always) we get huge pages more often.  This
+ * helps us have more realistic accounting.
+ */
+static bool
+hpa_should_assume_huge(hpa_shard_t *shard, const hpdata_t *ps) {
+	return (hpa_is_hugify_eager(shard) || hpa_is_hugify_none(shard))
+	    && hpdata_purged_when_empty_and_huge_get(ps);
 }
 
 static bool
@@ -275,6 +209,20 @@ hpa_good_hugification_candidate(hpa_shard_t *shard, hpdata_t *ps) {
 	    >= shard->opts.hugification_threshold;
 }
 
+static bool
+hpa_good_purge_candidate(hpa_shard_t *shard, hpdata_t *ps) {
+	if (shard->opts.dirty_mult == (fxp_t)-1) {
+		/* No purging. */
+		return false;
+	}
+	size_t ndirty = hpdata_ndirty_get(ps);
+	/* Empty pages are good candidate for purging. */
+	if (ndirty > 0 && hpdata_empty(ps)) {
+		return true;
+	}
+	return ndirty * PAGE >= shard->opts.purge_threshold;
+}
+
 static size_t
 hpa_adjusted_ndirty(tsdn_t *tsdn, hpa_shard_t *shard) {
 	malloc_mutex_assert_owner(tsdn, &shard->mtx);
@@ -287,8 +235,8 @@ hpa_ndirty_max(tsdn_t *tsdn, hpa_shard_t *shard) {
 	if (shard->opts.dirty_mult == (fxp_t)-1) {
 		return (size_t)-1;
 	}
-	return fxp_mul_frac(psset_nactive(&shard->psset),
-	    shard->opts.dirty_mult);
+	return fxp_mul_frac(
+	    psset_nactive(&shard->psset), shard->opts.dirty_mult);
 }
 
 static bool
@@ -299,12 +247,21 @@ hpa_hugify_blocked_by_ndirty(tsdn_t *tsdn, hpa_shard_t *shard) {
 		return false;
 	}
 	return hpa_adjusted_ndirty(tsdn, shard)
-	    + hpdata_nretained_get(to_hugify) > hpa_ndirty_max(tsdn, shard);
+	    + hpdata_nretained_get(to_hugify)
+	    > hpa_ndirty_max(tsdn, shard);
 }
 
 static bool
 hpa_should_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
 	malloc_mutex_assert_owner(tsdn, &shard->mtx);
+	/*
+	 * The page that is purgable may be delayed, but we just want to know
+	 * if there is a need for bg thread to wake up in the future.
+	 */
+	hpdata_t *ps = psset_pick_purge(&shard->psset, NULL);
+	if (ps == NULL) {
+		return false;
+	}
 	if (hpa_adjusted_ndirty(tsdn, shard) > hpa_ndirty_max(tsdn, shard)) {
 		return true;
 	}
@@ -315,8 +272,22 @@ hpa_should_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
 }
 
 static void
-hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard,
-    hpdata_t *ps) {
+hpa_assume_huge(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) {
+	malloc_mutex_assert_owner(tsdn, &shard->mtx);
+
+	assert(hpa_should_assume_huge(shard, ps));
+	if (hpdata_huge_get(ps) || hpdata_empty(ps)) {
+		return;
+	}
+
+	if (hpdata_ntouched_get(ps) != HUGEPAGE_PAGES) {
+		hpdata_hugify(ps);
+	}
+}
+
+static void
+hpa_update_purge_hugify_eligibility(
+    tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) {
 	malloc_mutex_assert_owner(tsdn, &shard->mtx);
 	if (hpdata_changing_state_get(ps)) {
 		hpdata_purge_allowed_set(ps, false);
@@ -345,26 +316,41 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard,
 	 * allocator's end at all; we just try to pack allocations in a
 	 * hugepage-friendly manner and let the OS hugify in the background.
 	 */
-	hpdata_purge_allowed_set(ps, hpdata_ndirty_get(ps) > 0);
-	if (hpa_good_hugification_candidate(shard, ps)
+	if (hpa_should_assume_huge(shard, ps)) {
+		/* Assume it is huge without the need to madvise */
+		hpa_assume_huge(tsdn, shard, ps);
+	}
+	if ((hpa_is_hugify_lazy(shard) || opt_experimental_hpa_enforce_hugify)
+	    && hpa_good_hugification_candidate(shard, ps)
 	    && !hpdata_huge_get(ps)) {
 		nstime_t now;
 		shard->central->hooks.curtime(&now, /* first_reading */ true);
 		hpdata_allow_hugify(ps, now);
 	}
+	bool purgable = hpa_good_purge_candidate(shard, ps);
+	if (purgable && !hpdata_purge_allowed_get(ps)
+	    && (shard->opts.min_purge_delay_ms > 0)) {
+		nstime_t now;
+		uint64_t delayns = shard->opts.min_purge_delay_ms * 1000 * 1000;
+		shard->central->hooks.curtime(&now, /* first_reading */ true);
+		nstime_iadd(&now, delayns);
+		hpdata_time_purge_allowed_set(ps, &now);
+	}
+	hpdata_purge_allowed_set(ps, purgable);
+
 	/*
 	 * Once a hugepage has become eligible for hugification, we don't mark
 	 * it as ineligible just because it stops meeting the criteria (this
 	 * could lead to situations where a hugepage that spends most of its
 	 * time meeting the criteria never quite getting hugified if there are
 	 * intervening deallocations).  The idea is that the hugification delay
-	 * will allow them to get purged, reseting their "hugify-allowed" bit.
+	 * will allow them to get purged, resetting their "hugify-allowed" bit.
 	 * If they don't get purged, then the hugification isn't hurting and
 	 * might help.  As an exception, we don't hugify hugepages that are now
 	 * empty; it definitely doesn't help there until the hugepage gets
 	 * reused, which is likely not for a while.
 	 */
-	if (hpdata_nactive_get(ps) == 0) {
+	if (hpdata_nactive_get(ps) == 0 && !hpa_should_assume_huge(shard, ps)) {
 		hpdata_disallow_hugify(ps);
 	}
 }
@@ -376,14 +362,26 @@ hpa_shard_has_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) {
 	return to_hugify != NULL || hpa_should_purge(tsdn, shard);
 }
 
-/* Returns whether or not we purged anything. */
-static bool
-hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
-	malloc_mutex_assert_owner(tsdn, &shard->mtx);
+static inline bool
+hpa_needs_dehugify(hpa_shard_t *shard, const hpdata_t *ps) {
+	return (hpa_is_hugify_lazy(shard)
+	           || opt_experimental_hpa_enforce_hugify)
+	    && hpdata_huge_get(ps) && !hpdata_empty(ps);
+}
 
-	hpdata_t *to_purge = psset_pick_purge(&shard->psset);
+/* Prepare purge of one page. Return number of dirty regular pages on it
+ * Return 0 if no purgable huge page is found
+ *
+ * If there was a page to purge its purge state is initialized
+ */
+static inline size_t
+hpa_purge_start_hp(hpa_purge_batch_t *b, hpa_shard_t *shard) {
+	psset_t  *psset = &shard->psset;
+	hpdata_t *to_purge = (shard->opts.min_purge_delay_ms > 0)
+	    ? psset_pick_purge(psset, &shard->last_time_work_attempted)
+	    : psset_pick_purge(psset, NULL);
 	if (to_purge == NULL) {
-		return false;
+		return 0;
 	}
 	assert(hpdata_purge_allowed_get(to_purge));
 	assert(!hpdata_changing_state_get(to_purge));
@@ -393,7 +391,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
 	 * we're purging it (allocations and deallocations are
 	 * OK).
 	 */
-	psset_update_begin(&shard->psset, to_purge);
+	psset_update_begin(psset, to_purge);
 	assert(hpdata_alloc_allowed_get(to_purge));
 	hpdata_mid_purge_set(to_purge, true);
 	hpdata_purge_allowed_set(to_purge, false);
@@ -406,59 +404,103 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
 	 * (clearing out user data).
 	 */
 	hpdata_alloc_allowed_set(to_purge, false);
-	psset_update_end(&shard->psset, to_purge);
+	psset_update_end(psset, to_purge);
 
+	assert(b->item_cnt < b->items_capacity);
+	hpa_purge_item_t *hp_item = &b->items[b->item_cnt];
+	b->item_cnt++;
+	hp_item->hp = to_purge;
 	/* Gather all the metadata we'll need during the purge. */
-	bool dehugify = hpdata_huge_get(to_purge);
-	hpdata_purge_state_t purge_state;
-	size_t num_to_purge = hpdata_purge_begin(to_purge, &purge_state);
+	hp_item->dehugify = hpa_needs_dehugify(shard, hp_item->hp);
+	hpdata_purged_when_empty_and_huge_set(hp_item->hp,
+	    hpdata_huge_get(hp_item->hp) && hpdata_empty(hp_item->hp));
+	size_t nranges;
+	size_t ndirty = hpdata_purge_begin(
+	    hp_item->hp, &hp_item->state, &nranges);
+	/* We picked hp to purge, so it should have some dirty ranges */
+	assert(ndirty > 0 && nranges > 0);
+	b->ndirty_in_batch += ndirty;
+	b->nranges += nranges;
+	return ndirty;
+}
 
-	shard->npending_purge += num_to_purge;
-
-	malloc_mutex_unlock(tsdn, &shard->mtx);
-
-	/* Actually do the purging, now that the lock is dropped. */
-	if (dehugify) {
-		shard->central->hooks.dehugify(hpdata_addr_get(to_purge),
-		    HUGEPAGE);
-	}
-	size_t total_purged = 0;
-	uint64_t purges_this_pass = 0;
-	void *purge_addr;
-	size_t purge_size;
-	while (hpdata_purge_next(to_purge, &purge_state, &purge_addr,
-	    &purge_size)) {
-		total_purged += purge_size;
-		assert(total_purged <= HUGEPAGE);
-		purges_this_pass++;
-		shard->central->hooks.purge(purge_addr, purge_size);
-	}
-
-	malloc_mutex_lock(tsdn, &shard->mtx);
-	/* The shard updates */
-	shard->npending_purge -= num_to_purge;
-	shard->stats.npurge_passes++;
-	shard->stats.npurges += purges_this_pass;
-	shard->central->hooks.curtime(&shard->last_purge,
-	    /* first_reading */ false);
-	if (dehugify) {
+/* Finish purge of one huge page. */
+static inline void
+hpa_purge_finish_hp(
+    tsdn_t *tsdn, hpa_shard_t *shard, hpa_purge_item_t *hp_item) {
+	if (hp_item->dehugify) {
 		shard->stats.ndehugifies++;
 	}
-
 	/* The hpdata updates. */
-	psset_update_begin(&shard->psset, to_purge);
-	if (dehugify) {
-		hpdata_dehugify(to_purge);
+	psset_update_begin(&shard->psset, hp_item->hp);
+	if (hpdata_huge_get(hp_item->hp)) {
+		/*
+		 * Even when dehugify is not explicitly called, the page is
+		 * assumed to be non-huge after purge.
+		 */
+		hpdata_dehugify(hp_item->hp);
 	}
-	hpdata_purge_end(to_purge, &purge_state);
-	hpdata_mid_purge_set(to_purge, false);
+	hpdata_purge_end(hp_item->hp, &hp_item->state);
+	hpdata_mid_purge_set(hp_item->hp, false);
 
-	hpdata_alloc_allowed_set(to_purge, true);
-	hpa_update_purge_hugify_eligibility(tsdn, shard, to_purge);
+	hpdata_alloc_allowed_set(hp_item->hp, true);
+	hpa_update_purge_hugify_eligibility(tsdn, shard, hp_item->hp);
 
-	psset_update_end(&shard->psset, to_purge);
+	psset_update_end(&shard->psset, hp_item->hp);
+}
 
-	return true;
+/* Returns number of huge pages purged. */
+static inline size_t
+hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, size_t max_hp) {
+	malloc_mutex_assert_owner(tsdn, &shard->mtx);
+	assert(max_hp > 0);
+
+	assert(HPA_PURGE_BATCH_MAX > 0);
+	assert(HPA_PURGE_BATCH_MAX
+	    < (VARIABLE_ARRAY_SIZE_MAX / sizeof(hpa_purge_item_t)));
+	VARIABLE_ARRAY(hpa_purge_item_t, items, HPA_PURGE_BATCH_MAX);
+	hpa_purge_batch_t batch = {
+	    .max_hp = max_hp,
+	    .npurged_hp_total = 0,
+	    .items = &items[0],
+	    .items_capacity = HPA_PURGE_BATCH_MAX,
+	    .range_watermark = hpa_process_madvise_max_iovec_len(),
+	};
+	assert(batch.range_watermark > 0);
+
+	while (1) {
+		hpa_batch_pass_start(&batch);
+		assert(hpa_batch_empty(&batch));
+		while (
+		    !hpa_batch_full(&batch) && hpa_should_purge(tsdn, shard)) {
+			size_t ndirty = hpa_purge_start_hp(&batch, shard);
+			if (ndirty == 0) {
+				break;
+			}
+			shard->npending_purge += ndirty;
+			batch.npurged_hp_total++;
+		}
+
+		if (hpa_batch_empty(&batch)) {
+			break;
+		}
+		hpa_hooks_t *hooks = &shard->central->hooks;
+		malloc_mutex_unlock(tsdn, &shard->mtx);
+		hpa_purge_batch(hooks, batch.items, batch.item_cnt);
+		malloc_mutex_lock(tsdn, &shard->mtx);
+
+		/* The shard updates */
+		shard->npending_purge -= batch.ndirty_in_batch;
+		shard->stats.npurges += batch.ndirty_in_batch;
+		shard->central->hooks.curtime(&shard->last_purge,
+		    /* first_reading */ false);
+		for (size_t i = 0; i < batch.item_cnt; ++i) {
+			hpa_purge_finish_hp(tsdn, shard, &batch.items[i]);
+		}
+	}
+	malloc_mutex_assert_owner(tsdn, &shard->mtx);
+	shard->stats.npurge_passes++;
+	return batch.npurged_hp_total;
 }
 
 /* Returns whether or not we hugified anything. */
@@ -495,13 +537,34 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {
 	hpdata_disallow_hugify(to_hugify);
 	assert(hpdata_alloc_allowed_get(to_hugify));
 	psset_update_end(&shard->psset, to_hugify);
-
-	malloc_mutex_unlock(tsdn, &shard->mtx);
-
-	shard->central->hooks.hugify(hpdata_addr_get(to_hugify), HUGEPAGE);
-
-	malloc_mutex_lock(tsdn, &shard->mtx);
-	shard->stats.nhugifies++;
+	/*
+	 * Without lazy hugification, user relies on eagerly setting HG bit, or
+	 * leaving everything up to the kernel (ex: thp enabled=always).  We
+	 * will still pretend that call succeeds to keep our accounting close to
+	 * what user believes is the truth on the target system, but we won't
+	 * update nhugifies stat as system call is not being made.
+	 */
+	if (hpa_is_hugify_lazy(shard) || opt_experimental_hpa_enforce_hugify) {
+		malloc_mutex_unlock(tsdn, &shard->mtx);
+		bool err = shard->central->hooks.hugify(
+		    hpdata_addr_get(to_hugify), HUGEPAGE,
+		    shard->opts.hugify_sync);
+		malloc_mutex_lock(tsdn, &shard->mtx);
+		shard->stats.nhugifies++;
+		if (err) {
+			/*
+			 * When asynchronous hugification is used
+			 * (shard->opts.hugify_sync option is false), we are not
+			 * expecting to get here, unless something went terrible
+			 * wrong. Because underlying syscall is only setting
+			 * kernel flag for memory range (actual hugification
+			 * happens asynchronously and we are not getting any
+			 * feedback about its outcome), we expect syscall to be
+			 * successful all the time.
+			 */
+			shard->stats.nhugify_failures++;
+		}
+	}
 
 	psset_update_begin(&shard->psset, to_hugify);
 	hpdata_hugify(to_hugify);
@@ -512,51 +575,87 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {
 	return true;
 }
 
+static bool
+hpa_min_purge_interval_passed(tsdn_t *tsdn, hpa_shard_t *shard) {
+	malloc_mutex_assert_owner(tsdn, &shard->mtx);
+	uint64_t since_last_purge_ms = nstime_ms_between(
+	    &shard->last_purge, &shard->last_time_work_attempted);
+	return since_last_purge_ms >= shard->opts.min_purge_interval_ms;
+}
+
+static inline void
+hpa_update_time_work_attempted(tsdn_t *tsdn, hpa_shard_t *shard) {
+	malloc_mutex_assert_owner(tsdn, &shard->mtx);
+	shard->central->hooks.curtime(&shard->last_time_work_attempted,
+	    /* first_reading */ false);
+}
+
 /*
  * Execution of deferred work is forced if it's triggered by an explicit
  * hpa_shard_do_deferred_work() call.
  */
 static void
-hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard,
-    bool forced) {
+hpa_shard_maybe_do_deferred_work(
+    tsdn_t *tsdn, hpa_shard_t *shard, bool forced) {
 	malloc_mutex_assert_owner(tsdn, &shard->mtx);
 	if (!forced && shard->opts.deferral_allowed) {
 		return;
 	}
+	hpa_update_time_work_attempted(tsdn, shard);
+
 	/*
 	 * If we're on a background thread, do work so long as there's work to
 	 * be done.  Otherwise, bound latency to not be *too* bad by doing at
 	 * most a small fixed number of operations.
 	 */
-	bool hugified = false;
-	bool purged = false;
 	size_t max_ops = (forced ? (size_t)-1 : 16);
 	size_t nops = 0;
-	do {
+
+	/*
+	 * Always purge before hugifying, to make sure we get some
+	 * ability to hit our quiescence targets.
+	 */
+
+	/*
+	 * Make sure we respect purge interval setting and don't purge
+	 * too frequently.
+	 */
+	if (hpa_min_purge_interval_passed(tsdn, shard)) {
+		size_t max_purges = max_ops;
 		/*
-		 * Always purge before hugifying, to make sure we get some
-		 * ability to hit our quiescence targets.
+		 * Limit number of hugepages (slabs) to purge.
+		 * When experimental_max_purge_nhp option is used, there is no
+		 * guarantee we'll always respect dirty_mult option.  Option
+		 * experimental_max_purge_nhp provides a way to configure same
+		 * behavior as was possible before, with buggy implementation
+		 * of purging algorithm.
 		 */
-		purged = false;
-		while (hpa_should_purge(tsdn, shard) && nops < max_ops) {
-			purged = hpa_try_purge(tsdn, shard);
-			if (purged) {
-				nops++;
-			}
-		}
-		hugified = hpa_try_hugify(tsdn, shard);
-		if (hugified) {
-			nops++;
+		ssize_t max_purge_nhp = shard->opts.experimental_max_purge_nhp;
+		if (max_purge_nhp != -1 && max_purges > (size_t)max_purge_nhp) {
+			max_purges = max_purge_nhp;
 		}
+
 		malloc_mutex_assert_owner(tsdn, &shard->mtx);
+		nops += hpa_purge(tsdn, shard, max_purges);
 		malloc_mutex_assert_owner(tsdn, &shard->mtx);
-	} while ((hugified || purged) && nops < max_ops);
+	}
+
+	/*
+	 * Try to hugify at least once, even if we out of operations to make at
+	 * least some progress on hugification even at worst case.
+	 */
+	while (hpa_try_hugify(tsdn, shard) && nops < max_ops) {
+		malloc_mutex_assert_owner(tsdn, &shard->mtx);
+		nops++;
+	}
 }
 
 static edata_t *
-hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
-    bool *oom) {
-	bool err;
+hpa_try_alloc_one_no_grow(
+    tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) {
+	malloc_mutex_assert_owner(tsdn, &shard->mtx);
+
+	bool     err;
 	edata_t *edata = edata_cache_fast_get(tsdn, &shard->ecf);
 	if (edata == NULL) {
 		*oom = true;
@@ -576,15 +675,17 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
 		 * If the pageslab used to be empty, treat it as though it's
 		 * brand new for fragmentation-avoidance purposes; what we're
 		 * trying to approximate is the age of the allocations *in* that
-		 * pageslab, and the allocations in the new pageslab are
-		 * definitionally the youngest in this hpa shard.
+		 * pageslab, and the allocations in the new pageslab are by
+		 * definition the youngest in this hpa shard.
 		 */
 		hpdata_age_set(ps, shard->age_counter++);
 	}
 
 	void *addr = hpdata_reserve_alloc(ps, size);
-	edata_init(edata, shard->ind, addr, size, /* slab */ false,
-	    SC_NSIZES, /* sn */ hpdata_age_get(ps), extent_state_active,
+	JE_USDT(hpa_alloc, 5, shard->ind, addr, size, hpdata_nactive_get(ps),
+	    hpdata_age_get(ps));
+	edata_init(edata, shard->ind, addr, size, /* slab */ false, SC_NSIZES,
+	    /* sn */ hpdata_age_get(ps), extent_state_active,
 	    /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA,
 	    EXTENT_NOT_HEAD);
 	edata_ps_set(edata, ps);
@@ -597,11 +698,14 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
 	 * dropped.  This would force us to deal with a pageslab eviction down
 	 * the error pathway, which is a pain.
 	 */
-	err = emap_register_boundary(tsdn, shard->emap, edata,
-	    SC_NSIZES, /* slab */ false);
+	err = emap_register_boundary(
+	    tsdn, shard->emap, edata, SC_NSIZES, /* slab */ false);
 	if (err) {
-		hpdata_unreserve(ps, edata_addr_get(edata),
-		    edata_size_get(edata));
+		hpdata_unreserve(
+		    ps, edata_addr_get(edata), edata_size_get(edata));
+		JE_USDT(hpa_dalloc_err, 5, shard->ind, edata_addr_get(edata),
+		    edata_size_get(edata), hpdata_nactive_get(ps),
+		    hpdata_age_get(ps));
 		/*
 		 * We should arguably reset dirty state here, but this would
 		 * require some sort of prepare + commit functionality that's a
@@ -623,14 +727,14 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
 }
 
 static size_t
-hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
-    bool *oom, size_t nallocs, edata_list_active_t *results,
+hpa_try_alloc_batch_no_grow_locked(tsdn_t *tsdn, hpa_shard_t *shard,
+    size_t size, bool *oom, size_t nallocs, edata_list_active_t *results,
     bool *deferred_work_generated) {
-	malloc_mutex_lock(tsdn, &shard->mtx);
+	malloc_mutex_assert_owner(tsdn, &shard->mtx);
 	size_t nsuccess = 0;
 	for (; nsuccess < nallocs; nsuccess++) {
-		edata_t *edata = hpa_try_alloc_one_no_grow(tsdn, shard, size,
-		    oom);
+		edata_t *edata = hpa_try_alloc_one_no_grow(
+		    tsdn, shard, size, oom);
 		if (edata == NULL) {
 			break;
 		}
@@ -639,6 +743,16 @@ hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
 
 	hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ false);
 	*deferred_work_generated = hpa_shard_has_deferred_work(tsdn, shard);
+	return nsuccess;
+}
+
+static size_t
+hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
+    bool *oom, size_t nallocs, edata_list_active_t *results,
+    bool *deferred_work_generated) {
+	malloc_mutex_lock(tsdn, &shard->mtx);
+	size_t nsuccess = hpa_try_alloc_batch_no_grow_locked(
+	    tsdn, shard, size, oom, nallocs, results, deferred_work_generated);
 	malloc_mutex_unlock(tsdn, &shard->mtx);
 	return nsuccess;
 }
@@ -647,11 +761,12 @@ static size_t
 hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
     size_t nallocs, edata_list_active_t *results,
     bool *deferred_work_generated) {
-	assert(size <= shard->opts.slab_max_alloc);
+	assert(size <= HUGEPAGE);
+	assert(size <= shard->opts.slab_max_alloc || size == sz_s2u(size));
 	bool oom = false;
 
-	size_t nsuccess = hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom,
-	    nallocs, results, deferred_work_generated);
+	size_t nsuccess = hpa_try_alloc_batch_no_grow(
+	    tsdn, shard, size, &oom, nallocs, results, deferred_work_generated);
 
 	if (nsuccess == nallocs || oom) {
 		return nsuccess;
@@ -678,24 +793,24 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
 	 * deallocations (and allocations of smaller sizes) may still succeed
 	 * while we're doing this potentially expensive system call.
 	 */
-	hpdata_t *ps = hpa_central_extract(tsdn, shard->central, size, &oom);
+	hpdata_t *ps = hpa_central_extract(tsdn, shard->central, size,
+	    shard->age_counter++, hpa_is_hugify_eager(shard), &oom);
 	if (ps == NULL) {
 		malloc_mutex_unlock(tsdn, &shard->grow_mtx);
 		return nsuccess;
 	}
 
 	/*
-	 * We got the pageslab; allocate from it.  This does an unlock followed
-	 * by a lock on the same mutex, and holds the grow mutex while doing
-	 * deferred work, but this is an uncommon path; the simplicity is worth
-	 * it.
+	 * We got the pageslab; allocate from it.  This holds the grow mutex
+	 * while doing deferred work, but this is an uncommon path; the
+	 * simplicity is worth it.
 	 */
 	malloc_mutex_lock(tsdn, &shard->mtx);
 	psset_insert(&shard->psset, ps);
+	nsuccess += hpa_try_alloc_batch_no_grow_locked(tsdn, shard, size, &oom,
+	    nallocs - nsuccess, results, deferred_work_generated);
 	malloc_mutex_unlock(tsdn, &shard->mtx);
 
-	nsuccess += hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom,
-	    nallocs - nsuccess, results, deferred_work_generated);
 	/*
 	 * Drop grow_mtx before doing deferred work; other threads blocked on it
 	 * should be allowed to proceed while we're working.
@@ -707,32 +822,16 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
 
 static hpa_shard_t *
 hpa_from_pai(pai_t *self) {
-	assert(self->alloc = &hpa_alloc);
-	assert(self->expand = &hpa_expand);
-	assert(self->shrink = &hpa_shrink);
-	assert(self->dalloc = &hpa_dalloc);
+	assert(self->alloc == &hpa_alloc);
+	assert(self->expand == &hpa_expand);
+	assert(self->shrink == &hpa_shrink);
+	assert(self->dalloc == &hpa_dalloc);
 	return (hpa_shard_t *)self;
 }
 
-static size_t
-hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs,
-    edata_list_active_t *results, bool *deferred_work_generated) {
-	assert(nallocs > 0);
-	assert((size & PAGE_MASK) == 0);
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
-	hpa_shard_t *shard = hpa_from_pai(self);
-
-	if (size > shard->opts.slab_max_alloc) {
-		return 0;
-	}
-
-	size_t nsuccess = hpa_alloc_batch_psset(tsdn, shard, size, nallocs,
-	    results, deferred_work_generated);
-
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
-
+static void
+hpa_assert_results(
+    tsdn_t *tsdn, hpa_shard_t *shard, edata_list_active_t *results) {
 	/*
 	 * Guard the sanity checks with config_debug because the loop cannot be
 	 * proven non-circular by the compiler, even if everything within the
@@ -740,20 +839,19 @@ hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs,
 	 */
 	if (config_debug) {
 		edata_t *edata;
-		ql_foreach(edata, &results->head, ql_link_active) {
+		ql_foreach (edata, &results->head, ql_link_active) {
 			emap_assert_mapped(tsdn, shard->emap, edata);
 			assert(edata_pai_get(edata) == EXTENT_PAI_HPA);
 			assert(edata_state_get(edata) == extent_state_active);
 			assert(edata_arena_ind_get(edata) == shard->ind);
-			assert(edata_szind_get_maybe_invalid(edata) ==
-			    SC_NSIZES);
+			assert(
+			    edata_szind_get_maybe_invalid(edata) == SC_NSIZES);
 			assert(!edata_slab_get(edata));
 			assert(edata_committed_get(edata));
 			assert(edata_base_get(edata) == edata_addr_get(edata));
 			assert(edata_base_get(edata) != NULL);
 		}
 	}
-	return nsuccess;
 }
 
 static edata_t *
@@ -761,23 +859,59 @@ hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero,
     bool guarded, bool frequent_reuse, bool *deferred_work_generated) {
 	assert((size & PAGE_MASK) == 0);
 	assert(!guarded);
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
+	witness_assert_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0);
 
 	/* We don't handle alignment or zeroing for now. */
 	if (alignment > PAGE || zero) {
 		return NULL;
 	}
+	hpa_shard_t *shard = hpa_from_pai(self);
+
 	/*
-	 * An alloc with alignment == PAGE and zero == false is equivalent to a
-	 * batch alloc of 1.  Just do that, so we can share code.
+	 * frequent_use here indicates this request comes from the arena bins,
+	 * in which case it will be split into slabs, and therefore there is no
+	 * intrinsic slack in the allocation (the entire range of allocated size
+	 * will be accessed).
+	 *
+	 * In this case bypass the slab_max_alloc limit (if still within the
+	 * huge page size).  These requests do not concern internal
+	 * fragmentation with huge pages (again, the full size will be used).
 	 */
+	if (!(frequent_reuse && size <= HUGEPAGE)
+	    && (size > shard->opts.slab_max_alloc)) {
+		return NULL;
+	}
+	edata_t *edata = sec_alloc(tsdn, &shard->sec, size);
+	if (edata != NULL) {
+		return edata;
+	}
+	size_t              nallocs = sec_size_supported(&shard->sec, size)
+	                 ? shard->sec.opts.batch_fill_extra + 1
+	                 : 1;
 	edata_list_active_t results;
 	edata_list_active_init(&results);
-	size_t nallocs = hpa_alloc_batch(tsdn, self, size, /* nallocs */ 1,
-	    &results, deferred_work_generated);
-	assert(nallocs == 0 || nallocs == 1);
-	edata_t *edata = edata_list_active_first(&results);
+	size_t nsuccess = hpa_alloc_batch_psset(
+	    tsdn, shard, size, nallocs, &results, deferred_work_generated);
+	hpa_assert_results(tsdn, shard, &results);
+	edata = edata_list_active_first(&results);
+
+	if (edata != NULL) {
+		edata_list_active_remove(&results, edata);
+		assert(nsuccess > 0);
+		nsuccess--;
+	}
+	if (nsuccess > 0) {
+		assert(sec_size_supported(&shard->sec, size));
+		sec_fill(tsdn, &shard->sec, size, &results, nsuccess);
+		/* Unlikely rollback in case of overfill */
+		if (!edata_list_active_empty(&results)) {
+			hpa_dalloc_batch(
+			    tsdn, self, &results, deferred_work_generated);
+		}
+	}
+	witness_assert_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0);
 	return edata;
 }
 
@@ -789,8 +923,8 @@ hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
 }
 
 static bool
-hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    size_t old_size, size_t new_size, bool *deferred_work_generated) {
+hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
+    size_t new_size, bool *deferred_work_generated) {
 	/* Shrink not yet supported. */
 	return true;
 }
@@ -835,12 +969,14 @@ hpa_dalloc_locked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) {
 	hpdata_t *ps = edata_ps_get(edata);
 	/* Currently, all edatas come from pageslabs. */
 	assert(ps != NULL);
-	void *unreserve_addr = edata_addr_get(edata);
+	void  *unreserve_addr = edata_addr_get(edata);
 	size_t unreserve_size = edata_size_get(edata);
 	edata_cache_fast_put(tsdn, &shard->ecf, edata);
 
 	psset_update_begin(&shard->psset, ps);
 	hpdata_unreserve(ps, unreserve_addr, unreserve_size);
+	JE_USDT(hpa_dalloc, 5, shard->ind, unreserve_addr, unreserve_size,
+	    hpdata_nactive_get(ps), hpdata_age_get(ps));
 	hpa_update_purge_hugify_eligibility(tsdn, shard, ps);
 	psset_update_end(&shard->psset, ps);
 }
@@ -851,7 +987,7 @@ hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list,
 	hpa_shard_t *shard = hpa_from_pai(self);
 
 	edata_t *edata;
-	ql_foreach(edata, &list->head, ql_link_active) {
+	ql_foreach (edata, &list->head, ql_link_active) {
 		hpa_dalloc_prepare_unlocked(tsdn, shard, edata);
 	}
 
@@ -862,20 +998,28 @@ hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list,
 		hpa_dalloc_locked(tsdn, shard, edata);
 	}
 	hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ false);
-	*deferred_work_generated =
-	    hpa_shard_has_deferred_work(tsdn, shard);
+	*deferred_work_generated = hpa_shard_has_deferred_work(tsdn, shard);
 
 	malloc_mutex_unlock(tsdn, &shard->mtx);
 }
 
 static void
-hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    bool *deferred_work_generated) {
+hpa_dalloc(
+    tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) {
 	assert(!edata_guarded_get(edata));
-	/* Just a dalloc_batch of size 1; this lets us share logic. */
+
 	edata_list_active_t dalloc_list;
 	edata_list_active_init(&dalloc_list);
 	edata_list_active_append(&dalloc_list, edata);
+
+	hpa_shard_t *shard = hpa_from_pai(self);
+	sec_dalloc(tsdn, &shard->sec, &dalloc_list);
+	if (edata_list_active_empty(&dalloc_list)) {
+		/* sec consumed the pointer */
+		*deferred_work_generated = false;
+		return;
+	}
+	/* We may have more than one pointer to flush now */
 	hpa_dalloc_batch(tsdn, self, &dalloc_list, deferred_work_generated);
 }
 
@@ -886,14 +1030,14 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
 static uint64_t
 hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) {
 	hpa_shard_t *shard = hpa_from_pai(self);
-	uint64_t time_ns = BACKGROUND_THREAD_DEFERRED_MAX;
+	uint64_t     time_ns = BACKGROUND_THREAD_DEFERRED_MAX;
 
 	malloc_mutex_lock(tsdn, &shard->mtx);
 
 	hpdata_t *to_hugify = psset_pick_hugify(&shard->psset);
 	if (to_hugify != NULL) {
-		nstime_t time_hugify_allowed =
-		    hpdata_time_hugify_allowed(to_hugify);
+		nstime_t time_hugify_allowed = hpdata_time_hugify_allowed(
+		    to_hugify);
 		uint64_t since_hugify_allowed_ms =
 		    shard->central->hooks.ms_since(&time_hugify_allowed);
 		/*
@@ -901,8 +1045,8 @@ hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) {
 		 * sleep for the rest.
 		 */
 		if (since_hugify_allowed_ms < shard->opts.hugify_delay_ms) {
-			time_ns = shard->opts.hugify_delay_ms -
-			    since_hugify_allowed_ms;
+			time_ns = shard->opts.hugify_delay_ms
+			    - since_hugify_allowed_ms;
 			time_ns *= 1000 * 1000;
 		} else {
 			malloc_mutex_unlock(tsdn, &shard->mtx);
@@ -924,8 +1068,8 @@ hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) {
 
 		if (since_last_purge_ms < shard->opts.min_purge_interval_ms) {
 			uint64_t until_purge_ns;
-			until_purge_ns = shard->opts.min_purge_interval_ms -
-			    since_last_purge_ms;
+			until_purge_ns = shard->opts.min_purge_interval_ms
+			    - since_last_purge_ms;
 			until_purge_ns *= 1000 * 1000;
 
 			if (until_purge_ns < time_ns) {
@@ -939,15 +1083,32 @@ hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) {
 	return time_ns;
 }
 
+static void
+hpa_sec_flush_impl(tsdn_t *tsdn, hpa_shard_t *shard) {
+	edata_list_active_t to_flush;
+	edata_list_active_init(&to_flush);
+
+	sec_flush(tsdn, &shard->sec, &to_flush);
+	bool deferred_work_generated;
+	hpa_dalloc_batch(
+	    tsdn, (pai_t *)shard, &to_flush, &deferred_work_generated);
+}
+
 void
 hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) {
 	hpa_do_consistency_checks(shard);
+	hpa_sec_flush_impl(tsdn, shard);
 
 	malloc_mutex_lock(tsdn, &shard->mtx);
 	edata_cache_fast_disable(tsdn, &shard->ecf);
 	malloc_mutex_unlock(tsdn, &shard->mtx);
 }
 
+void
+hpa_shard_flush(tsdn_t *tsdn, hpa_shard_t *shard) {
+	hpa_sec_flush_impl(tsdn, shard);
+}
+
 static void
 hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) {
 	assert(bin_stats->npageslabs == 0);
@@ -969,6 +1130,7 @@ hpa_assert_empty(tsdn_t *tsdn, hpa_shard_t *shard, psset_t *psset) {
 void
 hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) {
 	hpa_do_consistency_checks(shard);
+	hpa_shard_flush(tsdn, shard);
 	/*
 	 * By the time we're here, the arena code should have dalloc'd all the
 	 * active extents, which means we should have eventually evicted
@@ -990,8 +1152,8 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) {
 }
 
 void
-hpa_shard_set_deferral_allowed(tsdn_t *tsdn, hpa_shard_t *shard,
-    bool deferral_allowed) {
+hpa_shard_set_deferral_allowed(
+    tsdn_t *tsdn, hpa_shard_t *shard, bool deferral_allowed) {
 	hpa_do_consistency_checks(shard);
 
 	malloc_mutex_lock(tsdn, &shard->mtx);
@@ -1013,6 +1175,12 @@ hpa_shard_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) {
 	malloc_mutex_unlock(tsdn, &shard->mtx);
 }
 
+void
+hpa_shard_prefork2(tsdn_t *tsdn, hpa_shard_t *shard) {
+	hpa_do_consistency_checks(shard);
+	sec_prefork2(tsdn, &shard->sec);
+}
+
 void
 hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard) {
 	hpa_do_consistency_checks(shard);
@@ -1031,6 +1199,7 @@ void
 hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard) {
 	hpa_do_consistency_checks(shard);
 
+	sec_postfork_parent(tsdn, &shard->sec);
 	malloc_mutex_postfork_parent(tsdn, &shard->grow_mtx);
 	malloc_mutex_postfork_parent(tsdn, &shard->mtx);
 }
@@ -1039,6 +1208,7 @@ void
 hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard) {
 	hpa_do_consistency_checks(shard);
 
+	sec_postfork_child(tsdn, &shard->sec);
 	malloc_mutex_postfork_child(tsdn, &shard->grow_mtx);
 	malloc_mutex_postfork_child(tsdn, &shard->mtx);
 }
diff --git a/src/hpa_central.c b/src/hpa_central.c
new file mode 100644
index 00000000..b4f770c2
--- /dev/null
+++ b/src/hpa_central.c
@@ -0,0 +1,121 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/hpa_central.h"
+#include "jemalloc/internal/tsd.h"
+#include "jemalloc/internal/witness.h"
+
+#define HPA_EDEN_SIZE (128 * HUGEPAGE)
+
+bool
+hpa_central_init(
+    hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks) {
+	/* malloc_conf processing should have filtered out these cases. */
+	assert(hpa_supported());
+	bool err;
+	err = malloc_mutex_init(&central->grow_mtx, "hpa_central_grow",
+	    WITNESS_RANK_HPA_CENTRAL_GROW, malloc_mutex_rank_exclusive);
+	if (err) {
+		return true;
+	}
+
+	central->base = base;
+	central->eden = NULL;
+	central->eden_len = 0;
+	central->hooks = *hooks;
+	return false;
+}
+
+static hpdata_t *
+hpa_alloc_ps(tsdn_t *tsdn, hpa_central_t *central) {
+	return (hpdata_t *)base_alloc(
+	    tsdn, central->base, sizeof(hpdata_t), CACHELINE);
+}
+
+hpdata_t *
+hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size,
+    uint64_t age, bool hugify_eager, bool *oom) {
+	/* Don't yet support big allocations; these should get filtered out. */
+	assert(size <= HUGEPAGE);
+	/*
+	 * Should only try to extract from the central allocator if the local
+	 * shard is exhausted.  We should hold the grow_mtx on that shard.
+	 */
+	witness_assert_positive_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_HPA_SHARD_GROW);
+
+	malloc_mutex_lock(tsdn, &central->grow_mtx);
+	*oom = false;
+
+	hpdata_t *ps = NULL;
+	bool      start_as_huge = hugify_eager
+	    || (init_system_thp_mode == system_thp_mode_always
+	        && opt_experimental_hpa_start_huge_if_thp_always);
+
+	/* Is eden a perfect fit? */
+	if (central->eden != NULL && central->eden_len == HUGEPAGE) {
+		ps = hpa_alloc_ps(tsdn, central);
+		if (ps == NULL) {
+			*oom = true;
+			malloc_mutex_unlock(tsdn, &central->grow_mtx);
+			return NULL;
+		}
+		hpdata_init(ps, central->eden, age, start_as_huge);
+		central->eden = NULL;
+		central->eden_len = 0;
+		malloc_mutex_unlock(tsdn, &central->grow_mtx);
+		return ps;
+	}
+
+	/*
+	 * We're about to try to allocate from eden by splitting.  If eden is
+	 * NULL, we have to allocate it too.  Otherwise, we just have to
+	 * allocate an edata_t for the new psset.
+	 */
+	if (central->eden == NULL) {
+		/* Allocate address space, bailing if we fail. */
+		void *new_eden = central->hooks.map(HPA_EDEN_SIZE);
+		if (new_eden == NULL) {
+			*oom = true;
+			malloc_mutex_unlock(tsdn, &central->grow_mtx);
+			return NULL;
+		}
+		if (hugify_eager) {
+			central->hooks.hugify(
+			    new_eden, HPA_EDEN_SIZE, /* sync */ false);
+		}
+		ps = hpa_alloc_ps(tsdn, central);
+		if (ps == NULL) {
+			central->hooks.unmap(new_eden, HPA_EDEN_SIZE);
+			*oom = true;
+			malloc_mutex_unlock(tsdn, &central->grow_mtx);
+			return NULL;
+		}
+		central->eden = new_eden;
+		central->eden_len = HPA_EDEN_SIZE;
+	} else {
+		/* Eden is already nonempty; only need an edata for ps. */
+		ps = hpa_alloc_ps(tsdn, central);
+		if (ps == NULL) {
+			*oom = true;
+			malloc_mutex_unlock(tsdn, &central->grow_mtx);
+			return NULL;
+		}
+	}
+	assert(ps != NULL);
+	assert(central->eden != NULL);
+	assert(central->eden_len > HUGEPAGE);
+	assert(central->eden_len % HUGEPAGE == 0);
+	assert(HUGEPAGE_ADDR2BASE(central->eden) == central->eden);
+
+	hpdata_init(ps, central->eden, age, start_as_huge);
+
+	char *eden_char = (char *)central->eden;
+	eden_char += HUGEPAGE;
+	central->eden = (void *)eden_char;
+	central->eden_len -= HUGEPAGE;
+
+	malloc_mutex_unlock(tsdn, &central->grow_mtx);
+
+	return ps;
+}
diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c
index ade581e8..2ec7029d 100644
--- a/src/hpa_hooks.c
+++ b/src/hpa_hooks.c
@@ -2,50 +2,75 @@
 #include "jemalloc/internal/jemalloc_internal_includes.h"
 
 #include "jemalloc/internal/hpa_hooks.h"
+#include "jemalloc/internal/jemalloc_probe.h"
 
-static void *hpa_hooks_map(size_t size);
-static void hpa_hooks_unmap(void *ptr, size_t size);
-static void hpa_hooks_purge(void *ptr, size_t size);
-static void hpa_hooks_hugify(void *ptr, size_t size);
-static void hpa_hooks_dehugify(void *ptr, size_t size);
-static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading);
+static void    *hpa_hooks_map(size_t size);
+static void     hpa_hooks_unmap(void *ptr, size_t size);
+static void     hpa_hooks_purge(void *ptr, size_t size);
+static bool     hpa_hooks_hugify(void *ptr, size_t size, bool sync);
+static void     hpa_hooks_dehugify(void *ptr, size_t size);
+static void     hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading);
 static uint64_t hpa_hooks_ms_since(nstime_t *past_nstime);
+static bool hpa_hooks_vectorized_purge(void *vec, size_t vlen, size_t nbytes);
 
-hpa_hooks_t hpa_hooks_default = {
-	&hpa_hooks_map,
-	&hpa_hooks_unmap,
-	&hpa_hooks_purge,
-	&hpa_hooks_hugify,
-	&hpa_hooks_dehugify,
-	&hpa_hooks_curtime,
-	&hpa_hooks_ms_since
-};
+const hpa_hooks_t hpa_hooks_default = {&hpa_hooks_map, &hpa_hooks_unmap,
+    &hpa_hooks_purge, &hpa_hooks_hugify, &hpa_hooks_dehugify,
+    &hpa_hooks_curtime, &hpa_hooks_ms_since, &hpa_hooks_vectorized_purge};
 
 static void *
 hpa_hooks_map(size_t size) {
+	/*
+	 * During development, we're primarily concerned with systems
+	 * that overcommit.  Eventually, we should be more careful here.
+	 */
+
 	bool commit = true;
-	return pages_map(NULL, size, HUGEPAGE, &commit);
+	assert((size & HUGEPAGE_MASK) == 0);
+	void *ret = pages_map(NULL, size, HUGEPAGE, &commit);
+	JE_USDT(hpa_map, 2, size, ret);
+	return ret;
 }
 
 static void
 hpa_hooks_unmap(void *ptr, size_t size) {
+	JE_USDT(hpa_unmap, 2, size, ptr);
 	pages_unmap(ptr, size);
 }
 
 static void
 hpa_hooks_purge(void *ptr, size_t size) {
+	JE_USDT(hpa_purge, 2, size, ptr);
 	pages_purge_forced(ptr, size);
 }
 
-static void
-hpa_hooks_hugify(void *ptr, size_t size) {
+static bool
+hpa_hooks_hugify(void *ptr, size_t size, bool sync) {
+	/*
+	 * We mark memory range as huge independently on which hugification
+	 * technique is used (synchronous or asynchronous) to have correct
+	 * VmFlags set for introspection and accounting purposes.  If
+	 * synchronous hugification is enabled and pages_collapse call fails,
+	 * then we hope memory range will be hugified asynchronously by
+	 * khugepaged eventually.  Right now, 3 out of 4 error return codes of
+	 * madvise(..., MADV_COLLAPSE) are retryable.  Instead of retrying, we
+	 * just fallback to asynchronous khugepaged hugification to simplify
+	 * implementation, even if we might know khugepaged fallback will not
+	 * be successful (current madvise(..., MADV_COLLAPSE) implementation
+	 * hints, when EINVAL is returned it is likely that khugepaged won't be
+	 * able to collapse memory range into hugepage either).
+	 */
 	bool err = pages_huge(ptr, size);
-	(void)err;
+	if (sync) {
+		err = pages_collapse(ptr, size);
+	}
+	JE_USDT(hpa_hugify, 4, size, ptr, err, sync);
+	return err;
 }
 
 static void
 hpa_hooks_dehugify(void *ptr, size_t size) {
 	bool err = pages_nohuge(ptr, size);
+	JE_USDT(hpa_dehugify, 3, size, ptr, err);
 	(void)err;
 }
 
@@ -59,5 +84,17 @@ hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading) {
 
 static uint64_t
 hpa_hooks_ms_since(nstime_t *past_nstime) {
-	return nstime_ns_since(past_nstime) / 1000 / 1000;
+	return nstime_ms_since(past_nstime);
+}
+
+/* Return true if we did not purge all nbytes, or on some error */
+static bool
+hpa_hooks_vectorized_purge(void *vec, size_t vlen, size_t nbytes) {
+#ifdef JEMALLOC_HAVE_PROCESS_MADVISE
+	bool err = pages_purge_process_madvise(vec, vlen, nbytes);
+	JE_USDT(hpa_vectorized_purge, 3, nbytes, vlen, err);
+	return err;
+#else
+	return true;
+#endif
 }
diff --git a/src/hpa_utils.c b/src/hpa_utils.c
new file mode 100644
index 00000000..59bb0d1f
--- /dev/null
+++ b/src/hpa_utils.c
@@ -0,0 +1,33 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/hpa_utils.h"
+
+void
+hpa_purge_batch(hpa_hooks_t *hooks, hpa_purge_item_t *batch, size_t batch_sz) {
+	assert(batch_sz > 0);
+
+	size_t len = hpa_process_madvise_max_iovec_len();
+	VARIABLE_ARRAY(hpa_io_vector_t, vec, len);
+
+	hpa_range_accum_t accum;
+	hpa_range_accum_init(&accum, vec, len);
+
+	for (size_t i = 0; i < batch_sz; ++i) {
+		/* Actually do the purging, now that the lock is dropped. */
+		if (batch[i].dehugify) {
+			hooks->dehugify(hpdata_addr_get(batch[i].hp), HUGEPAGE);
+		}
+		void  *purge_addr;
+		size_t purge_size;
+		size_t total_purged_on_one_hp = 0;
+		while (hpdata_purge_next(
+		    batch[i].hp, &batch[i].state, &purge_addr, &purge_size)) {
+			total_purged_on_one_hp += purge_size;
+			assert(total_purged_on_one_hp <= HUGEPAGE);
+			hpa_range_accum_add(
+			    &accum, purge_addr, purge_size, hooks);
+		}
+	}
+	hpa_range_accum_finish(&accum, hooks);
+}
diff --git a/src/hpdata.c b/src/hpdata.c
index e7d7294c..e17d9ecf 100644
--- a/src/hpdata.c
+++ b/src/hpdata.c
@@ -17,11 +17,10 @@ hpdata_age_comp(const hpdata_t *a, const hpdata_t *b) {
 
 ph_gen(, hpdata_age_heap, hpdata_t, age_link, hpdata_age_comp)
 
-void
-hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) {
+    void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age, bool is_huge) {
 	hpdata_addr_set(hpdata, addr);
 	hpdata_age_set(hpdata, age);
-	hpdata->h_huge = false;
+	hpdata->h_huge = is_huge;
 	hpdata->h_alloc_allowed = true;
 	hpdata->h_in_psset_alloc_container = false;
 	hpdata->h_purge_allowed = false;
@@ -34,8 +33,16 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) {
 	hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES);
 	hpdata->h_nactive = 0;
 	fb_init(hpdata->active_pages, HUGEPAGE_PAGES);
-	hpdata->h_ntouched = 0;
-	fb_init(hpdata->touched_pages, HUGEPAGE_PAGES);
+	if (is_huge) {
+		fb_set_range(
+		    hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES);
+		hpdata->h_ntouched = HUGEPAGE_PAGES;
+	} else {
+		fb_init(hpdata->touched_pages, HUGEPAGE_PAGES);
+		hpdata->h_ntouched = 0;
+	}
+	nstime_init_zero(&hpdata->h_time_purge_allowed);
+	hpdata->h_purged_when_empty_and_huge = false;
 
 	hpdata_assert_consistent(hpdata);
 }
@@ -66,8 +73,8 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) {
 
 	size_t largest_unchosen_range = 0;
 	while (true) {
-		bool found = fb_urange_iter(hpdata->active_pages,
-		    HUGEPAGE_PAGES, start, &begin, &len);
+		bool found = fb_urange_iter(
+		    hpdata->active_pages, HUGEPAGE_PAGES, start, &begin, &len);
 		/*
 		 * A precondition to this function is that hpdata must be able
 		 * to serve the allocation.
@@ -97,8 +104,8 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) {
 	 * We might be about to dirty some memory for the first time; update our
 	 * count if so.
 	 */
-	size_t new_dirty = fb_ucount(hpdata->touched_pages,  HUGEPAGE_PAGES,
-	    result, npages);
+	size_t new_dirty = fb_ucount(
+	    hpdata->touched_pages, HUGEPAGE_PAGES, result, npages);
 	fb_set_range(hpdata->touched_pages, HUGEPAGE_PAGES, result, npages);
 	hpdata->h_ntouched += new_dirty;
 
@@ -129,8 +136,8 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) {
 	}
 
 	hpdata_assert_consistent(hpdata);
-	return (void *)(
-	    (uintptr_t)hpdata_addr_get(hpdata) + (result << LG_PAGE));
+	return (
+	    void *)((byte_t *)hpdata_addr_get(hpdata) + (result << LG_PAGE));
 }
 
 void
@@ -148,10 +155,10 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) {
 
 	fb_unset_range(hpdata->active_pages, HUGEPAGE_PAGES, begin, npages);
 	/* We might have just created a new, larger range. */
-	size_t new_begin = (fb_fls(hpdata->active_pages, HUGEPAGE_PAGES,
-	    begin) + 1);
-	size_t new_end = fb_ffs(hpdata->active_pages, HUGEPAGE_PAGES,
-	    begin + npages - 1);
+	size_t new_begin = (fb_fls(hpdata->active_pages, HUGEPAGE_PAGES, begin)
+	    + 1);
+	size_t new_end = fb_ffs(
+	    hpdata->active_pages, HUGEPAGE_PAGES, begin + npages - 1);
 	size_t new_range_len = new_end - new_begin;
 
 	if (new_range_len > old_longest_range) {
@@ -164,7 +171,8 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) {
 }
 
 size_t
-hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) {
+hpdata_purge_begin(
+    hpdata_t *hpdata, hpdata_purge_state_t *purge_state, size_t *nranges) {
 	hpdata_assert_consistent(hpdata);
 	/*
 	 * See the comment below; we might purge any inactive extent, so it's
@@ -211,34 +219,36 @@ hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) {
 	fb_group_t dirty_pages[FB_NGROUPS(HUGEPAGE_PAGES)];
 	fb_init(dirty_pages, HUGEPAGE_PAGES);
 	fb_bit_not(dirty_pages, hpdata->active_pages, HUGEPAGE_PAGES);
-	fb_bit_and(dirty_pages, dirty_pages, hpdata->touched_pages,
-	    HUGEPAGE_PAGES);
+	fb_bit_and(
+	    dirty_pages, dirty_pages, hpdata->touched_pages, HUGEPAGE_PAGES);
 
 	fb_init(purge_state->to_purge, HUGEPAGE_PAGES);
 	size_t next_bit = 0;
+	*nranges = 0;
 	while (next_bit < HUGEPAGE_PAGES) {
-		size_t next_dirty = fb_ffs(dirty_pages, HUGEPAGE_PAGES,
-		    next_bit);
+		size_t next_dirty = fb_ffs(
+		    dirty_pages, HUGEPAGE_PAGES, next_bit);
 		/* Recall that fb_ffs returns nbits if no set bit is found. */
 		if (next_dirty == HUGEPAGE_PAGES) {
 			break;
 		}
-		size_t next_active = fb_ffs(hpdata->active_pages,
-		    HUGEPAGE_PAGES, next_dirty);
+		size_t next_active = fb_ffs(
+		    hpdata->active_pages, HUGEPAGE_PAGES, next_dirty);
 		/*
 		 * Don't purge past the end of the dirty extent, into retained
 		 * pages.  This helps the kernel a tiny bit, but honestly it's
 		 * mostly helpful for testing (where we tend to write test cases
 		 * that think in terms of the dirty ranges).
 		 */
-		ssize_t last_dirty = fb_fls(dirty_pages, HUGEPAGE_PAGES,
-		    next_active - 1);
+		ssize_t last_dirty = fb_fls(
+		    dirty_pages, HUGEPAGE_PAGES, next_active - 1);
 		assert(last_dirty >= 0);
 		assert((size_t)last_dirty >= next_dirty);
 		assert((size_t)last_dirty - next_dirty + 1 <= HUGEPAGE_PAGES);
 
 		fb_set_range(purge_state->to_purge, HUGEPAGE_PAGES, next_dirty,
 		    last_dirty - next_dirty + 1);
+		(*nranges)++;
 		next_bit = next_active + 1;
 	}
 
@@ -246,9 +256,11 @@ hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) {
 	size_t ndirty = hpdata->h_ntouched - hpdata->h_nactive;
 	purge_state->ndirty_to_purge = ndirty;
 	assert(ndirty <= fb_scount(
-	    purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES));
-	assert(ndirty == fb_scount(dirty_pages, HUGEPAGE_PAGES, 0,
-	    HUGEPAGE_PAGES));
+	           purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES));
+	assert(ndirty
+	    == fb_scount(dirty_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES));
+	assert(*nranges <= ndirty);
+	assert(ndirty == 0 || *nranges > 0);
 
 	hpdata_assert_consistent(hpdata);
 
@@ -276,8 +288,8 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state,
 		return false;
 	}
 
-	*r_purge_addr = (void *)(
-	    (uintptr_t)hpdata_addr_get(hpdata) + purge_begin * PAGE);
+	*r_purge_addr = (void *)((byte_t *)hpdata_addr_get(hpdata)
+	    + purge_begin * PAGE);
 	*r_purge_size = purge_len * PAGE;
 
 	purge_state->next_purge_search_begin = purge_begin + purge_len;
@@ -294,12 +306,13 @@ hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) {
 	/* See the comment in reserve. */
 	assert(!hpdata->h_in_psset || hpdata->h_updating);
 
-	assert(purge_state->npurged == fb_scount(purge_state->to_purge,
-	    HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES));
+	assert(purge_state->npurged
+	    == fb_scount(
+	        purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES));
 	assert(purge_state->npurged >= purge_state->ndirty_to_purge);
 
-	fb_bit_not(purge_state->to_purge, purge_state->to_purge,
-	    HUGEPAGE_PAGES);
+	fb_bit_not(
+	    purge_state->to_purge, purge_state->to_purge, HUGEPAGE_PAGES);
 	fb_bit_and(hpdata->touched_pages, hpdata->touched_pages,
 	    purge_state->to_purge, HUGEPAGE_PAGES);
 	assert(hpdata->h_ntouched >= purge_state->ndirty_to_purge);
diff --git a/src/inspect.c b/src/inspect.c
index 911b5d52..116e77a1 100644
--- a/src/inspect.c
+++ b/src/inspect.c
@@ -1,9 +1,10 @@
 #include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/jemalloc_internal_includes.h"
+#include "jemalloc/internal/inspect.h"
 
 void
-inspect_extent_util_stats_get(tsdn_t *tsdn, const void *ptr, size_t *nfree,
-    size_t *nregs, size_t *size) {
+inspect_extent_util_stats_get(
+    tsdn_t *tsdn, const void *ptr, size_t *nfree, size_t *nregs, size_t *size) {
 	assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL);
 
 	const edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr);
@@ -56,7 +57,7 @@ inspect_extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr,
 	    &arenas[edata_arena_ind_get(edata)], ATOMIC_RELAXED);
 	assert(arena != NULL);
 	const unsigned binshard = edata_binshard_get(edata);
-	bin_t *bin = arena_get_bin(arena, szind, binshard);
+	bin_t         *bin = arena_get_bin(arena, szind, binshard);
 
 	malloc_mutex_lock(tsdn, &bin->lock);
 	if (config_stats) {
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 7655de4e..8d341ba3 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -1,4 +1,3 @@
-#define JEMALLOC_C_
 #include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/jemalloc_internal_includes.h"
 
@@ -26,15 +25,17 @@
 #include "jemalloc/internal/thread_event.h"
 #include "jemalloc/internal/util.h"
 
+#include "jemalloc/internal/conf.h"
+
 /******************************************************************************/
 /* Data. */
 
 /* Runtime configuration options. */
-const char	*je_malloc_conf
+const char *je_malloc_conf
 #ifndef _WIN32
     JEMALLOC_ATTR(weak)
 #endif
-    ;
+        ;
 /*
  * The usual rule is that the closer to runtime you are, the higher priority
  * your configuration settings are (so the jemalloc config options get lower
@@ -52,20 +53,23 @@ const char	*je_malloc_conf
  * We don't actually want this to be widespread, so we'll give it a silly name
  * and not mention it in headers or documentation.
  */
-const char	*je_malloc_conf_2_conf_harder
+const char *je_malloc_conf_2_conf_harder
 #ifndef _WIN32
     JEMALLOC_ATTR(weak)
 #endif
-    ;
+        ;
 
-bool	opt_abort =
+const char *opt_malloc_conf_symlink = NULL;
+const char *opt_malloc_conf_env_var = NULL;
+
+bool opt_abort =
 #ifdef JEMALLOC_DEBUG
     true
 #else
     false
 #endif
     ;
-bool	opt_abort_conf =
+bool opt_abort_conf =
 #ifdef JEMALLOC_DEBUG
     true
 #else
@@ -73,29 +77,29 @@ bool	opt_abort_conf =
 #endif
     ;
 /* Intentionally default off, even with debug builds. */
-bool	opt_confirm_conf = false;
-const char	*opt_junk =
+bool        opt_confirm_conf = false;
+const char *opt_junk =
 #if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL))
     "true"
 #else
     "false"
 #endif
     ;
-bool	opt_junk_alloc =
+bool opt_junk_alloc =
 #if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL))
     true
 #else
     false
 #endif
     ;
-bool	opt_junk_free =
+bool opt_junk_free =
 #if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL))
     true
 #else
     false
 #endif
     ;
-bool	opt_trust_madvise =
+bool opt_trust_madvise =
 #ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
     false
 #else
@@ -121,10 +125,17 @@ zero_realloc_action_t opt_zero_realloc_action =
 
 atomic_zu_t zero_realloc_count = ATOMIC_INIT(0);
 
-const char *zero_realloc_mode_names[] = {
-	"alloc",
-	"free",
-	"abort",
+/*
+ * Disable large size classes is now the default behavior in jemalloc.
+ * Although it is configurable in MALLOC_CONF, this is mainly for debugging
+ * purposes and should not be tuned.
+ */
+bool opt_disable_large_size_classes = true;
+
+const char *const zero_realloc_mode_names[] = {
+    "alloc",
+    "free",
+    "abort",
 };
 
 /*
@@ -134,33 +145,44 @@ const char *zero_realloc_mode_names[] = {
 static const uint8_t junk_alloc_byte = 0xa5;
 static const uint8_t junk_free_byte = 0x5a;
 
-static void default_junk_alloc(void *ptr, size_t usize) {
+static void
+default_junk_alloc(void *ptr, size_t usize) {
 	memset(ptr, junk_alloc_byte, usize);
 }
 
-static void default_junk_free(void *ptr, size_t usize) {
+static void
+default_junk_free(void *ptr, size_t usize) {
 	memset(ptr, junk_free_byte, usize);
 }
 
-void (*junk_alloc_callback)(void *ptr, size_t size) = &default_junk_alloc;
-void (*junk_free_callback)(void *ptr, size_t size) = &default_junk_free;
+void (*JET_MUTABLE junk_alloc_callback)(
+    void *ptr, size_t size) = &default_junk_alloc;
+void (*JET_MUTABLE junk_free_callback)(
+    void *ptr, size_t size) = &default_junk_free;
+void (*JET_MUTABLE invalid_conf_abort)(void) = &abort;
 
-bool	opt_utrace = false;
-bool	opt_xmalloc = false;
-bool	opt_experimental_infallible_new = false;
-bool	opt_zero = false;
-unsigned	opt_narenas = 0;
-fxp_t		opt_narenas_ratio = FXP_INIT_INT(4);
+bool         opt_utrace = false;
+bool         opt_xmalloc = false;
+bool         opt_experimental_infallible_new = false;
+bool         opt_experimental_tcache_gc = true;
+bool         opt_zero = false;
+unsigned     opt_narenas = 0;
+fxp_t opt_narenas_ratio = FXP_INIT_INT(4);
 
-unsigned	ncpus;
+unsigned ncpus;
+
+unsigned opt_debug_double_free_max_scan =
+    SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT;
+
+size_t opt_calloc_madvise_threshold = CALLOC_MADVISE_THRESHOLD_DEFAULT;
 
 /* Protects arenas initialization. */
-malloc_mutex_t arenas_lock;
+static malloc_mutex_t arenas_lock;
 
 /* The global hpa, and whether it's on. */
-bool opt_hpa = false;
+bool             opt_hpa = false;
 hpa_shard_opts_t opt_hpa_opts = HPA_SHARD_OPTS_DEFAULT;
-sec_opts_t opt_hpa_sec_opts = SEC_OPTS_DEFAULT;
+sec_opts_t       opt_hpa_sec_opts = SEC_OPTS_DEFAULT;
 
 /*
  * Arenas that are used to service external requests.  Not all elements of the
@@ -173,48 +195,49 @@ sec_opts_t opt_hpa_sec_opts = SEC_OPTS_DEFAULT;
  * Points to an arena_t.
  */
 JEMALLOC_ALIGNED(CACHELINE)
-atomic_p_t		arenas[MALLOCX_ARENA_LIMIT];
-static atomic_u_t	narenas_total; /* Use narenas_total_*(). */
+atomic_p_t        arenas[MALLOCX_ARENA_LIMIT];
+static atomic_u_t narenas_total; /* Use narenas_total_*(). */
 /* Below three are read-only after initialization. */
-static arena_t		*a0; /* arenas[0]. */
-unsigned		narenas_auto;
-unsigned		manual_arena_base;
+static arena_t *a0; /* arenas[0]. */
+unsigned        narenas_auto;
+unsigned        manual_arena_base;
 
 malloc_init_t malloc_init_state = malloc_init_uninitialized;
 
 /* False should be the common case.  Set to true to trigger initialization. */
-bool			malloc_slow = true;
+bool malloc_slow = true;
 
 /* When malloc_slow is true, set the corresponding bits for sanity check. */
 enum {
-	flag_opt_junk_alloc	= (1U),
-	flag_opt_junk_free	= (1U << 1),
-	flag_opt_zero		= (1U << 2),
-	flag_opt_utrace		= (1U << 3),
-	flag_opt_xmalloc	= (1U << 4)
+	flag_opt_junk_alloc = (1U),
+	flag_opt_junk_free = (1U << 1),
+	flag_opt_zero = (1U << 2),
+	flag_opt_utrace = (1U << 3),
+	flag_opt_xmalloc = (1U << 4)
 };
-static uint8_t	malloc_slow_flags;
+static uint8_t malloc_slow_flags;
 
 #ifdef JEMALLOC_THREADED_INIT
 /* Used to let the initializing thread recursively allocate. */
-#  define NO_INITIALIZER	((unsigned long)0)
-#  define INITIALIZER		pthread_self()
-#  define IS_INITIALIZER	(malloc_initializer == pthread_self())
-static pthread_t		malloc_initializer = NO_INITIALIZER;
+#	define NO_INITIALIZER ((unsigned long)0)
+#	define INITIALIZER pthread_self()
+#	define IS_INITIALIZER                                                 \
+		(pthread_equal(malloc_initializer, pthread_self()))
+static pthread_t malloc_initializer = NO_INITIALIZER;
 #else
-#  define NO_INITIALIZER	false
-#  define INITIALIZER		true
-#  define IS_INITIALIZER	malloc_initializer
-static bool			malloc_initializer = NO_INITIALIZER;
+#	define NO_INITIALIZER false
+#	define INITIALIZER true
+#	define IS_INITIALIZER malloc_initializer
+static bool malloc_initializer = NO_INITIALIZER;
 #endif
 
 /* Used to avoid initialization races. */
 #ifdef _WIN32
-#if _WIN32_WINNT >= 0x0600
-static malloc_mutex_t	init_lock = SRWLOCK_INIT;
-#else
-static malloc_mutex_t	init_lock;
-static bool init_lock_initialized = false;
+#	if _WIN32_WINNT >= 0x0600
+static malloc_mutex_t init_lock = SRWLOCK_INIT;
+#	else
+static malloc_mutex_t init_lock;
+static bool           init_lock_initialized = false;
 
 JEMALLOC_ATTR(constructor)
 static void WINAPI
@@ -236,40 +259,40 @@ _init_init_lock(void) {
 	init_lock_initialized = true;
 }
 
-#ifdef _MSC_VER
-#  pragma section(".CRT$XCU", read)
-JEMALLOC_SECTION(".CRT$XCU") JEMALLOC_ATTR(used)
-static const void (WINAPI *init_init_lock)(void) = _init_init_lock;
-#endif
-#endif
+#		ifdef _MSC_VER
+#			pragma section(".CRT$XCU", read)
+JEMALLOC_SECTION(".CRT$XCU")
+JEMALLOC_ATTR(used)
+static const void(WINAPI *init_init_lock)(void) = _init_init_lock;
+#		endif
+#	endif
 #else
-static malloc_mutex_t	init_lock = MALLOC_MUTEX_INITIALIZER;
+static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER;
 #endif
 
 typedef struct {
-	void	*p;	/* Input pointer (as in realloc(p, s)). */
-	size_t	s;	/* Request size. */
-	void	*r;	/* Result pointer. */
+	void  *p; /* Input pointer (as in realloc(p, s)). */
+	size_t s; /* Request size. */
+	void  *r; /* Result pointer. */
 } malloc_utrace_t;
 
 #ifdef JEMALLOC_UTRACE
-#  define UTRACE(a, b, c) do {						\
-	if (unlikely(opt_utrace)) {					\
-		int utrace_serrno = errno;				\
-		malloc_utrace_t ut;					\
-		ut.p = (a);						\
-		ut.s = (b);						\
-		ut.r = (c);						\
-		UTRACE_CALL(&ut, sizeof(ut));				\
-		errno = utrace_serrno;					\
-	}								\
-} while (0)
+#	define UTRACE(a, b, c)                                                \
+		do {                                                           \
+			if (unlikely(opt_utrace)) {                            \
+				int             utrace_serrno = errno;         \
+				malloc_utrace_t ut;                            \
+				ut.p = (a);                                    \
+				ut.s = (b);                                    \
+				ut.r = (c);                                    \
+				UTRACE_CALL(&ut, sizeof(ut));                  \
+				errno = utrace_serrno;                         \
+			}                                                      \
+		} while (0)
 #else
-#  define UTRACE(a, b, c)
+#	define UTRACE(a, b, c)
 #endif
 
-/* Whether encountered any invalid config options. */
-static bool had_conf_error = false;
 
 /******************************************************************************/
 /*
@@ -277,8 +300,8 @@ static bool had_conf_error = false;
  * definition.
  */
 
-static bool	malloc_init_hard_a0(void);
-static bool	malloc_init_hard(void);
+static bool malloc_init_hard_a0(void);
+static bool malloc_init_hard(void);
 
 /******************************************************************************/
 /*
@@ -422,14 +445,13 @@ arena_new_create_background_thread(tsdn_t *tsdn, unsigned ind) {
 	if (ind == 0) {
 		return;
 	}
-	/*
-	 * Avoid creating a new background thread just for the huge arena, which
-	 * purges eagerly by default.
-	 */
-	if (have_background_thread && !arena_is_huge(ind)) {
+
+	if (have_background_thread) {
 		if (background_thread_create(tsdn_tsd(tsdn), ind)) {
-			malloc_printf("<jemalloc>: error in background thread "
-				      "creation for arena %u. Abort.\n", ind);
+			malloc_printf(
+			    "<jemalloc>: error in background thread "
+			    "creation for arena %u. Abort.\n",
+			    ind);
 			abort();
 		}
 	}
@@ -457,12 +479,16 @@ arena_bind(tsd_t *tsd, unsigned ind, bool internal) {
 		tsd_iarena_set(tsd, arena);
 	} else {
 		tsd_arena_set(tsd, arena);
-		unsigned shard = atomic_fetch_add_u(&arena->binshard_next, 1,
-		    ATOMIC_RELAXED);
+		/*
+		 * While shard acts as a random seed, the cast below should
+		 * not make much difference.
+		 */
+		uint8_t shard = (uint8_t)atomic_fetch_add_u(
+		    &arena->binshard_next, 1, ATOMIC_RELAXED);
 		tsd_binshards_t *bins = tsd_binshardsp_get(tsd);
 		for (unsigned i = 0; i < SC_NBINS; i++) {
-			assert(bin_infos[i].n_shards > 0 &&
-			    bin_infos[i].n_shards <= BIN_SHARDS_MAX);
+			assert(bin_infos[i].n_shards > 0
+			    && bin_infos[i].n_shards <= BIN_SHARDS_MAX);
 			bins->binshard[i] = shard % bin_infos[i].n_shards;
 		}
 	}
@@ -477,8 +503,12 @@ arena_migrate(tsd_t *tsd, arena_t *oldarena, arena_t *newarena) {
 	arena_nthreads_inc(newarena, false);
 	tsd_arena_set(tsd, newarena);
 
-	if (arena_nthreads_get(oldarena, false) == 0) {
-		/* Purge if the old arena has no associated threads anymore. */
+	if (arena_nthreads_get(oldarena, false) == 0
+	    && !background_thread_enabled()) {
+		/*
+		 * Purge if the old arena has no associated threads anymore and
+		 * no background threads.
+		 */
 		arena_decay(tsd_tsdn(tsd), oldarena,
 		    /* is_background_thread */ false, /* all */ true);
 	}
@@ -515,7 +545,7 @@ arena_choose_hard(tsd_t *tsd, bool internal) {
 
 	if (narenas_auto > 1) {
 		unsigned i, j, choose[2], first_null;
-		bool is_new_arena[2];
+		bool     is_new_arena[2];
 
 		/*
 		 * Determine binding for both non-internal and internal
@@ -540,11 +570,14 @@ arena_choose_hard(tsd_t *tsd, bool internal) {
 				 * number of threads assigned to it.
 				 */
 				for (j = 0; j < 2; j++) {
-					if (arena_nthreads_get(arena_get(
-					    tsd_tsdn(tsd), i, false), !!j) <
-					    arena_nthreads_get(arena_get(
-					    tsd_tsdn(tsd), choose[j], false),
-					    !!j)) {
+					if (arena_nthreads_get(
+					        arena_get(
+					            tsd_tsdn(tsd), i, false),
+					        !!j)
+					    < arena_nthreads_get(
+					        arena_get(tsd_tsdn(tsd),
+					            choose[j], false),
+					        !!j)) {
 						choose[j] = i;
 					}
 				}
@@ -563,16 +596,17 @@ arena_choose_hard(tsd_t *tsd, bool internal) {
 		}
 
 		for (j = 0; j < 2; j++) {
-			if (arena_nthreads_get(arena_get(tsd_tsdn(tsd),
-			    choose[j], false), !!j) == 0 || first_null ==
-			    narenas_auto) {
+			if (arena_nthreads_get(
+			        arena_get(tsd_tsdn(tsd), choose[j], false), !!j)
+			        == 0
+			    || first_null == narenas_auto) {
 				/*
 				 * Use an unloaded arena, or the least loaded
 				 * arena if all arenas are already initialized.
 				 */
 				if (!!j == internal) {
-					ret = arena_get(tsd_tsdn(tsd),
-					    choose[j], false);
+					ret = arena_get(
+					    tsd_tsdn(tsd), choose[j], false);
 				}
 			} else {
 				arena_t *arena;
@@ -582,8 +616,8 @@ arena_choose_hard(tsd_t *tsd, bool internal) {
 				arena = arena_init_locked(tsd_tsdn(tsd),
 				    choose[j], &arena_config_default);
 				if (arena == NULL) {
-					malloc_mutex_unlock(tsd_tsdn(tsd),
-					    &arenas_lock);
+					malloc_mutex_unlock(
+					    tsd_tsdn(tsd), &arenas_lock);
 					return NULL;
 				}
 				is_new_arena[j] = true;
@@ -635,7 +669,7 @@ arena_cleanup(tsd_t *tsd) {
 static void
 stats_print_atexit(void) {
 	if (config_stats) {
-		tsdn_t *tsdn;
+		tsdn_t  *tsdn;
 		unsigned narenas, i;
 
 		tsdn = tsdn_fetch();
@@ -653,13 +687,13 @@ stats_print_atexit(void) {
 				tcache_slow_t *tcache_slow;
 
 				malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
-				ql_foreach(tcache_slow, &arena->tcache_ql,
-				    link) {
-					tcache_stats_merge(tsdn,
-					    tcache_slow->tcache, arena);
+				ql_foreach (
+				    tcache_slow, &arena->tcache_ql, link) {
+					tcache_stats_merge(
+					    tsdn, tcache_slow->tcache, arena);
 				}
-				malloc_mutex_unlock(tsdn,
-				    &arena->tcache_ql_mtx);
+				malloc_mutex_unlock(
+				    tsdn, &arena->tcache_ql_mtx);
 			}
 		}
 	}
@@ -699,20 +733,6 @@ check_entry_exit_locking(tsdn_t *tsdn) {
  * Begin initialization functions.
  */
 
-static char *
-jemalloc_secure_getenv(const char *name) {
-#ifdef JEMALLOC_HAVE_SECURE_GETENV
-	return secure_getenv(name);
-#else
-#  ifdef JEMALLOC_HAVE_ISSETUGID
-	if (issetugid() != 0) {
-		return NULL;
-	}
-#  endif
-	return getenv(name);
-#endif
-}
-
 static unsigned
 malloc_ncpus(void) {
 	long result;
@@ -733,16 +753,16 @@ malloc_ncpus(void) {
 	 * is available, to avoid using more arenas than necessary.
 	 */
 	{
-#  if defined(__FreeBSD__) || defined(__DragonFly__)
+#	if defined(__FreeBSD__) || defined(__DragonFly__)
 		cpuset_t set;
-#  else
+#	else
 		cpu_set_t set;
-#  endif
-#  if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
+#	endif
+#	if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
 		sched_getaffinity(0, sizeof(set), &set);
-#  else
+#	else
 		pthread_getaffinity_np(pthread_self(), sizeof(set), &set);
-#  endif
+#	endif
 		result = CPU_COUNT(&set);
 	}
 #else
@@ -759,8 +779,7 @@ malloc_ncpus(void) {
  * Since otherwise tricky things is possible with percpu arenas in use.
  */
 static bool
-malloc_cpu_count_is_deterministic()
-{
+malloc_cpu_count_is_deterministic(void) {
 #ifdef _WIN32
 	return true;
 #else
@@ -769,195 +788,26 @@ malloc_cpu_count_is_deterministic()
 	if (cpu_onln != cpu_conf) {
 		return false;
 	}
-#  if defined(CPU_COUNT)
-#    if defined(__FreeBSD__) || defined(__DragonFly__)
+#	if defined(CPU_COUNT)
+#		if defined(__FreeBSD__) || defined(__DragonFly__)
 	cpuset_t set;
-#    else
+#		else
 	cpu_set_t set;
-#    endif /* __FreeBSD__ */
-#    if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
+#		endif /* __FreeBSD__ */
+#		if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
 	sched_getaffinity(0, sizeof(set), &set);
-#    else /* !JEMALLOC_HAVE_SCHED_SETAFFINITY */
+#		else  /* !JEMALLOC_HAVE_SCHED_SETAFFINITY */
 	pthread_getaffinity_np(pthread_self(), sizeof(set), &set);
-#    endif /* JEMALLOC_HAVE_SCHED_SETAFFINITY */
+#		endif /* JEMALLOC_HAVE_SCHED_SETAFFINITY */
 	long cpu_affinity = CPU_COUNT(&set);
 	if (cpu_affinity != cpu_conf) {
 		return false;
 	}
-#  endif /* CPU_COUNT */
+#	endif         /* CPU_COUNT */
 	return true;
 #endif
 }
 
-static void
-init_opt_stats_opts(const char *v, size_t vlen, char *dest) {
-	size_t opts_len = strlen(dest);
-	assert(opts_len <= stats_print_tot_num_options);
-
-	for (size_t i = 0; i < vlen; i++) {
-		switch (v[i]) {
-#define OPTION(o, v, d, s) case o: break;
-			STATS_PRINT_OPTIONS
-#undef OPTION
-		default: continue;
-		}
-
-		if (strchr(dest, v[i]) != NULL) {
-			/* Ignore repeated. */
-			continue;
-		}
-
-		dest[opts_len++] = v[i];
-		dest[opts_len] = '\0';
-		assert(opts_len <= stats_print_tot_num_options);
-	}
-	assert(opts_len == strlen(dest));
-}
-
-/* Reads the next size pair in a multi-sized option. */
-static bool
-malloc_conf_multi_sizes_next(const char **slab_size_segment_cur,
-    size_t *vlen_left, size_t *slab_start, size_t *slab_end, size_t *new_size) {
-	const char *cur = *slab_size_segment_cur;
-	char *end;
-	uintmax_t um;
-
-	set_errno(0);
-
-	/* First number, then '-' */
-	um = malloc_strtoumax(cur, &end, 0);
-	if (get_errno() != 0 || *end != '-') {
-		return true;
-	}
-	*slab_start = (size_t)um;
-	cur = end + 1;
-
-	/* Second number, then ':' */
-	um = malloc_strtoumax(cur, &end, 0);
-	if (get_errno() != 0 || *end != ':') {
-		return true;
-	}
-	*slab_end = (size_t)um;
-	cur = end + 1;
-
-	/* Last number */
-	um = malloc_strtoumax(cur, &end, 0);
-	if (get_errno() != 0) {
-		return true;
-	}
-	*new_size = (size_t)um;
-
-	/* Consume the separator if there is one. */
-	if (*end == '|') {
-		end++;
-	}
-
-	*vlen_left -= end - *slab_size_segment_cur;
-	*slab_size_segment_cur = end;
-
-	return false;
-}
-
-static bool
-malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p,
-    char const **v_p, size_t *vlen_p) {
-	bool accept;
-	const char *opts = *opts_p;
-
-	*k_p = opts;
-
-	for (accept = false; !accept;) {
-		switch (*opts) {
-		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
-		case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
-		case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
-		case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
-		case 'Y': case 'Z':
-		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
-		case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
-		case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
-		case 's': case 't': case 'u': case 'v': case 'w': case 'x':
-		case 'y': case 'z':
-		case '0': case '1': case '2': case '3': case '4': case '5':
-		case '6': case '7': case '8': case '9':
-		case '_':
-			opts++;
-			break;
-		case ':':
-			opts++;
-			*klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p;
-			*v_p = opts;
-			accept = true;
-			break;
-		case '\0':
-			if (opts != *opts_p) {
-				malloc_write("<jemalloc>: Conf string ends "
-				    "with key\n");
-				had_conf_error = true;
-			}
-			return true;
-		default:
-			malloc_write("<jemalloc>: Malformed conf string\n");
-			had_conf_error = true;
-			return true;
-		}
-	}
-
-	for (accept = false; !accept;) {
-		switch (*opts) {
-		case ',':
-			opts++;
-			/*
-			 * Look ahead one character here, because the next time
-			 * this function is called, it will assume that end of
-			 * input has been cleanly reached if no input remains,
-			 * but we have optimistically already consumed the
-			 * comma if one exists.
-			 */
-			if (*opts == '\0') {
-				malloc_write("<jemalloc>: Conf string ends "
-				    "with comma\n");
-				had_conf_error = true;
-			}
-			*vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p;
-			accept = true;
-			break;
-		case '\0':
-			*vlen_p = (uintptr_t)opts - (uintptr_t)*v_p;
-			accept = true;
-			break;
-		default:
-			opts++;
-			break;
-		}
-	}
-
-	*opts_p = opts;
-	return false;
-}
-
-static void
-malloc_abort_invalid_conf(void) {
-	assert(opt_abort_conf);
-	malloc_printf("<jemalloc>: Abort (abort_conf:true) on invalid conf "
-	    "value (see above).\n");
-	abort();
-}
-
-static void
-malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v,
-    size_t vlen) {
-	malloc_printf("<jemalloc>: %s: %.*s:%.*s\n", msg, (int)klen, k,
-	    (int)vlen, v);
-	/* If abort_conf is set, error out after processing all options. */
-	const char *experimental = "experimental_";
-	if (strncmp(k, experimental, strlen(experimental)) == 0) {
-		/* However, tolerate experimental features. */
-		return;
-	}
-	had_conf_error = true;
-}
-
 static void
 malloc_slow_flag_init(void) {
 	/*
@@ -973,797 +823,11 @@ malloc_slow_flag_init(void) {
 	malloc_slow = (malloc_slow_flags != 0);
 }
 
-/* Number of sources for initializing malloc_conf */
-#define MALLOC_CONF_NSOURCES 5
-
-static const char *
-obtain_malloc_conf(unsigned which_source, char buf[PATH_MAX + 1]) {
-	if (config_debug) {
-		static unsigned read_source = 0;
-		/*
-		 * Each source should only be read once, to minimize # of
-		 * syscalls on init.
-		 */
-		assert(read_source++ == which_source);
-	}
-	assert(which_source < MALLOC_CONF_NSOURCES);
-
-	const char *ret;
-	switch (which_source) {
-	case 0:
-		ret = config_malloc_conf;
-		break;
-	case 1:
-		if (je_malloc_conf != NULL) {
-			/* Use options that were compiled into the program. */
-			ret = je_malloc_conf;
-		} else {
-			/* No configuration specified. */
-			ret = NULL;
-		}
-		break;
-	case 2: {
-		ssize_t linklen = 0;
-#ifndef _WIN32
-		int saved_errno = errno;
-		const char *linkname =
-#  ifdef JEMALLOC_PREFIX
-		    "/etc/"JEMALLOC_PREFIX"malloc.conf"
-#  else
-		    "/etc/malloc.conf"
-#  endif
-		    ;
-
-		/*
-		 * Try to use the contents of the "/etc/malloc.conf" symbolic
-		 * link's name.
-		 */
-#ifndef JEMALLOC_READLINKAT
-		linklen = readlink(linkname, buf, PATH_MAX);
-#else
-		linklen = readlinkat(AT_FDCWD, linkname, buf, PATH_MAX);
-#endif
-		if (linklen == -1) {
-			/* No configuration specified. */
-			linklen = 0;
-			/* Restore errno. */
-			set_errno(saved_errno);
-		}
-#endif
-		buf[linklen] = '\0';
-		ret = buf;
-		break;
-	} case 3: {
-		const char *envname =
-#ifdef JEMALLOC_PREFIX
-		    JEMALLOC_CPREFIX"MALLOC_CONF"
-#else
-		    "MALLOC_CONF"
-#endif
-		    ;
-
-		if ((ret = jemalloc_secure_getenv(envname)) != NULL) {
-			/*
-			 * Do nothing; opts is already initialized to the value
-			 * of the MALLOC_CONF environment variable.
-			 */
-		} else {
-			/* No configuration specified. */
-			ret = NULL;
-		}
-		break;
-	} case 4: {
-		ret = je_malloc_conf_2_conf_harder;
-		break;
-	} default:
-		not_reached();
-		ret = NULL;
-	}
-	return ret;
-}
-
-static void
-malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
-    bool initial_call, const char *opts_cache[MALLOC_CONF_NSOURCES],
-    char buf[PATH_MAX + 1]) {
-	static const char *opts_explain[MALLOC_CONF_NSOURCES] = {
-		"string specified via --with-malloc-conf",
-		"string pointed to by the global variable malloc_conf",
-		"\"name\" of the file referenced by the symbolic link named "
-		    "/etc/malloc.conf",
-		"value of the environment variable MALLOC_CONF",
-		"string pointed to by the global variable "
-		    "malloc_conf_2_conf_harder",
-	};
-	unsigned i;
-	const char *opts, *k, *v;
-	size_t klen, vlen;
-
-	for (i = 0; i < MALLOC_CONF_NSOURCES; i++) {
-		/* Get runtime configuration. */
-		if (initial_call) {
-			opts_cache[i] = obtain_malloc_conf(i, buf);
-		}
-		opts = opts_cache[i];
-		if (!initial_call && opt_confirm_conf) {
-			malloc_printf(
-			    "<jemalloc>: malloc_conf #%u (%s): \"%s\"\n",
-			    i + 1, opts_explain[i], opts != NULL ? opts : "");
-		}
-		if (opts == NULL) {
-			continue;
-		}
-
-		while (*opts != '\0' && !malloc_conf_next(&opts, &k, &klen, &v,
-		    &vlen)) {
-
-#define CONF_ERROR(msg, k, klen, v, vlen)				\
-			if (!initial_call) {				\
-				malloc_conf_error(			\
-				    msg, k, klen, v, vlen);		\
-				cur_opt_valid = false;			\
-			}
-#define CONF_CONTINUE	{						\
-				if (!initial_call && opt_confirm_conf	\
-				    && cur_opt_valid) {			\
-					malloc_printf("<jemalloc>: -- "	\
-					    "Set conf value: %.*s:%.*s"	\
-					    "\n", (int)klen, k,		\
-					    (int)vlen, v);		\
-				}					\
-				continue;				\
-			}
-#define CONF_MATCH(n)							\
-	(sizeof(n)-1 == klen && strncmp(n, k, klen) == 0)
-#define CONF_MATCH_VALUE(n)						\
-	(sizeof(n)-1 == vlen && strncmp(n, v, vlen) == 0)
-#define CONF_HANDLE_BOOL(o, n)						\
-			if (CONF_MATCH(n)) {				\
-				if (CONF_MATCH_VALUE("true")) {		\
-					o = true;			\
-				} else if (CONF_MATCH_VALUE("false")) {	\
-					o = false;			\
-				} else {				\
-					CONF_ERROR("Invalid conf value",\
-					    k, klen, v, vlen);		\
-				}					\
-				CONF_CONTINUE;				\
-			}
-      /*
-       * One of the CONF_MIN macros below expands, in one of the use points,
-       * to "unsigned integer < 0", which is always false, triggering the
-       * GCC -Wtype-limits warning, which we disable here and re-enable below.
-       */
-      JEMALLOC_DIAGNOSTIC_PUSH
-      JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS
-
-#define CONF_DONT_CHECK_MIN(um, min)	false
-#define CONF_CHECK_MIN(um, min)	((um) < (min))
-#define CONF_DONT_CHECK_MAX(um, max)	false
-#define CONF_CHECK_MAX(um, max)	((um) > (max))
-
-#define CONF_VALUE_READ(max_t, result)					\
-	      char *end;						\
-	      set_errno(0);						\
-	      result = (max_t)malloc_strtoumax(v, &end, 0);
-#define CONF_VALUE_READ_FAIL()						\
-	      (get_errno() != 0 || (uintptr_t)end - (uintptr_t)v != vlen)
-
-#define CONF_HANDLE_T(t, max_t, o, n, min, max, check_min, check_max, clip) \
-			if (CONF_MATCH(n)) {				\
-				max_t mv;				\
-				CONF_VALUE_READ(max_t, mv)		\
-				if (CONF_VALUE_READ_FAIL()) {		\
-					CONF_ERROR("Invalid conf value",\
-					    k, klen, v, vlen);		\
-				} else if (clip) {			\
-					if (check_min(mv, (t)(min))) {	\
-						o = (t)(min);		\
-					} else if (			\
-					    check_max(mv, (t)(max))) {	\
-						o = (t)(max);		\
-					} else {			\
-						o = (t)mv;		\
-					}				\
-				} else {				\
-					if (check_min(mv, (t)(min)) ||	\
-					    check_max(mv, (t)(max))) {	\
-						CONF_ERROR(		\
-						    "Out-of-range "	\
-						    "conf value",	\
-						    k, klen, v, vlen);	\
-					} else {			\
-						o = (t)mv;		\
-					}				\
-				}					\
-				CONF_CONTINUE;				\
-			}
-#define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip)	\
-	      CONF_HANDLE_T(t, uintmax_t, o, n, min, max, check_min,	\
-			    check_max, clip)
-#define CONF_HANDLE_T_SIGNED(t, o, n, min, max, check_min, check_max, clip)\
-	      CONF_HANDLE_T(t, intmax_t, o, n, min, max, check_min,	\
-			    check_max, clip)
-
-#define CONF_HANDLE_UNSIGNED(o, n, min, max, check_min, check_max,	\
-    clip)								\
-			CONF_HANDLE_T_U(unsigned, o, n, min, max,	\
-			    check_min, check_max, clip)
-#define CONF_HANDLE_SIZE_T(o, n, min, max, check_min, check_max, clip)	\
-			CONF_HANDLE_T_U(size_t, o, n, min, max,		\
-			    check_min, check_max, clip)
-#define CONF_HANDLE_INT64_T(o, n, min, max, check_min, check_max, clip)	\
-			CONF_HANDLE_T_SIGNED(int64_t, o, n, min, max,	\
-			    check_min, check_max, clip)
-#define CONF_HANDLE_UINT64_T(o, n, min, max, check_min, check_max, clip)\
-			CONF_HANDLE_T_U(uint64_t, o, n, min, max,	\
-			    check_min, check_max, clip)
-#define CONF_HANDLE_SSIZE_T(o, n, min, max)				\
-			CONF_HANDLE_T_SIGNED(ssize_t, o, n, min, max,	\
-			    CONF_CHECK_MIN, CONF_CHECK_MAX, false)
-#define CONF_HANDLE_CHAR_P(o, n, d)					\
-			if (CONF_MATCH(n)) {				\
-				size_t cpylen = (vlen <=		\
-				    sizeof(o)-1) ? vlen :		\
-				    sizeof(o)-1;			\
-				strncpy(o, v, cpylen);			\
-				o[cpylen] = '\0';			\
-				CONF_CONTINUE;				\
-			}
-
-			bool cur_opt_valid = true;
-
-			CONF_HANDLE_BOOL(opt_confirm_conf, "confirm_conf")
-			if (initial_call) {
-				continue;
-			}
-
-			CONF_HANDLE_BOOL(opt_abort, "abort")
-			CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf")
-			CONF_HANDLE_BOOL(opt_trust_madvise, "trust_madvise")
-			if (strncmp("metadata_thp", k, klen) == 0) {
-				int m;
-				bool match = false;
-				for (m = 0; m < metadata_thp_mode_limit; m++) {
-					if (strncmp(metadata_thp_mode_names[m],
-					    v, vlen) == 0) {
-						opt_metadata_thp = m;
-						match = true;
-						break;
-					}
-				}
-				if (!match) {
-					CONF_ERROR("Invalid conf value",
-					    k, klen, v, vlen);
-				}
-				CONF_CONTINUE;
-			}
-			CONF_HANDLE_BOOL(opt_retain, "retain")
-			if (strncmp("dss", k, klen) == 0) {
-				int m;
-				bool match = false;
-				for (m = 0; m < dss_prec_limit; m++) {
-					if (strncmp(dss_prec_names[m], v, vlen)
-					    == 0) {
-						if (extent_dss_prec_set(m)) {
-							CONF_ERROR(
-							    "Error setting dss",
-							    k, klen, v, vlen);
-						} else {
-							opt_dss =
-							    dss_prec_names[m];
-							match = true;
-							break;
-						}
-					}
-				}
-				if (!match) {
-					CONF_ERROR("Invalid conf value",
-					    k, klen, v, vlen);
-				}
-				CONF_CONTINUE;
-			}
-			if (CONF_MATCH("narenas")) {
-				if (CONF_MATCH_VALUE("default")) {
-					opt_narenas = 0;
-					CONF_CONTINUE;
-				} else {
-					CONF_HANDLE_UNSIGNED(opt_narenas,
-					    "narenas", 1, UINT_MAX,
-					    CONF_CHECK_MIN, CONF_DONT_CHECK_MAX,
-					    /* clip */ false)
-				}
-			}
-			if (CONF_MATCH("narenas_ratio")) {
-				char *end;
-				bool err = fxp_parse(&opt_narenas_ratio, v,
-				    &end);
-				if (err || (size_t)(end - v) != vlen) {
-					CONF_ERROR("Invalid conf value",
-					    k, klen, v, vlen);
-				}
-				CONF_CONTINUE;
-			}
-			if (CONF_MATCH("bin_shards")) {
-				const char *bin_shards_segment_cur = v;
-				size_t vlen_left = vlen;
-				do {
-					size_t size_start;
-					size_t size_end;
-					size_t nshards;
-					bool err = malloc_conf_multi_sizes_next(
-					    &bin_shards_segment_cur, &vlen_left,
-					    &size_start, &size_end, &nshards);
-					if (err || bin_update_shard_size(
-					    bin_shard_sizes, size_start,
-					    size_end, nshards)) {
-						CONF_ERROR(
-						    "Invalid settings for "
-						    "bin_shards", k, klen, v,
-						    vlen);
-						break;
-					}
-				} while (vlen_left > 0);
-				CONF_CONTINUE;
-			}
-			CONF_HANDLE_INT64_T(opt_mutex_max_spin,
-			    "mutex_max_spin", -1, INT64_MAX, CONF_CHECK_MIN,
-			    CONF_DONT_CHECK_MAX, false);
-			CONF_HANDLE_SSIZE_T(opt_dirty_decay_ms,
-			    "dirty_decay_ms", -1, NSTIME_SEC_MAX * KQU(1000) <
-			    QU(SSIZE_MAX) ? NSTIME_SEC_MAX * KQU(1000) :
-			    SSIZE_MAX);
-			CONF_HANDLE_SSIZE_T(opt_muzzy_decay_ms,
-			    "muzzy_decay_ms", -1, NSTIME_SEC_MAX * KQU(1000) <
-			    QU(SSIZE_MAX) ? NSTIME_SEC_MAX * KQU(1000) :
-			    SSIZE_MAX);
-			CONF_HANDLE_BOOL(opt_stats_print, "stats_print")
-			if (CONF_MATCH("stats_print_opts")) {
-				init_opt_stats_opts(v, vlen,
-				    opt_stats_print_opts);
-				CONF_CONTINUE;
-			}
-			CONF_HANDLE_INT64_T(opt_stats_interval,
-			    "stats_interval", -1, INT64_MAX,
-			    CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, false)
-			if (CONF_MATCH("stats_interval_opts")) {
-				init_opt_stats_opts(v, vlen,
-				    opt_stats_interval_opts);
-				CONF_CONTINUE;
-			}
-			if (config_fill) {
-				if (CONF_MATCH("junk")) {
-					if (CONF_MATCH_VALUE("true")) {
-						opt_junk = "true";
-						opt_junk_alloc = opt_junk_free =
-						    true;
-					} else if (CONF_MATCH_VALUE("false")) {
-						opt_junk = "false";
-						opt_junk_alloc = opt_junk_free =
-						    false;
-					} else if (CONF_MATCH_VALUE("alloc")) {
-						opt_junk = "alloc";
-						opt_junk_alloc = true;
-						opt_junk_free = false;
-					} else if (CONF_MATCH_VALUE("free")) {
-						opt_junk = "free";
-						opt_junk_alloc = false;
-						opt_junk_free = true;
-					} else {
-						CONF_ERROR(
-						    "Invalid conf value",
-						    k, klen, v, vlen);
-					}
-					CONF_CONTINUE;
-				}
-				CONF_HANDLE_BOOL(opt_zero, "zero")
-			}
-			if (config_utrace) {
-				CONF_HANDLE_BOOL(opt_utrace, "utrace")
-			}
-			if (config_xmalloc) {
-				CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc")
-			}
-			if (config_enable_cxx) {
-				CONF_HANDLE_BOOL(
-				    opt_experimental_infallible_new,
-				    "experimental_infallible_new")
-			}
-
-			CONF_HANDLE_BOOL(opt_tcache, "tcache")
-			CONF_HANDLE_SIZE_T(opt_tcache_max, "tcache_max",
-			    0, TCACHE_MAXCLASS_LIMIT, CONF_DONT_CHECK_MIN,
-			    CONF_CHECK_MAX, /* clip */ true)
-			if (CONF_MATCH("lg_tcache_max")) {
-				size_t m;
-				CONF_VALUE_READ(size_t, m)
-				if (CONF_VALUE_READ_FAIL()) {
-					CONF_ERROR("Invalid conf value",
-					    k, klen, v, vlen);
-				} else {
-					/* clip if necessary */
-					if (m > TCACHE_LG_MAXCLASS_LIMIT) {
-						m = TCACHE_LG_MAXCLASS_LIMIT;
-					}
-					opt_tcache_max = (size_t)1 << m;
-				}
-				CONF_CONTINUE;
-			}
-			/*
-			 * Anyone trying to set a value outside -16 to 16 is
-			 * deeply confused.
-			 */
-			CONF_HANDLE_SSIZE_T(opt_lg_tcache_nslots_mul,
-			    "lg_tcache_nslots_mul", -16, 16)
-			/* Ditto with values past 2048. */
-			CONF_HANDLE_UNSIGNED(opt_tcache_nslots_small_min,
-			    "tcache_nslots_small_min", 1, 2048,
-			    CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true)
-			CONF_HANDLE_UNSIGNED(opt_tcache_nslots_small_max,
-			    "tcache_nslots_small_max", 1, 2048,
-			    CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true)
-			CONF_HANDLE_UNSIGNED(opt_tcache_nslots_large,
-			    "tcache_nslots_large", 1, 2048,
-			    CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true)
-			CONF_HANDLE_SIZE_T(opt_tcache_gc_incr_bytes,
-			    "tcache_gc_incr_bytes", 1024, SIZE_T_MAX,
-			    CONF_CHECK_MIN, CONF_DONT_CHECK_MAX,
-			    /* clip */ true)
-			CONF_HANDLE_SIZE_T(opt_tcache_gc_delay_bytes,
-			    "tcache_gc_delay_bytes", 0, SIZE_T_MAX,
-			    CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX,
-			    /* clip */ false)
-			CONF_HANDLE_UNSIGNED(opt_lg_tcache_flush_small_div,
-			    "lg_tcache_flush_small_div", 1, 16,
-			    CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true)
-			CONF_HANDLE_UNSIGNED(opt_lg_tcache_flush_large_div,
-			    "lg_tcache_flush_large_div", 1, 16,
-			    CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true)
-
-			/*
-			 * The runtime option of oversize_threshold remains
-			 * undocumented.  It may be tweaked in the next major
-			 * release (6.0).  The default value 8M is rather
-			 * conservative / safe.  Tuning it further down may
-			 * improve fragmentation a bit more, but may also cause
-			 * contention on the huge arena.
-			 */
-			CONF_HANDLE_SIZE_T(opt_oversize_threshold,
-			    "oversize_threshold", 0, SC_LARGE_MAXCLASS,
-			    CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, false)
-			CONF_HANDLE_SIZE_T(opt_lg_extent_max_active_fit,
-			    "lg_extent_max_active_fit", 0,
-			    (sizeof(size_t) << 3), CONF_DONT_CHECK_MIN,
-			    CONF_CHECK_MAX, false)
-
-			if (strncmp("percpu_arena", k, klen) == 0) {
-				bool match = false;
-				for (int m = percpu_arena_mode_names_base; m <
-				    percpu_arena_mode_names_limit; m++) {
-					if (strncmp(percpu_arena_mode_names[m],
-					    v, vlen) == 0) {
-						if (!have_percpu_arena) {
-							CONF_ERROR(
-							    "No getcpu support",
-							    k, klen, v, vlen);
-						}
-						opt_percpu_arena = m;
-						match = true;
-						break;
-					}
-				}
-				if (!match) {
-					CONF_ERROR("Invalid conf value",
-					    k, klen, v, vlen);
-				}
-				CONF_CONTINUE;
-			}
-			CONF_HANDLE_BOOL(opt_background_thread,
-			    "background_thread");
-			CONF_HANDLE_SIZE_T(opt_max_background_threads,
-					   "max_background_threads", 1,
-					   opt_max_background_threads,
-					   CONF_CHECK_MIN, CONF_CHECK_MAX,
-					   true);
-			CONF_HANDLE_BOOL(opt_hpa, "hpa")
-			CONF_HANDLE_SIZE_T(opt_hpa_opts.slab_max_alloc,
-			    "hpa_slab_max_alloc", PAGE, HUGEPAGE,
-			    CONF_CHECK_MIN, CONF_CHECK_MAX, true);
-
-			/*
-			 * Accept either a ratio-based or an exact hugification
-			 * threshold.
-			 */
-			CONF_HANDLE_SIZE_T(opt_hpa_opts.hugification_threshold,
-			    "hpa_hugification_threshold", PAGE, HUGEPAGE,
-			    CONF_CHECK_MIN, CONF_CHECK_MAX, true);
-			if (CONF_MATCH("hpa_hugification_threshold_ratio")) {
-				fxp_t ratio;
-				char *end;
-				bool err = fxp_parse(&ratio, v,
-				    &end);
-				if (err || (size_t)(end - v) != vlen
-				    || ratio > FXP_INIT_INT(1)) {
-					CONF_ERROR("Invalid conf value",
-					    k, klen, v, vlen);
-				} else {
-					opt_hpa_opts.hugification_threshold =
-					    fxp_mul_frac(HUGEPAGE, ratio);
-				}
-				CONF_CONTINUE;
-			}
-
-			CONF_HANDLE_UINT64_T(
-			    opt_hpa_opts.hugify_delay_ms, "hpa_hugify_delay_ms",
-			    0, 0, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX,
-			    false);
-
-			CONF_HANDLE_UINT64_T(
-			    opt_hpa_opts.min_purge_interval_ms,
-			    "hpa_min_purge_interval_ms", 0, 0,
-			    CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false);
-
-			if (CONF_MATCH("hpa_dirty_mult")) {
-				if (CONF_MATCH_VALUE("-1")) {
-					opt_hpa_opts.dirty_mult = (fxp_t)-1;
-					CONF_CONTINUE;
-				}
-				fxp_t ratio;
-				char *end;
-				bool err = fxp_parse(&ratio, v,
-				    &end);
-				if (err || (size_t)(end - v) != vlen) {
-					CONF_ERROR("Invalid conf value",
-					    k, klen, v, vlen);
-				} else {
-					opt_hpa_opts.dirty_mult = ratio;
-				}
-				CONF_CONTINUE;
-			}
-
-			CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.nshards,
-			    "hpa_sec_nshards", 0, 0, CONF_CHECK_MIN,
-			    CONF_DONT_CHECK_MAX, true);
-			CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_alloc,
-			    "hpa_sec_max_alloc", PAGE, 0, CONF_CHECK_MIN,
-			    CONF_DONT_CHECK_MAX, true);
-			CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_bytes,
-			    "hpa_sec_max_bytes", PAGE, 0, CONF_CHECK_MIN,
-			    CONF_DONT_CHECK_MAX, true);
-			CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.bytes_after_flush,
-			    "hpa_sec_bytes_after_flush", PAGE, 0,
-			    CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);
-			CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.batch_fill_extra,
-			    "hpa_sec_batch_fill_extra", 0, HUGEPAGE_PAGES,
-			    CONF_CHECK_MIN, CONF_CHECK_MAX, true);
-
-			if (CONF_MATCH("slab_sizes")) {
-				if (CONF_MATCH_VALUE("default")) {
-					sc_data_init(sc_data);
-					CONF_CONTINUE;
-				}
-				bool err;
-				const char *slab_size_segment_cur = v;
-				size_t vlen_left = vlen;
-				do {
-					size_t slab_start;
-					size_t slab_end;
-					size_t pgs;
-					err = malloc_conf_multi_sizes_next(
-					    &slab_size_segment_cur,
-					    &vlen_left, &slab_start, &slab_end,
-					    &pgs);
-					if (!err) {
-						sc_data_update_slab_size(
-						    sc_data, slab_start,
-						    slab_end, (int)pgs);
-					} else {
-						CONF_ERROR("Invalid settings "
-						    "for slab_sizes",
-						    k, klen, v, vlen);
-					}
-				} while (!err && vlen_left > 0);
-				CONF_CONTINUE;
-			}
-			if (config_prof) {
-				CONF_HANDLE_BOOL(opt_prof, "prof")
-				CONF_HANDLE_CHAR_P(opt_prof_prefix,
-				    "prof_prefix", "jeprof")
-				CONF_HANDLE_BOOL(opt_prof_active, "prof_active")
-				CONF_HANDLE_BOOL(opt_prof_thread_active_init,
-				    "prof_thread_active_init")
-				CONF_HANDLE_SIZE_T(opt_lg_prof_sample,
-				    "lg_prof_sample", 0, (sizeof(uint64_t) << 3)
-				    - 1, CONF_DONT_CHECK_MIN, CONF_CHECK_MAX,
-				    true)
-				CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum")
-				CONF_HANDLE_SSIZE_T(opt_lg_prof_interval,
-				    "lg_prof_interval", -1,
-				    (sizeof(uint64_t) << 3) - 1)
-				CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump")
-				CONF_HANDLE_BOOL(opt_prof_final, "prof_final")
-				CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak")
-				CONF_HANDLE_BOOL(opt_prof_leak_error,
-				    "prof_leak_error")
-				CONF_HANDLE_BOOL(opt_prof_log, "prof_log")
-				CONF_HANDLE_SSIZE_T(opt_prof_recent_alloc_max,
-				    "prof_recent_alloc_max", -1, SSIZE_MAX)
-				CONF_HANDLE_BOOL(opt_prof_stats, "prof_stats")
-				CONF_HANDLE_BOOL(opt_prof_sys_thread_name,
-				    "prof_sys_thread_name")
-				if (CONF_MATCH("prof_time_resolution")) {
-					if (CONF_MATCH_VALUE("default")) {
-						opt_prof_time_res =
-						    prof_time_res_default;
-					} else if (CONF_MATCH_VALUE("high")) {
-						if (!config_high_res_timer) {
-							CONF_ERROR(
-							    "No high resolution"
-							    " timer support",
-							    k, klen, v, vlen);
-						} else {
-							opt_prof_time_res =
-							    prof_time_res_high;
-						}
-					} else {
-						CONF_ERROR("Invalid conf value",
-						    k, klen, v, vlen);
-					}
-					CONF_CONTINUE;
-				}
-				/*
-				 * Undocumented.  When set to false, don't
-				 * correct for an unbiasing bug in jeprof
-				 * attribution.  This can be handy if you want
-				 * to get consistent numbers from your binary
-				 * across different jemalloc versions, even if
-				 * those numbers are incorrect.  The default is
-				 * true.
-				 */
-				CONF_HANDLE_BOOL(opt_prof_unbias, "prof_unbias")
-			}
-			if (config_log) {
-				if (CONF_MATCH("log")) {
-					size_t cpylen = (
-					    vlen <= sizeof(log_var_names) ?
-					    vlen : sizeof(log_var_names) - 1);
-					strncpy(log_var_names, v, cpylen);
-					log_var_names[cpylen] = '\0';
-					CONF_CONTINUE;
-				}
-			}
-			if (CONF_MATCH("thp")) {
-				bool match = false;
-				for (int m = 0; m < thp_mode_names_limit; m++) {
-					if (strncmp(thp_mode_names[m],v, vlen)
-					    == 0) {
-						if (!have_madvise_huge && !have_memcntl) {
-							CONF_ERROR(
-							    "No THP support",
-							    k, klen, v, vlen);
-						}
-						opt_thp = m;
-						match = true;
-						break;
-					}
-				}
-				if (!match) {
-					CONF_ERROR("Invalid conf value",
-					    k, klen, v, vlen);
-				}
-				CONF_CONTINUE;
-			}
-			if (CONF_MATCH("zero_realloc")) {
-				if (CONF_MATCH_VALUE("alloc")) {
-					opt_zero_realloc_action
-					    = zero_realloc_action_alloc;
-				} else if (CONF_MATCH_VALUE("free")) {
-					opt_zero_realloc_action
-					    = zero_realloc_action_free;
-				} else if (CONF_MATCH_VALUE("abort")) {
-					opt_zero_realloc_action
-					    = zero_realloc_action_abort;
-				} else {
-					CONF_ERROR("Invalid conf value",
-					    k, klen, v, vlen);
-				}
-				CONF_CONTINUE;
-			}
-			if (config_uaf_detection &&
-			    CONF_MATCH("lg_san_uaf_align")) {
-				ssize_t a;
-				CONF_VALUE_READ(ssize_t, a)
-				if (CONF_VALUE_READ_FAIL() || a < -1) {
-					CONF_ERROR("Invalid conf value",
-					    k, klen, v, vlen);
-				}
-				if (a == -1) {
-					opt_lg_san_uaf_align = -1;
-					CONF_CONTINUE;
-				}
-
-				/* clip if necessary */
-				ssize_t max_allowed = (sizeof(size_t) << 3) - 1;
-				ssize_t min_allowed = LG_PAGE;
-				if (a > max_allowed) {
-					a = max_allowed;
-				} else if (a < min_allowed) {
-					a = min_allowed;
-				}
-
-				opt_lg_san_uaf_align = a;
-				CONF_CONTINUE;
-			}
-
-			CONF_HANDLE_SIZE_T(opt_san_guard_small,
-			    "san_guard_small", 0, SIZE_T_MAX,
-			    CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false)
-			CONF_HANDLE_SIZE_T(opt_san_guard_large,
-			    "san_guard_large", 0, SIZE_T_MAX,
-			    CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false)
-
-			CONF_ERROR("Invalid conf pair", k, klen, v, vlen);
-#undef CONF_ERROR
-#undef CONF_CONTINUE
-#undef CONF_MATCH
-#undef CONF_MATCH_VALUE
-#undef CONF_HANDLE_BOOL
-#undef CONF_DONT_CHECK_MIN
-#undef CONF_CHECK_MIN
-#undef CONF_DONT_CHECK_MAX
-#undef CONF_CHECK_MAX
-#undef CONF_HANDLE_T
-#undef CONF_HANDLE_T_U
-#undef CONF_HANDLE_T_SIGNED
-#undef CONF_HANDLE_UNSIGNED
-#undef CONF_HANDLE_SIZE_T
-#undef CONF_HANDLE_SSIZE_T
-#undef CONF_HANDLE_CHAR_P
-    /* Re-enable diagnostic "-Wtype-limits" */
-    JEMALLOC_DIAGNOSTIC_POP
-		}
-		if (opt_abort_conf && had_conf_error) {
-			malloc_abort_invalid_conf();
-		}
-	}
-	atomic_store_b(&log_init_done, true, ATOMIC_RELEASE);
-}
-
-static bool
-malloc_conf_init_check_deps(void) {
-	if (opt_prof_leak_error && !opt_prof_final) {
-		malloc_printf("<jemalloc>: prof_leak_error is set w/o "
-		    "prof_final.\n");
-		return true;
-	}
-
-	return false;
-}
-
-static void
-malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
-	const char *opts_cache[MALLOC_CONF_NSOURCES] = {NULL, NULL, NULL, NULL,
-		NULL};
-	char buf[PATH_MAX + 1];
-
-	/* The first call only set the confirm_conf option and opts_cache */
-	malloc_conf_init_helper(NULL, NULL, true, opts_cache, buf);
-	malloc_conf_init_helper(sc_data, bin_shard_sizes, false, opts_cache,
-	    NULL);
-	if (malloc_conf_init_check_deps()) {
-		/* check_deps does warning msg only; abort below if needed. */
-		if (opt_abort_conf) {
-			malloc_abort_invalid_conf();
-		}
-	}
-}
-
-#undef MALLOC_CONF_NSOURCES
-
 static bool
 malloc_init_hard_needed(void) {
-	if (malloc_initialized() || (IS_INITIALIZER && malloc_init_state ==
-	    malloc_init_recursible)) {
+	if (malloc_initialized()
+	    || (IS_INITIALIZER
+	        && malloc_init_state == malloc_init_recursible)) {
 		/*
 		 * Another thread initialized the allocator before this one
 		 * acquired init_lock, or this thread is the initializing
@@ -1787,7 +851,7 @@ malloc_init_hard_needed(void) {
 }
 
 static bool
-malloc_init_hard_a0_locked() {
+malloc_init_hard_a0_locked(void) {
 	malloc_initializer = INITIALIZER;
 
 	JEMALLOC_DIAGNOSTIC_PUSH
@@ -1813,7 +877,9 @@ malloc_init_hard_a0_locked() {
 	if (config_prof) {
 		prof_boot0();
 	}
-	malloc_conf_init(&sc_data, bin_shard_sizes);
+	char readlink_buf[PATH_MAX + 1];
+	readlink_buf[0] = '\0';
+	malloc_conf_init(&sc_data, bin_shard_sizes, readlink_buf);
 	san_init(opt_lg_san_uaf_align);
 	sz_boot(&sc_data, opt_cache_oblivious);
 	bin_info_boot(&sc_data, bin_shard_sizes);
@@ -1851,7 +917,8 @@ malloc_init_hard_a0_locked() {
 		prof_boot1();
 	}
 	if (opt_hpa && !hpa_supported()) {
-		malloc_printf("<jemalloc>: HPA not supported in the current "
+		malloc_printf(
+		    "<jemalloc>: HPA not supported in the current "
 		    "configuration; %s.",
 		    opt_abort_conf ? "aborting" : "disabling");
 		if (opt_abort_conf) {
@@ -1867,10 +934,11 @@ malloc_init_hard_a0_locked() {
 		return true;
 	}
 	if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS,
-	    malloc_mutex_rank_exclusive)) {
+	        malloc_mutex_rank_exclusive)) {
 		return true;
 	}
 	hook_boot();
+	experimental_thread_events_boot();
 	/*
 	 * Create enough scaffolding to allow recursive allocation in
 	 * malloc_ncpus().
@@ -1888,7 +956,8 @@ malloc_init_hard_a0_locked() {
 	a0 = arena_get(TSDN_NULL, 0, false);
 
 	if (opt_hpa && !hpa_supported()) {
-		malloc_printf("<jemalloc>: HPA not supported in the current "
+		malloc_printf(
+		    "<jemalloc>: HPA not supported in the current "
 		    "configuration; %s.",
 		    opt_abort_conf ? "aborting" : "disabling");
 		if (opt_abort_conf) {
@@ -1896,17 +965,19 @@ malloc_init_hard_a0_locked() {
 		} else {
 			opt_hpa = false;
 		}
-	} else if (opt_hpa) {
-		hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts;
-		hpa_shard_opts.deferral_allowed = background_thread_enabled();
-		if (pa_shard_enable_hpa(TSDN_NULL, &a0->pa_shard,
-		    &hpa_shard_opts, &opt_hpa_sec_opts)) {
-			return true;
-		}
 	}
 
 	malloc_init_state = malloc_init_a0_initialized;
 
+	size_t buf_len = strlen(readlink_buf);
+	if (buf_len > 0) {
+		void *readlink_allocated = a0ialloc(buf_len + 1, false, true);
+		if (readlink_allocated != NULL) {
+			memcpy(readlink_allocated, readlink_buf, buf_len + 1);
+			opt_malloc_conf_symlink = readlink_allocated;
+		}
+	}
+
 	return false;
 }
 
@@ -1937,7 +1008,8 @@ malloc_init_hard_recursible(void) {
 			 */
 			if (opt_narenas == 0) {
 				opt_percpu_arena = percpu_arena_disabled;
-				malloc_write("<jemalloc>: Number of CPUs "
+				malloc_write(
+				    "<jemalloc>: Number of CPUs "
 				    "detected is not deterministic. Per-CPU "
 				    "arena disabled.\n");
 				if (opt_abort_conf) {
@@ -1951,11 +1023,12 @@ malloc_init_hard_recursible(void) {
 	}
 
 #if (defined(JEMALLOC_HAVE_PTHREAD_ATFORK) && !defined(JEMALLOC_MUTEX_INIT_CB) \
-    && !defined(JEMALLOC_ZONE) && !defined(_WIN32) && \
-    !defined(__native_client__))
+    && !defined(JEMALLOC_ZONE) && !defined(_WIN32)                             \
+    && !defined(__native_client__))
 	/* LinuxThreads' pthread_atfork() allocates. */
 	if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent,
-	    jemalloc_postfork_child) != 0) {
+	        jemalloc_postfork_child)
+	    != 0) {
 		malloc_write("<jemalloc>: Error in pthread_atfork()\n");
 		if (opt_abort) {
 			abort();
@@ -1979,8 +1052,8 @@ malloc_narenas_default(void) {
 	 * default.
 	 */
 	if (ncpus > 1) {
-		fxp_t fxp_ncpus = FXP_INIT_INT(ncpus);
-		fxp_t goal = fxp_mul(fxp_ncpus, opt_narenas_ratio);
+		fxp_t    fxp_ncpus = FXP_INIT_INT(ncpus);
+		fxp_t    goal = fxp_mul(fxp_ncpus, opt_narenas_ratio);
 		uint32_t int_goal = fxp_round_nearest(goal);
 		if (int_goal == 0) {
 			return 1;
@@ -2004,34 +1077,41 @@ percpu_arena_as_initialized(percpu_arena_mode_t mode) {
 }
 
 static bool
-malloc_init_narenas(void) {
+malloc_init_narenas(tsdn_t *tsdn) {
 	assert(ncpus > 0);
 
 	if (opt_percpu_arena != percpu_arena_disabled) {
 		if (!have_percpu_arena || malloc_getcpu() < 0) {
 			opt_percpu_arena = percpu_arena_disabled;
-			malloc_printf("<jemalloc>: perCPU arena getcpu() not "
-			    "available. Setting narenas to %u.\n", opt_narenas ?
-			    opt_narenas : malloc_narenas_default());
+			malloc_printf(
+			    "<jemalloc>: perCPU arena getcpu() not "
+			    "available. Setting narenas to %u.\n",
+			    opt_narenas ? opt_narenas
+			                : malloc_narenas_default());
 			if (opt_abort) {
 				abort();
 			}
 		} else {
 			if (ncpus >= MALLOCX_ARENA_LIMIT) {
-				malloc_printf("<jemalloc>: narenas w/ percpu"
-				    "arena beyond limit (%d)\n", ncpus);
+				malloc_printf(
+				    "<jemalloc>: narenas w/ percpu"
+				    "arena beyond limit (%d)\n",
+				    ncpus);
 				if (opt_abort) {
 					abort();
 				}
 				return true;
 			}
 			/* NB: opt_percpu_arena isn't fully initialized yet. */
-			if (percpu_arena_as_initialized(opt_percpu_arena) ==
-			    per_phycpu_arena && ncpus % 2 != 0) {
-				malloc_printf("<jemalloc>: invalid "
+			if (percpu_arena_as_initialized(opt_percpu_arena)
+			        == per_phycpu_arena
+			    && ncpus % 2 != 0) {
+				malloc_printf(
+				    "<jemalloc>: invalid "
 				    "configuration -- per physical CPU arena "
 				    "with odd number (%u) of CPUs (no hyper "
-				    "threading?).\n", ncpus);
+				    "threading?).\n",
+				    ncpus);
 				if (opt_abort)
 					abort();
 			}
@@ -2071,7 +1151,7 @@ malloc_init_narenas(void) {
 		    narenas_auto);
 	}
 	narenas_total_set(narenas_auto);
-	if (arena_init_huge()) {
+	if (arena_init_huge(tsdn, a0)) {
 		narenas_total_inc();
 	}
 	manual_arena_base = narenas_total_get();
@@ -2112,21 +1192,30 @@ static bool
 malloc_init_hard(void) {
 	tsd_t *tsd;
 
+	assert(TCACHE_MAXCLASS_LIMIT <= USIZE_GROW_SLOW_THRESHOLD);
+	assert(SC_LOOKUP_MAXCLASS <= USIZE_GROW_SLOW_THRESHOLD);
+	/*
+	 * This asserts an extreme case where TINY_MAXCLASS is larger
+	 * than LARGE_MINCLASS.  It could only happen if some constants
+	 * are configured miserably wrong.
+	 */
+	assert(SC_LG_TINY_MAXCLASS <= (size_t)1ULL << (LG_PAGE + SC_LG_NGROUP));
+
 #if defined(_WIN32) && _WIN32_WINNT < 0x0600
 	_init_init_lock();
 #endif
 	malloc_mutex_lock(TSDN_NULL, &init_lock);
 
-#define UNLOCK_RETURN(tsdn, ret, reentrancy)		\
-	malloc_init_hard_cleanup(tsdn, reentrancy);	\
+#define UNLOCK_RETURN(tsdn, ret, reentrancy)                                   \
+	malloc_init_hard_cleanup(tsdn, reentrancy);                            \
 	return ret;
 
 	if (!malloc_init_hard_needed()) {
 		UNLOCK_RETURN(TSDN_NULL, false, false)
 	}
 
-	if (malloc_init_state != malloc_init_a0_initialized &&
-	    malloc_init_hard_a0_locked()) {
+	if (malloc_init_state != malloc_init_a0_initialized
+	    && malloc_init_hard_a0_locked()) {
 		UNLOCK_RETURN(TSDN_NULL, true, false)
 	}
 
@@ -2144,10 +1233,24 @@ malloc_init_hard(void) {
 	/* Set reentrancy level to 1 during init. */
 	pre_reentrancy(tsd, NULL);
 	/* Initialize narenas before prof_boot2 (for allocation). */
-	if (malloc_init_narenas()
+	if (malloc_init_narenas(tsd_tsdn(tsd))
 	    || background_thread_boot1(tsd_tsdn(tsd), b0get())) {
 		UNLOCK_RETURN(tsd_tsdn(tsd), true, true)
 	}
+	if (opt_hpa) {
+		/*
+		 * We didn't initialize arena 0 hpa_shard in arena_new, because
+		 * background_thread_enabled wasn't initialized yet, but we
+		 * need it to set correct value for deferral_allowed.
+		 */
+		arena_t         *a0 = arena_get(tsd_tsdn(tsd), 0, false);
+		hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts;
+		hpa_shard_opts.deferral_allowed = background_thread_enabled();
+		if (pa_shard_enable_hpa(tsd_tsdn(tsd), &a0->pa_shard,
+		        &hpa_shard_opts, &opt_hpa_sec_opts)) {
+			UNLOCK_RETURN(tsd_tsdn(tsd), true, true)
+		}
+	}
 	if (config_prof && prof_boot2(tsd, b0get())) {
 		UNLOCK_RETURN(tsd_tsdn(tsd), true, true)
 	}
@@ -2160,8 +1263,8 @@ malloc_init_hard(void) {
 	post_reentrancy(tsd);
 	malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
 
-	witness_assert_lockless(witness_tsd_tsdn(
-	    tsd_witness_tsdp_get_unsafe(tsd)));
+	witness_assert_lockless(
+	    witness_tsd_tsdn(tsd_witness_tsdp_get_unsafe(tsd)));
 	malloc_tsd_boot1();
 	/* Update TSD after tsd_boot1. */
 	tsd = tsd_fetch();
@@ -2254,23 +1357,14 @@ static_opts_init(static_opts_t *static_opts) {
 	static_opts->usize = false;
 }
 
-/*
- * These correspond to the macros in jemalloc/jemalloc_macros.h.  Broadly, we
- * should have one constant here per magic value there.  Note however that the
- * representations need not be related.
- */
-#define TCACHE_IND_NONE ((unsigned)-1)
-#define TCACHE_IND_AUTOMATIC ((unsigned)-2)
-#define ARENA_IND_AUTOMATIC ((unsigned)-1)
-
 typedef struct dynamic_opts_s dynamic_opts_t;
 struct dynamic_opts_s {
-	void **result;
-	size_t usize;
-	size_t num_items;
-	size_t item_size;
-	size_t alignment;
-	bool zero;
+	void   **result;
+	size_t   usize;
+	size_t   num_items;
+	size_t   item_size;
+	size_t   alignment;
+	bool     zero;
 	unsigned tcache_ind;
 	unsigned arena_ind;
 };
@@ -2301,7 +1395,9 @@ aligned_usize_get(size_t size, size_t alignment, size_t *usize, szind_t *ind,
 			if (unlikely(*ind >= SC_NSIZES)) {
 				return true;
 			}
-			*usize = sz_index2size(*ind);
+			*usize = sz_large_size_classes_disabled()
+			    ? sz_s2u(size)
+			    : sz_index2size(*ind);
 			assert(*usize > 0 && *usize <= SC_LARGE_MAXCLASS);
 			return false;
 		}
@@ -2327,36 +1423,6 @@ zero_get(bool guarantee, bool slow) {
 	}
 }
 
-JEMALLOC_ALWAYS_INLINE tcache_t *
-tcache_get_from_ind(tsd_t *tsd, unsigned tcache_ind, bool slow, bool is_alloc) {
-	tcache_t *tcache;
-	if (tcache_ind == TCACHE_IND_AUTOMATIC) {
-		if (likely(!slow)) {
-			/* Getting tcache ptr unconditionally. */
-			tcache = tsd_tcachep_get(tsd);
-			assert(tcache == tcache_get(tsd));
-		} else if (is_alloc ||
-		    likely(tsd_reentrancy_level_get(tsd) == 0)) {
-			tcache = tcache_get(tsd);
-		} else {
-			tcache = NULL;
-		}
-	} else {
-		/*
-		 * Should not specify tcache on deallocation path when being
-		 * reentrant.
-		 */
-		assert(is_alloc || tsd_reentrancy_level_get(tsd) == 0 ||
-		    tsd_state_nocleanup(tsd));
-		if (tcache_ind == TCACHE_IND_NONE) {
-			tcache = NULL;
-		} else {
-			tcache = tcaches_get(tsd, tcache_ind);
-		}
-	}
-	return tcache;
-}
-
 /* Return true if a manual arena is specified and arena_get() OOMs. */
 JEMALLOC_ALWAYS_INLINE bool
 arena_get_from_ind(tsd_t *tsd, unsigned arena_ind, arena_t **arena_p) {
@@ -2379,10 +1445,10 @@ arena_get_from_ind(tsd_t *tsd, unsigned arena_ind, arena_t **arena_p) {
 /* ind is ignored if dopts->alignment > 0. */
 JEMALLOC_ALWAYS_INLINE void *
 imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd,
-    size_t size, size_t usize, szind_t ind) {
+    size_t size, size_t usize, szind_t ind, bool slab) {
 	/* Fill in the tcache. */
-	tcache_t *tcache = tcache_get_from_ind(tsd, dopts->tcache_ind,
-	    sopts->slow, /* is_alloc */ true);
+	tcache_t *tcache = tcache_get_from_ind(
+	    tsd, dopts->tcache_ind, sopts->slow, /* is_alloc */ true);
 
 	/* Fill in the arena. */
 	arena_t *arena;
@@ -2391,12 +1457,12 @@ imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd,
 	}
 
 	if (unlikely(dopts->alignment != 0)) {
-		return ipalloct(tsd_tsdn(tsd), usize, dopts->alignment,
-		    dopts->zero, tcache, arena);
+		return ipalloct_explicit_slab(tsd_tsdn(tsd), usize,
+		    dopts->alignment, dopts->zero, slab, tcache, arena);
 	}
 
-	return iallocztm(tsd_tsdn(tsd), size, ind, dopts->zero, tcache, false,
-	    arena, sopts->slow);
+	return iallocztm_explicit_slab(tsd_tsdn(tsd), size, ind, dopts->zero,
+	    slab, tcache, false, arena, sopts->slow);
 }
 
 JEMALLOC_ALWAYS_INLINE void *
@@ -2404,29 +1470,26 @@ imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd,
     size_t usize, szind_t ind) {
 	void *ret;
 
+	dopts->alignment = prof_sample_align(usize, dopts->alignment);
 	/*
-	 * For small allocations, sampling bumps the usize.  If so, we allocate
-	 * from the ind_large bucket.
+	 * If the allocation is small enough that it would normally be allocated
+	 * on a slab, we need to take additional steps to ensure that it gets
+	 * its own extent instead.
 	 */
-	szind_t ind_large;
-	size_t bumped_usize = usize;
-
-	dopts->alignment = prof_sample_align(dopts->alignment);
-	if (usize <= SC_SMALL_MAXCLASS) {
-		assert(((dopts->alignment == 0) ?
-		    sz_s2u(SC_LARGE_MINCLASS) :
-		    sz_sa2u(SC_LARGE_MINCLASS, dopts->alignment))
-			== SC_LARGE_MINCLASS);
-		ind_large = sz_size2index(SC_LARGE_MINCLASS);
-		bumped_usize = sz_s2u(SC_LARGE_MINCLASS);
+	if (sz_can_use_slab(usize)) {
+		assert((dopts->alignment & PROF_SAMPLE_ALIGNMENT_MASK) == 0);
+		size_t  bumped_usize = sz_sa2u(usize, dopts->alignment);
+		szind_t bumped_ind = sz_size2index(bumped_usize);
+		dopts->tcache_ind = TCACHE_IND_NONE;
 		ret = imalloc_no_sample(sopts, dopts, tsd, bumped_usize,
-		    bumped_usize, ind_large);
+		    bumped_usize, bumped_ind, /* slab */ false);
 		if (unlikely(ret == NULL)) {
 			return NULL;
 		}
-		arena_prof_promote(tsd_tsdn(tsd), ret, usize);
+		arena_prof_promote(tsd_tsdn(tsd), ret, usize, bumped_usize);
 	} else {
-		ret = imalloc_no_sample(sopts, dopts, tsd, usize, usize, ind);
+		ret = imalloc_no_sample(sopts, dopts, tsd, usize, usize, ind,
+		    /* slab */ false);
 	}
 	assert(prof_sample_aligned(ret));
 
@@ -2438,8 +1501,8 @@ imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd,
  * *size to the product either way.
  */
 JEMALLOC_ALWAYS_INLINE bool
-compute_size_with_overflow(bool may_overflow, dynamic_opts_t *dopts,
-    size_t *size) {
+compute_size_with_overflow(
+    bool may_overflow, dynamic_opts_t *dopts, size_t *size) {
 	/*
 	 * This function is just num_items * item_size, except that we may have
 	 * to check for overflow.
@@ -2495,26 +1558,26 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
 	int8_t reentrancy_level;
 
 	/* Compute the amount of memory the user wants. */
-	if (unlikely(compute_size_with_overflow(sopts->may_overflow, dopts,
-	    &size))) {
+	if (unlikely(compute_size_with_overflow(
+	        sopts->may_overflow, dopts, &size))) {
 		goto label_oom;
 	}
 
 	if (unlikely(dopts->alignment < sopts->min_alignment
-	    || (dopts->alignment & (dopts->alignment - 1)) != 0)) {
+	        || (dopts->alignment & (dopts->alignment - 1)) != 0)) {
 		goto label_invalid_alignment;
 	}
 
 	/* This is the beginning of the "core" algorithm. */
 	dopts->zero = zero_get(dopts->zero, sopts->slow);
 	if (aligned_usize_get(size, dopts->alignment, &usize, &ind,
-	    sopts->bump_empty_aligned_alloc)) {
+	        sopts->bump_empty_aligned_alloc)) {
 		goto label_oom;
 	}
 	dopts->usize = usize;
 	/* Validate the user input. */
 	if (sopts->assert_nonempty_alloc) {
-		assert (size != 0);
+		assert(size != 0);
 	}
 
 	check_entry_exit_locking(tsd_tsdn(tsd));
@@ -2529,8 +1592,8 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
 		 * We should never specify particular arenas or tcaches from
 		 * within our internal allocations.
 		 */
-		assert(dopts->tcache_ind == TCACHE_IND_AUTOMATIC ||
-		    dopts->tcache_ind == TCACHE_IND_NONE);
+		assert(dopts->tcache_ind == TCACHE_IND_AUTOMATIC
+		    || dopts->tcache_ind == TCACHE_IND_NONE);
 		assert(dopts->arena_ind == ARENA_IND_AUTOMATIC);
 		dopts->tcache_ind = TCACHE_IND_NONE;
 		/* We know that arena 0 has already been initialized. */
@@ -2547,15 +1610,15 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
 	if (config_prof && opt_prof) {
 		bool prof_active = prof_active_get_unlocked();
 		bool sample_event = te_prof_sample_event_lookahead(tsd, usize);
-		prof_tctx_t *tctx = prof_alloc_prep(tsd, prof_active,
-		    sample_event);
+		prof_tctx_t *tctx = prof_alloc_prep(
+		    tsd, prof_active, sample_event);
 
 		emap_alloc_ctx_t alloc_ctx;
-		if (likely((uintptr_t)tctx == (uintptr_t)1U)) {
-			alloc_ctx.slab = (usize <= SC_SMALL_MAXCLASS);
-			allocation = imalloc_no_sample(
-			    sopts, dopts, tsd, usize, usize, ind);
-		} else if ((uintptr_t)tctx > (uintptr_t)1U) {
+		if (likely(tctx == PROF_TCTX_SENTINEL)) {
+			alloc_ctx.slab = sz_can_use_slab(usize);
+			allocation = imalloc_no_sample(sopts, dopts, tsd, usize,
+			    usize, ind, alloc_ctx.slab);
+		} else if (tctx != NULL) {
 			allocation = imalloc_sample(
 			    sopts, dopts, tsd, usize, ind);
 			alloc_ctx.slab = false;
@@ -2571,7 +1634,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
 	} else {
 		assert(!opt_prof);
 		allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize,
-		    ind);
+		    ind, sz_can_use_slab(usize));
 		if (unlikely(allocation == NULL)) {
 			goto label_oom;
 		}
@@ -2698,8 +1761,8 @@ imalloc(static_opts_t *sopts, dynamic_opts_t *dopts) {
 JEMALLOC_NOINLINE
 void *
 malloc_default(size_t size) {
-	void *ret;
-	static_opts_t sopts;
+	void          *ret;
+	static_opts_t  sopts;
 	dynamic_opts_t dopts;
 
 	/*
@@ -2729,8 +1792,6 @@ malloc_default(size_t size) {
 		hook_invoke_alloc(hook_alloc_malloc, ret, (uintptr_t)ret, args);
 	}
 
-	LOG("core.malloc.exit", "result: %p", ret);
-
 	return ret;
 }
 
@@ -2739,22 +1800,28 @@ malloc_default(size_t size) {
  * Begin malloc(3)-compatible functions.
  */
 
-JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
-void JEMALLOC_NOTHROW *
-JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1)
-je_malloc(size_t size) {
-	return imalloc_fastpath(size, &malloc_default);
+JEMALLOC_EXPORT
+JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW *
+JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) je_malloc(size_t size) {
+	LOG("core.malloc.entry", "size: %zu", size);
+
+	void *ret = imalloc_fastpath(size, &malloc_default);
+
+	LOG("core.malloc.exit", "result: %p", ret);
+	return ret;
 }
 
 JEMALLOC_EXPORT int JEMALLOC_NOTHROW
 JEMALLOC_ATTR(nonnull(1))
-je_posix_memalign(void **memptr, size_t alignment, size_t size) {
-	int ret;
-	static_opts_t sopts;
+    je_posix_memalign(void **memptr, size_t alignment, size_t size) {
+	int            ret;
+	static_opts_t  sopts;
 	dynamic_opts_t dopts;
 
-	LOG("core.posix_memalign.entry", "mem ptr: %p, alignment: %zu, "
-	    "size: %zu", memptr, alignment, size);
+	LOG("core.posix_memalign.entry",
+	    "mem ptr: %p, alignment: %zu, "
+	    "size: %zu",
+	    memptr, alignment, size);
 
 	static_opts_init(&sopts);
 	dynamic_opts_init(&dopts);
@@ -2773,10 +1840,10 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) {
 
 	ret = imalloc(&sopts, &dopts);
 	if (sopts.slow) {
-		uintptr_t args[3] = {(uintptr_t)memptr, (uintptr_t)alignment,
-			(uintptr_t)size};
-		hook_invoke_alloc(hook_alloc_posix_memalign, *memptr,
-		    (uintptr_t)ret, args);
+		uintptr_t args[3] = {
+		    (uintptr_t)memptr, (uintptr_t)alignment, (uintptr_t)size};
+		hook_invoke_alloc(
+		    hook_alloc_posix_memalign, *memptr, (uintptr_t)ret, args);
 	}
 
 	LOG("core.posix_memalign.exit", "result: %d, alloc ptr: %p", ret,
@@ -2785,13 +1852,13 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) {
 	return ret;
 }
 
-JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
-void JEMALLOC_NOTHROW *
+JEMALLOC_EXPORT
+JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW *
 JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(2)
-je_aligned_alloc(size_t alignment, size_t size) {
+    je_aligned_alloc(size_t alignment, size_t size) {
 	void *ret;
 
-	static_opts_t sopts;
+	static_opts_t  sopts;
 	dynamic_opts_t dopts;
 
 	LOG("core.aligned_alloc.entry", "alignment: %zu, size: %zu\n",
@@ -2817,8 +1884,8 @@ je_aligned_alloc(size_t alignment, size_t size) {
 	imalloc(&sopts, &dopts);
 	if (sopts.slow) {
 		uintptr_t args[3] = {(uintptr_t)alignment, (uintptr_t)size};
-		hook_invoke_alloc(hook_alloc_aligned_alloc, ret,
-		    (uintptr_t)ret, args);
+		hook_invoke_alloc(
+		    hook_alloc_aligned_alloc, ret, (uintptr_t)ret, args);
 	}
 
 	LOG("core.aligned_alloc.exit", "result: %p", ret);
@@ -2826,15 +1893,15 @@ je_aligned_alloc(size_t alignment, size_t size) {
 	return ret;
 }
 
-JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
-void JEMALLOC_NOTHROW *
+JEMALLOC_EXPORT
+JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW *
 JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2)
-je_calloc(size_t num, size_t size) {
-	void *ret;
-	static_opts_t sopts;
+    je_calloc(size_t num, size_t size) {
+	void          *ret;
+	static_opts_t  sopts;
 	dynamic_opts_t dopts;
 
-	LOG("core.calloc.entry", "num: %zu, size: %zu\n", num, size);
+	LOG("core.calloc.entry", "num: %zu, size: %zu", num, size);
 
 	static_opts_init(&sopts);
 	dynamic_opts_init(&dopts);
@@ -2874,51 +1941,26 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) {
 	assert(malloc_initialized() || IS_INITIALIZER);
 
 	emap_alloc_ctx_t alloc_ctx;
-	emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr,
-	    &alloc_ctx);
+	emap_alloc_ctx_lookup(
+	    tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx);
 	assert(alloc_ctx.szind != SC_NSIZES);
 
-	size_t usize = sz_index2size(alloc_ctx.szind);
+	size_t usize = emap_alloc_ctx_usize_get(&alloc_ctx);
 	if (config_prof && opt_prof) {
 		prof_free(tsd, ptr, usize, &alloc_ctx);
 	}
 
 	if (likely(!slow_path)) {
-		idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false,
-		    false);
+		idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false, false);
 	} else {
 		if (config_fill && slow_path && opt_junk_free) {
 			junk_free_callback(ptr, usize);
 		}
-		idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false,
-		    true);
+		idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false, true);
 	}
 	thread_dalloc_event(tsd, usize);
 }
 
-JEMALLOC_ALWAYS_INLINE bool
-maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) {
-	if (config_opt_size_checks) {
-		emap_alloc_ctx_t dbg_ctx;
-		emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr,
-		    &dbg_ctx);
-		if (alloc_ctx->szind != dbg_ctx.szind) {
-			safety_check_fail_sized_dealloc(
-			    /* current_dealloc */ true, ptr,
-			    /* true_size */ sz_size2index(dbg_ctx.szind),
-			    /* input_size */ sz_size2index(alloc_ctx->szind));
-			return true;
-		}
-		if (alloc_ctx->slab != dbg_ctx.slab) {
-			safety_check_fail(
-			    "Internal heap corruption detected: "
-			    "mismatch in slab bit");
-			return true;
-		}
-	}
-	return false;
-}
-
 JEMALLOC_ALWAYS_INLINE void
 isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) {
 	if (!slow_path) {
@@ -2933,35 +1975,41 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) {
 	assert(malloc_initialized() || IS_INITIALIZER);
 
 	emap_alloc_ctx_t alloc_ctx;
+	szind_t          szind = sz_size2index(usize);
 	if (!config_prof) {
-		alloc_ctx.szind = sz_size2index(usize);
-		alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS);
+		emap_alloc_ctx_init(
+		    &alloc_ctx, szind, (szind < SC_NBINS), usize);
 	} else {
 		if (likely(!prof_sample_aligned(ptr))) {
 			/*
 			 * When the ptr is not page aligned, it was not sampled.
 			 * usize can be trusted to determine szind and slab.
 			 */
-			alloc_ctx.szind = sz_size2index(usize);
-			alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS);
+			emap_alloc_ctx_init(
+			    &alloc_ctx, szind, (szind < SC_NBINS), usize);
 		} else if (opt_prof) {
-			emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global,
-			    ptr, &alloc_ctx);
+			/*
+			 * Small sampled allocs promoted can still get correct
+			 * usize here.  Check comments in edata_usize_get.
+			 */
+			emap_alloc_ctx_lookup(
+			    tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx);
 
 			if (config_opt_safety_checks) {
 				/* Small alloc may have !slab (sampled). */
-				if (unlikely(alloc_ctx.szind !=
-				    sz_size2index(usize))) {
+				size_t true_size = emap_alloc_ctx_usize_get(
+				    &alloc_ctx);
+				if (unlikely(alloc_ctx.szind
+				        != sz_size2index(usize))) {
 					safety_check_fail_sized_dealloc(
 					    /* current_dealloc */ true, ptr,
-					    /* true_size */ sz_index2size(
-					    alloc_ctx.szind),
+					    /* true_size */ true_size,
 					    /* input_size */ usize);
 				}
 			}
 		} else {
-			alloc_ctx.szind = sz_size2index(usize);
-			alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS);
+			emap_alloc_ctx_init(
+			    &alloc_ctx, szind, (szind < SC_NBINS), usize);
 		}
 	}
 	bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx);
@@ -2979,14 +2027,12 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) {
 		prof_free(tsd, ptr, usize, &alloc_ctx);
 	}
 	if (likely(!slow_path)) {
-		isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, &alloc_ctx,
-		    false);
+		isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, &alloc_ctx, false);
 	} else {
 		if (config_fill && slow_path && opt_junk_free) {
 			junk_free_callback(ptr, usize);
 		}
-		isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, &alloc_ctx,
-		    true);
+		isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, &alloc_ctx, true);
 	}
 	thread_dalloc_event(tsd, usize);
 }
@@ -3025,146 +2071,29 @@ free_default(void *ptr) {
 	}
 }
 
-JEMALLOC_ALWAYS_INLINE bool
-free_fastpath_nonfast_aligned(void *ptr, bool check_prof) {
-	/*
-	 * free_fastpath do not handle two uncommon cases: 1) sampled profiled
-	 * objects and 2) sampled junk & stash for use-after-free detection.
-	 * Both have special alignments which are used to escape the fastpath.
-	 *
-	 * prof_sample is page-aligned, which covers the UAF check when both
-	 * are enabled (the assertion below).  Avoiding redundant checks since
-	 * this is on the fastpath -- at most one runtime branch from this.
-	 */
-	if (config_debug && cache_bin_nonfast_aligned(ptr)) {
-		assert(prof_sample_aligned(ptr));
-	}
-
-	if (config_prof && check_prof) {
-		/* When prof is enabled, the prof_sample alignment is enough. */
-		if (prof_sample_aligned(ptr)) {
-			return true;
-		} else {
-			return false;
-		}
-	}
-
-	if (config_uaf_detection) {
-		if (cache_bin_nonfast_aligned(ptr)) {
-			return true;
-		} else {
-			return false;
-		}
-	}
-
-	return false;
-}
-
-/* Returns whether or not the free attempt was successful. */
-JEMALLOC_ALWAYS_INLINE
-bool free_fastpath(void *ptr, size_t size, bool size_hint) {
-	tsd_t *tsd = tsd_get(false);
-	/* The branch gets optimized away unless tsd_get_allocates(). */
-	if (unlikely(tsd == NULL)) {
-		return false;
-	}
-	/*
-	 *  The tsd_fast() / initialized checks are folded into the branch
-	 *  testing (deallocated_after >= threshold) later in this function.
-	 *  The threshold will be set to 0 when !tsd_fast.
-	 */
-	assert(tsd_fast(tsd) ||
-	    *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0);
-
-	emap_alloc_ctx_t alloc_ctx;
-	if (!size_hint) {
-		bool err = emap_alloc_ctx_try_lookup_fast(tsd,
-		    &arena_emap_global, ptr, &alloc_ctx);
-
-		/* Note: profiled objects will have alloc_ctx.slab set */
-		if (unlikely(err || !alloc_ctx.slab ||
-		    free_fastpath_nonfast_aligned(ptr,
-		    /* check_prof */ false))) {
-			return false;
-		}
-		assert(alloc_ctx.szind != SC_NSIZES);
-	} else {
-		/*
-		 * Check for both sizes that are too large, and for sampled /
-		 * special aligned objects.  The alignment check will also check
-		 * for null ptr.
-		 */
-		if (unlikely(size > SC_LOOKUP_MAXCLASS ||
-		    free_fastpath_nonfast_aligned(ptr,
-		    /* check_prof */ true))) {
-			return false;
-		}
-		alloc_ctx.szind = sz_size2index_lookup(size);
-		/* Max lookup class must be small. */
-		assert(alloc_ctx.szind < SC_NBINS);
-		/* This is a dead store, except when opt size checking is on. */
-		alloc_ctx.slab = true;
-	}
-	/*
-	 * Currently the fastpath only handles small sizes.  The branch on
-	 * SC_LOOKUP_MAXCLASS makes sure of it.  This lets us avoid checking
-	 * tcache szind upper limit (i.e. tcache_maxclass) as well.
-	 */
-	assert(alloc_ctx.slab);
-
-	uint64_t deallocated, threshold;
-	te_free_fastpath_ctx(tsd, &deallocated, &threshold);
-
-	size_t usize = sz_index2size(alloc_ctx.szind);
-	uint64_t deallocated_after = deallocated + usize;
-	/*
-	 * Check for events and tsd non-nominal (fast_threshold will be set to
-	 * 0) in a single branch.  Note that this handles the uninitialized case
-	 * as well (TSD init will be triggered on the non-fastpath).  Therefore
-	 * anything depends on a functional TSD (e.g. the alloc_ctx sanity check
-	 * below) needs to be after this branch.
-	 */
-	if (unlikely(deallocated_after >= threshold)) {
-		return false;
-	}
-	assert(tsd_fast(tsd));
-	bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx);
-	if (fail) {
-		/* See the comment in isfree. */
-		return true;
-	}
-
-	tcache_t *tcache = tcache_get_from_ind(tsd, TCACHE_IND_AUTOMATIC,
-	    /* slow */ false, /* is_alloc */ false);
-	cache_bin_t *bin = &tcache->bins[alloc_ctx.szind];
-
-	/*
-	 * If junking were enabled, this is where we would do it.  It's not
-	 * though, since we ensured above that we're on the fast path.  Assert
-	 * that to double-check.
-	 */
-	assert(!opt_junk_free);
-
-	if (!cache_bin_dalloc_easy(bin, ptr)) {
-		return false;
-	}
-
-	*tsd_thread_deallocatedp_get(tsd) = deallocated_after;
-
-	return true;
-}
-
 JEMALLOC_EXPORT void JEMALLOC_NOTHROW
 je_free(void *ptr) {
 	LOG("core.free.entry", "ptr: %p", ptr);
 
-	if (!free_fastpath(ptr, 0, false)) {
-		free_default(ptr);
-	}
+	je_free_impl(ptr);
 
 	LOG("core.free.exit", "");
 }
 
+JEMALLOC_EXPORT void JEMALLOC_NOTHROW
+je_free_sized(void *ptr, size_t size) {
+	LOG("core.free_sized.entry", "ptr: %p, size: %zu", ptr, size);
+
+	je_sdallocx_noflags(ptr, size);
+
+	LOG("core.free_sized.exit", "");
+}
+
+JEMALLOC_EXPORT void JEMALLOC_NOTHROW
+je_free_aligned_sized(void *ptr, size_t alignment, size_t size) {
+	return je_sdallocx(ptr, size, /* flags */ MALLOCX_ALIGN(alignment));
+}
+
 /*
  * End malloc(3)-compatible functions.
  */
@@ -3174,12 +2103,11 @@ je_free(void *ptr) {
  */
 
 #ifdef JEMALLOC_OVERRIDE_MEMALIGN
-JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
-void JEMALLOC_NOTHROW *
-JEMALLOC_ATTR(malloc)
-je_memalign(size_t alignment, size_t size) {
-	void *ret;
-	static_opts_t sopts;
+JEMALLOC_EXPORT
+JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW *
+JEMALLOC_ATTR(malloc) je_memalign(size_t alignment, size_t size) {
+	void          *ret;
+	static_opts_t  sopts;
 	dynamic_opts_t dopts;
 
 	LOG("core.memalign.entry", "alignment: %zu, size: %zu\n", alignment,
@@ -3188,6 +2116,7 @@ je_memalign(size_t alignment, size_t size) {
 	static_opts_init(&sopts);
 	dynamic_opts_init(&dopts);
 
+	sopts.bump_empty_aligned_alloc = true;
 	sopts.min_alignment = 1;
 	sopts.oom_string =
 	    "<jemalloc>: Error allocating aligned memory: out of memory\n";
@@ -3203,8 +2132,8 @@ je_memalign(size_t alignment, size_t size) {
 	imalloc(&sopts, &dopts);
 	if (sopts.slow) {
 		uintptr_t args[3] = {alignment, size};
-		hook_invoke_alloc(hook_alloc_memalign, ret, (uintptr_t)ret,
-		    args);
+		hook_invoke_alloc(
+		    hook_alloc_memalign, ret, (uintptr_t)ret, args);
 	}
 
 	LOG("core.memalign.exit", "result: %p", ret);
@@ -3213,13 +2142,12 @@ je_memalign(size_t alignment, size_t size) {
 #endif
 
 #ifdef JEMALLOC_OVERRIDE_VALLOC
-JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
-void JEMALLOC_NOTHROW *
-JEMALLOC_ATTR(malloc)
-je_valloc(size_t size) {
+JEMALLOC_EXPORT
+JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW *
+JEMALLOC_ATTR(malloc) je_valloc(size_t size) {
 	void *ret;
 
-	static_opts_t sopts;
+	static_opts_t  sopts;
 	dynamic_opts_t dopts;
 
 	LOG("core.valloc.entry", "size: %zu\n", size);
@@ -3250,6 +2178,48 @@ je_valloc(size_t size) {
 }
 #endif
 
+#ifdef JEMALLOC_OVERRIDE_PVALLOC
+JEMALLOC_EXPORT
+JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW *
+JEMALLOC_ATTR(malloc) je_pvalloc(size_t size) {
+	void *ret;
+
+	static_opts_t  sopts;
+	dynamic_opts_t dopts;
+
+	LOG("core.pvalloc.entry", "size: %zu\n", size);
+
+	static_opts_init(&sopts);
+	dynamic_opts_init(&dopts);
+
+	sopts.null_out_result_on_error = true;
+	sopts.min_alignment = PAGE;
+	sopts.oom_string =
+	    "<jemalloc>: Error allocating aligned memory: out of memory\n";
+	sopts.invalid_alignment_string =
+	    "<jemalloc>: Error allocating aligned memory: invalid alignment\n";
+
+	dopts.result = &ret;
+	dopts.num_items = 1;
+	/*
+	 * This is the only difference from je_valloc - size is rounded up to
+	 * a PAGE multiple.
+	 */
+	dopts.item_size = PAGE_CEILING(size);
+	dopts.alignment = PAGE;
+
+	imalloc(&sopts, &dopts);
+	if (sopts.slow) {
+		uintptr_t args[3] = {size};
+		hook_invoke_alloc(
+		    hook_alloc_pvalloc, ret, (uintptr_t)ret, args);
+	}
+
+	LOG("core.pvalloc.exit", "result: %p\n", ret);
+	return ret;
+}
+#endif
+
 #if defined(JEMALLOC_IS_MALLOC) && defined(JEMALLOC_GLIBC_MALLOC_HOOK)
 /*
  * glibc provides the RTLD_DEEPBIND flag for dlopen which can make it possible
@@ -3260,49 +2230,59 @@ je_valloc(size_t size) {
  * passed an extra argument for the caller return address, which will be
  * ignored.
  */
-#include <features.h> // defines __GLIBC__ if we are compiling against glibc
+#	include <features.h> // defines __GLIBC__ if we are compiling against glibc
 
 JEMALLOC_EXPORT void (*__free_hook)(void *ptr) = je_free;
 JEMALLOC_EXPORT void *(*__malloc_hook)(size_t size) = je_malloc;
 JEMALLOC_EXPORT void *(*__realloc_hook)(void *ptr, size_t size) = je_realloc;
-#  ifdef JEMALLOC_GLIBC_MEMALIGN_HOOK
-JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) =
-    je_memalign;
-#  endif
+#	ifdef JEMALLOC_GLIBC_MEMALIGN_HOOK
+JEMALLOC_EXPORT void *(*__memalign_hook)(
+    size_t alignment, size_t size) = je_memalign;
+#	endif
 
-#  ifdef __GLIBC__
+#	ifdef __GLIBC__
 /*
  * To enable static linking with glibc, the libc specific malloc interface must
  * be implemented also, so none of glibc's malloc.o functions are added to the
  * link.
  */
-#    define ALIAS(je_fn)	__attribute__((alias (#je_fn), used))
+#		define ALIAS(je_fn) __attribute__((alias(#je_fn), used))
 /* To force macro expansion of je_ prefix before stringification. */
-#    define PREALIAS(je_fn)	ALIAS(je_fn)
-#    ifdef JEMALLOC_OVERRIDE___LIBC_CALLOC
+#		define PREALIAS(je_fn) ALIAS(je_fn)
+#		ifdef JEMALLOC_OVERRIDE___LIBC_CALLOC
 void *__libc_calloc(size_t n, size_t size) PREALIAS(je_calloc);
-#    endif
-#    ifdef JEMALLOC_OVERRIDE___LIBC_FREE
-void __libc_free(void* ptr) PREALIAS(je_free);
-#    endif
-#    ifdef JEMALLOC_OVERRIDE___LIBC_MALLOC
+#		endif
+#		ifdef JEMALLOC_OVERRIDE___LIBC_FREE
+void __libc_free(void *ptr) PREALIAS(je_free);
+#		endif
+#		ifdef JEMALLOC_OVERRIDE___LIBC_FREE_SIZED
+void __libc_free_sized(void *ptr, size_t size) PREALIAS(je_free_sized);
+#		endif
+#		ifdef JEMALLOC_OVERRIDE___LIBC_FREE_ALIGNED_SIZED
+void __libc_free_aligned_sized(void *ptr, size_t alignment, size_t size)
+    PREALIAS(je_free_aligned_sized);
+#		endif
+#		ifdef JEMALLOC_OVERRIDE___LIBC_MALLOC
 void *__libc_malloc(size_t size) PREALIAS(je_malloc);
-#    endif
-#    ifdef JEMALLOC_OVERRIDE___LIBC_MEMALIGN
+#		endif
+#		ifdef JEMALLOC_OVERRIDE___LIBC_MEMALIGN
 void *__libc_memalign(size_t align, size_t s) PREALIAS(je_memalign);
-#    endif
-#    ifdef JEMALLOC_OVERRIDE___LIBC_REALLOC
-void *__libc_realloc(void* ptr, size_t size) PREALIAS(je_realloc);
-#    endif
-#    ifdef JEMALLOC_OVERRIDE___LIBC_VALLOC
+#		endif
+#		ifdef JEMALLOC_OVERRIDE___LIBC_REALLOC
+void *__libc_realloc(void *ptr, size_t size) PREALIAS(je_realloc);
+#		endif
+#		ifdef JEMALLOC_OVERRIDE___LIBC_VALLOC
 void *__libc_valloc(size_t size) PREALIAS(je_valloc);
-#    endif
-#    ifdef JEMALLOC_OVERRIDE___POSIX_MEMALIGN
-int __posix_memalign(void** r, size_t a, size_t s) PREALIAS(je_posix_memalign);
-#    endif
-#    undef PREALIAS
-#    undef ALIAS
-#  endif
+#		endif
+#		ifdef JEMALLOC_OVERRIDE___LIBC_PVALLOC
+void *__libc_pvalloc(size_t size) PREALIAS(je_pvalloc);
+#		endif
+#		ifdef JEMALLOC_OVERRIDE___POSIX_MEMALIGN
+int __posix_memalign(void **r, size_t a, size_t s) PREALIAS(je_posix_memalign);
+#		endif
+#		undef PREALIAS
+#		undef ALIAS
+#	endif
 #endif
 
 /*
@@ -3335,23 +2315,23 @@ mallocx_arena_get(int flags) {
 
 #ifdef JEMALLOC_EXPERIMENTAL_SMALLOCX_API
 
-#define JEMALLOC_SMALLOCX_CONCAT_HELPER(x, y) x ## y
-#define JEMALLOC_SMALLOCX_CONCAT_HELPER2(x, y)  \
-  JEMALLOC_SMALLOCX_CONCAT_HELPER(x, y)
+#	define JEMALLOC_SMALLOCX_CONCAT_HELPER(x, y) x##y
+#	define JEMALLOC_SMALLOCX_CONCAT_HELPER2(x, y)                         \
+		JEMALLOC_SMALLOCX_CONCAT_HELPER(x, y)
 
 typedef struct {
-	void *ptr;
+	void  *ptr;
 	size_t size;
 } smallocx_return_t;
 
-JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
-smallocx_return_t JEMALLOC_NOTHROW
-/*
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN smallocx_return_t
+    JEMALLOC_NOTHROW
+    /*
  * The attribute JEMALLOC_ATTR(malloc) cannot be used due to:
  *  - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86488
  */
-JEMALLOC_SMALLOCX_CONCAT_HELPER2(je_smallocx_, JEMALLOC_VERSION_GID_IDENT)
-  (size_t size, int flags) {
+    JEMALLOC_SMALLOCX_CONCAT_HELPER2(je_smallocx_, JEMALLOC_VERSION_GID_IDENT)(
+        size_t size, int flags) {
 	/*
 	 * Note: the attribute JEMALLOC_ALLOC_SIZE(1) cannot be
 	 * used here because it makes writing beyond the `size`
@@ -3360,8 +2340,8 @@ JEMALLOC_SMALLOCX_CONCAT_HELPER2(je_smallocx_, JEMALLOC_VERSION_GID_IDENT)
 	 * up to `smallocx_return_t::size`.
 	 */
 	smallocx_return_t ret;
-	static_opts_t sopts;
-	dynamic_opts_t dopts;
+	static_opts_t     sopts;
+	dynamic_opts_t    dopts;
 
 	LOG("core.smallocx.entry", "size: %zu, flags: %d", size, flags);
 
@@ -3390,16 +2370,16 @@ JEMALLOC_SMALLOCX_CONCAT_HELPER2(je_smallocx_, JEMALLOC_VERSION_GID_IDENT)
 	LOG("core.smallocx.exit", "result: %p, size: %zu", ret.ptr, ret.size);
 	return ret;
 }
-#undef JEMALLOC_SMALLOCX_CONCAT_HELPER
-#undef JEMALLOC_SMALLOCX_CONCAT_HELPER2
+#	undef JEMALLOC_SMALLOCX_CONCAT_HELPER
+#	undef JEMALLOC_SMALLOCX_CONCAT_HELPER2
 #endif
 
-JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
-void JEMALLOC_NOTHROW *
+JEMALLOC_EXPORT
+JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW *
 JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1)
-je_mallocx(size_t size, int flags) {
-	void *ret;
-	static_opts_t sopts;
+    je_mallocx(size_t size, int flags) {
+	void          *ret;
+	static_opts_t  sopts;
 	dynamic_opts_t dopts;
 
 	LOG("core.mallocx.entry", "size: %zu, flags: %d", size, flags);
@@ -3424,8 +2404,8 @@ je_mallocx(size_t size, int flags) {
 	imalloc(&sopts, &dopts);
 	if (sopts.slow) {
 		uintptr_t args[3] = {size, flags};
-		hook_invoke_alloc(hook_alloc_mallocx, ret, (uintptr_t)ret,
-		    args);
+		hook_invoke_alloc(
+		    hook_alloc_mallocx, ret, (uintptr_t)ret, args);
 	}
 
 	LOG("core.mallocx.exit", "result: %p", ret);
@@ -3442,18 +2422,25 @@ irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize,
 		return NULL;
 	}
 
-	alignment = prof_sample_align(alignment);
-	if (usize <= SC_SMALL_MAXCLASS) {
-		p = iralloct(tsdn, old_ptr, old_usize,
-		    SC_LARGE_MINCLASS, alignment, zero, tcache,
+	alignment = prof_sample_align(usize, alignment);
+	/*
+	 * If the allocation is small enough that it would normally be allocated
+	 * on a slab, we need to take additional steps to ensure that it gets
+	 * its own extent instead.
+	 */
+	if (sz_can_use_slab(usize)) {
+		size_t bumped_usize = sz_sa2u(usize, alignment);
+		p = iralloct_explicit_slab(tsdn, old_ptr, old_usize,
+		    bumped_usize, alignment, zero, /* slab */ false, tcache,
 		    arena, hook_args);
 		if (p == NULL) {
 			return NULL;
 		}
-		arena_prof_promote(tsdn, p, usize);
+		arena_prof_promote(tsdn, p, usize, bumped_usize);
 	} else {
-		p = iralloct(tsdn, old_ptr, old_usize, usize, alignment, zero,
-		    tcache, arena, hook_args);
+		p = iralloct_explicit_slab(tsdn, old_ptr, old_usize, usize,
+		    alignment, zero, /* slab */ false, tcache, arena,
+		    hook_args);
 	}
 	assert(prof_sample_aligned(p));
 
@@ -3462,40 +2449,39 @@ irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize,
 
 JEMALLOC_ALWAYS_INLINE void *
 irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size,
-    size_t alignment, size_t usize, bool zero, tcache_t *tcache,
-    arena_t *arena, emap_alloc_ctx_t *alloc_ctx,
-    hook_ralloc_args_t *hook_args) {
+    size_t alignment, size_t usize, bool zero, tcache_t *tcache, arena_t *arena,
+    emap_alloc_ctx_t *alloc_ctx, hook_ralloc_args_t *hook_args) {
 	prof_info_t old_prof_info;
 	prof_info_get_and_reset_recent(tsd, old_ptr, alloc_ctx, &old_prof_info);
-	bool prof_active = prof_active_get_unlocked();
-	bool sample_event = te_prof_sample_event_lookahead(tsd, usize);
+	bool         prof_active = prof_active_get_unlocked();
+	bool         sample_event = te_prof_sample_event_lookahead(tsd, usize);
 	prof_tctx_t *tctx = prof_alloc_prep(tsd, prof_active, sample_event);
-	void *p;
-	if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) {
+	void        *p;
+	if (unlikely(tctx != PROF_TCTX_SENTINEL)) {
 		p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize,
 		    usize, alignment, zero, tcache, arena, tctx, hook_args);
 	} else {
 		p = iralloct(tsd_tsdn(tsd), old_ptr, old_usize, size, alignment,
-		    zero, tcache, arena, hook_args);
+		    usize, zero, tcache, arena, hook_args);
 	}
 	if (unlikely(p == NULL)) {
 		prof_alloc_rollback(tsd, tctx);
 		return NULL;
 	}
 	assert(usize == isalloc(tsd_tsdn(tsd), p));
-	prof_realloc(tsd, p, size, usize, tctx, prof_active, old_ptr,
-	    old_usize, &old_prof_info, sample_event);
+	prof_realloc(tsd, p, size, usize, tctx, prof_active, old_ptr, old_usize,
+	    &old_prof_info, sample_event);
 
 	return p;
 }
 
 static void *
 do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) {
-	void *p;
-	tsd_t *tsd;
-	size_t usize;
-	size_t old_usize;
-	size_t alignment = MALLOCX_ALIGN_GET(flags);
+	void    *p;
+	tsd_t   *tsd;
+	size_t   usize;
+	size_t   old_usize;
+	size_t   alignment = MALLOCX_ALIGN_GET(flags);
 	arena_t *arena;
 
 	assert(ptr != NULL);
@@ -3511,22 +2497,22 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) {
 		goto label_oom;
 	}
 
-	unsigned tcache_ind = mallocx_tcache_get(flags);
+	unsigned  tcache_ind = mallocx_tcache_get(flags);
 	tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind,
 	    /* slow */ true, /* is_alloc */ true);
 
 	emap_alloc_ctx_t alloc_ctx;
-	emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr,
-	    &alloc_ctx);
+	emap_alloc_ctx_lookup(
+	    tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx);
 	assert(alloc_ctx.szind != SC_NSIZES);
-	old_usize = sz_index2size(alloc_ctx.szind);
+	old_usize = emap_alloc_ctx_usize_get(&alloc_ctx);
 	assert(old_usize == isalloc(tsd_tsdn(tsd), ptr));
 	if (aligned_usize_get(size, alignment, &usize, NULL, false)) {
 		goto label_oom;
 	}
 
-	hook_ralloc_args_t hook_args = {is_realloc, {(uintptr_t)ptr, size,
-		flags, 0}};
+	hook_ralloc_args_t hook_args = {
+	    is_realloc, {(uintptr_t)ptr, size, flags, 0}};
 	if (config_prof && opt_prof) {
 		p = irallocx_prof(tsd, ptr, old_usize, size, alignment, usize,
 		    zero, tcache, arena, &alloc_ctx, &hook_args);
@@ -3535,7 +2521,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) {
 		}
 	} else {
 		p = iralloct(tsd_tsdn(tsd), ptr, old_usize, size, alignment,
-		    zero, tcache, arena, &hook_args);
+		    usize, zero, tcache, arena, &hook_args);
 		if (unlikely(p == NULL)) {
 			goto label_oom;
 		}
@@ -3551,12 +2537,15 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) {
 	if (config_fill && unlikely(opt_junk_alloc) && usize > old_usize
 	    && !zero) {
 		size_t excess_len = usize - old_usize;
-		void *excess_start = (void *)((uintptr_t)p + old_usize);
+		void  *excess_start = (void *)((byte_t *)p + old_usize);
 		junk_alloc_callback(excess_start, excess_len);
 	}
 
 	return p;
 label_oom:
+	if (is_realloc) {
+		set_errno(ENOMEM);
+	}
 	if (config_xmalloc && unlikely(opt_xmalloc)) {
 		malloc_write("<jemalloc>: Error in rallocx(): out of memory\n");
 		abort();
@@ -3567,12 +2556,11 @@ label_oom:
 	return NULL;
 }
 
-JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
-void JEMALLOC_NOTHROW *
-JEMALLOC_ALLOC_SIZE(2)
-je_rallocx(void *ptr, size_t size, int flags) {
-	LOG("core.rallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr,
-	    size, flags);
+JEMALLOC_EXPORT
+JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW *
+JEMALLOC_ALLOC_SIZE(2) je_rallocx(void *ptr, size_t size, int flags) {
+	LOG("core.rallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, size,
+	    flags);
 	void *ret = do_rallocx(ptr, size, flags, false);
 	LOG("core.rallocx.exit", "result: %p", ret);
 	return ret;
@@ -3606,7 +2594,8 @@ do_realloc_nonnull_zero(void *ptr) {
 		check_entry_exit_locking(tsd_tsdn(tsd));
 		return NULL;
 	} else {
-		safety_check_fail("Called realloc(non-null-ptr, 0) with "
+		safety_check_fail(
+		    "Called realloc(non-null-ptr, 0) with "
 		    "zero_realloc:abort set\n");
 		/* In real code, this will never run; the safety check failure
 		 * will call abort.  In the unit test, we just want to bail out
@@ -3617,10 +2606,9 @@ do_realloc_nonnull_zero(void *ptr) {
 	}
 }
 
-JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
-void JEMALLOC_NOTHROW *
-JEMALLOC_ALLOC_SIZE(2)
-je_realloc(void *ptr, size_t size) {
+JEMALLOC_EXPORT
+JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW *
+JEMALLOC_ALLOC_SIZE(2) je_realloc(void *ptr, size_t size) {
 	LOG("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size);
 
 	if (likely(ptr != NULL && size != 0)) {
@@ -3635,7 +2623,7 @@ je_realloc(void *ptr, size_t size) {
 		/* realloc(NULL, size) is equivalent to malloc(size). */
 		void *ret;
 
-		static_opts_t sopts;
+		static_opts_t  sopts;
 		dynamic_opts_t dopts;
 
 		static_opts_init(&sopts);
@@ -3653,8 +2641,8 @@ je_realloc(void *ptr, size_t size) {
 		imalloc(&sopts, &dopts);
 		if (sopts.slow) {
 			uintptr_t args[3] = {(uintptr_t)ptr, size};
-			hook_invoke_alloc(hook_alloc_realloc, ret,
-			    (uintptr_t)ret, args);
+			hook_invoke_alloc(
+			    hook_alloc_realloc, ret, (uintptr_t)ret, args);
 		}
 		LOG("core.realloc.exit", "result: %p", ret);
 		return ret;
@@ -3666,8 +2654,8 @@ ixallocx_helper(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size,
     size_t extra, size_t alignment, bool zero) {
 	size_t newsize;
 
-	if (ixalloc(tsdn, ptr, old_usize, size, extra, alignment, zero,
-	    &newsize)) {
+	if (ixalloc(
+	        tsdn, ptr, old_usize, size, extra, alignment, zero, &newsize)) {
 		return old_usize;
 	}
 
@@ -3682,8 +2670,8 @@ ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size,
 		return old_usize;
 	}
 
-	return ixallocx_helper(tsdn, ptr, old_usize, size, extra, alignment,
-	    zero);
+	return ixallocx_helper(
+	    tsdn, ptr, old_usize, size, extra, alignment, zero);
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
@@ -3703,8 +2691,8 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
 	 * prof_realloc() will use the actual usize to decide whether to sample.
 	 */
 	size_t usize_max;
-	if (aligned_usize_get(size + extra, alignment, &usize_max, NULL,
-	    false)) {
+	if (aligned_usize_get(
+	        size + extra, alignment, &usize_max, NULL, false)) {
 		/*
 		 * usize_max is out of range, and chances are that allocation
 		 * will fail, but use the maximum possible value and carry on
@@ -3717,7 +2705,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
 	prof_tctx_t *tctx = prof_alloc_prep(tsd, prof_active, sample_event);
 
 	size_t usize;
-	if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) {
+	if (unlikely(tctx != PROF_TCTX_SENTINEL)) {
 		usize = ixallocx_prof_sample(tsd_tsdn(tsd), ptr, old_usize,
 		    size, extra, alignment, zero, tctx);
 	} else {
@@ -3738,7 +2726,15 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
 		prof_info_get(tsd, ptr, alloc_ctx, &prof_info);
 		prof_alloc_rollback(tsd, tctx);
 	} else {
-		prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info);
+		/*
+		 * Need to retrieve the new alloc_ctx since the modification
+		 * to edata has already been done.
+		 */
+		emap_alloc_ctx_t new_alloc_ctx;
+		emap_alloc_ctx_lookup(
+		    tsd_tsdn(tsd), &arena_emap_global, ptr, &new_alloc_ctx);
+		prof_info_get_and_reset_recent(
+		    tsd, ptr, &new_alloc_ctx, &prof_info);
 		assert(usize <= usize_max);
 		sample_event = te_prof_sample_event_lookahead(tsd, usize);
 		prof_realloc(tsd, ptr, size, usize, tctx, prof_active, ptr,
@@ -3754,10 +2750,12 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) {
 	tsd_t *tsd;
 	size_t usize, old_usize;
 	size_t alignment = MALLOCX_ALIGN_GET(flags);
-	bool zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true);
+	bool   zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true);
 
-	LOG("core.xallocx.entry", "ptr: %p, size: %zu, extra: %zu, "
-	    "flags: %d", ptr, size, extra, flags);
+	LOG("core.xallocx.entry",
+	    "ptr: %p, size: %zu, extra: %zu, "
+	    "flags: %d",
+	    ptr, size, extra, flags);
 
 	assert(ptr != NULL);
 	assert(size != 0);
@@ -3771,14 +2769,14 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) {
 	 * object associated with the ptr (though the content of the edata_t
 	 * object can be changed).
 	 */
-	edata_t *old_edata = emap_edata_lookup(tsd_tsdn(tsd),
-	    &arena_emap_global, ptr);
+	edata_t *old_edata = emap_edata_lookup(
+	    tsd_tsdn(tsd), &arena_emap_global, ptr);
 
 	emap_alloc_ctx_t alloc_ctx;
-	emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr,
-	    &alloc_ctx);
+	emap_alloc_ctx_lookup(
+	    tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx);
 	assert(alloc_ctx.szind != SC_NSIZES);
-	old_usize = sz_index2size(alloc_ctx.szind);
+	old_usize = emap_alloc_ctx_usize_get(&alloc_ctx);
 	assert(old_usize == isalloc(tsd_tsdn(tsd), ptr));
 	/*
 	 * The API explicitly absolves itself of protecting against (size +
@@ -3818,17 +2816,17 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) {
 	thread_alloc_event(tsd, usize);
 	thread_dalloc_event(tsd, old_usize);
 
-	if (config_fill && unlikely(opt_junk_alloc) && usize > old_usize &&
-	    !zero) {
+	if (config_fill && unlikely(opt_junk_alloc) && usize > old_usize
+	    && !zero) {
 		size_t excess_len = usize - old_usize;
-		void *excess_start = (void *)((uintptr_t)ptr + old_usize);
+		void  *excess_start = (void *)((byte_t *)ptr + old_usize);
 		junk_alloc_callback(excess_start, excess_len);
 	}
 label_not_resized:
 	if (unlikely(!tsd_fast(tsd))) {
 		uintptr_t args[4] = {(uintptr_t)ptr, size, extra, flags};
-		hook_invoke_expand(hook_expand_xallocx, ptr, old_usize,
-		    usize, (uintptr_t)usize, args);
+		hook_invoke_expand(hook_expand_xallocx, ptr, old_usize, usize,
+		    (uintptr_t)usize, args);
 	}
 
 	UTRACE(ptr, size, ptr);
@@ -3839,9 +2837,8 @@ label_not_resized:
 }
 
 JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW
-JEMALLOC_ATTR(pure)
-je_sallocx(const void *ptr, int flags) {
-	size_t usize;
+JEMALLOC_ATTR(pure) je_sallocx(const void *ptr, int flags) {
+	size_t  usize;
 	tsdn_t *tsdn;
 
 	LOG("core.sallocx.entry", "ptr: %p, flags: %d", ptr, flags);
@@ -3873,10 +2870,10 @@ je_dallocx(void *ptr, int flags) {
 	assert(malloc_initialized() || IS_INITIALIZER);
 
 	tsd_t *tsd = tsd_fetch_min();
-	bool fast = tsd_fast(tsd);
+	bool   fast = tsd_fast(tsd);
 	check_entry_exit_locking(tsd_tsdn(tsd));
 
-	unsigned tcache_ind = mallocx_tcache_get(flags);
+	unsigned  tcache_ind = mallocx_tcache_get(flags);
 	tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind, !fast,
 	    /* is_alloc */ false);
 
@@ -3910,11 +2907,11 @@ sdallocx_default(void *ptr, size_t size, int flags) {
 	assert(malloc_initialized() || IS_INITIALIZER);
 
 	tsd_t *tsd = tsd_fetch_min();
-	bool fast = tsd_fast(tsd);
+	bool   fast = tsd_fast(tsd);
 	size_t usize = inallocx(tsd_tsdn(tsd), size, flags);
 	check_entry_exit_locking(tsd_tsdn(tsd));
 
-	unsigned tcache_ind = mallocx_tcache_get(flags);
+	unsigned  tcache_ind = mallocx_tcache_get(flags);
 	tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind, !fast,
 	    /* is_alloc */ false);
 
@@ -3932,32 +2929,17 @@ sdallocx_default(void *ptr, size_t size, int flags) {
 
 JEMALLOC_EXPORT void JEMALLOC_NOTHROW
 je_sdallocx(void *ptr, size_t size, int flags) {
-	LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr,
-		size, flags);
+	LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, size,
+	    flags);
 
-	if (flags != 0 || !free_fastpath(ptr, size, true)) {
-		sdallocx_default(ptr, size, flags);
-	}
-
-	LOG("core.sdallocx.exit", "");
-}
-
-void JEMALLOC_NOTHROW
-je_sdallocx_noflags(void *ptr, size_t size) {
-	LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: 0", ptr,
-		size);
-
-	if (!free_fastpath(ptr, size, true)) {
-		sdallocx_default(ptr, size, 0);
-	}
+	je_sdallocx_impl(ptr, size, flags);
 
 	LOG("core.sdallocx.exit", "");
 }
 
 JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW
-JEMALLOC_ATTR(pure)
-je_nallocx(size_t size, int flags) {
-	size_t usize;
+JEMALLOC_ATTR(pure) je_nallocx(size_t size, int flags) {
+	size_t  usize;
 	tsdn_t *tsdn;
 
 	assert(size != 0);
@@ -3982,9 +2964,9 @@ je_nallocx(size_t size, int flags) {
 }
 
 JEMALLOC_EXPORT int JEMALLOC_NOTHROW
-je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp,
-    size_t newlen) {
-	int ret;
+je_mallctl(
+    const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+	int    ret;
 	tsd_t *tsd;
 
 	LOG("core.mallctl.entry", "name: %s", name);
@@ -4025,8 +3007,8 @@ je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) {
 
 JEMALLOC_EXPORT int JEMALLOC_NOTHROW
 je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-  void *newp, size_t newlen) {
-	int ret;
+    void *newp, size_t newlen) {
+	int    ret;
 	tsd_t *tsd;
 
 	LOG("core.mallctlbymib.entry", "");
@@ -4046,8 +3028,8 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
 
 #define STATS_PRINT_BUFSIZE 65536
 JEMALLOC_EXPORT void JEMALLOC_NOTHROW
-je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
-    const char *opts) {
+je_malloc_stats_print(
+    void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) {
 	tsdn_t *tsdn;
 
 	LOG("core.malloc_stats_print.entry", "");
@@ -4118,12 +3100,12 @@ je_malloc_size(const void *ptr) {
 static void
 batch_alloc_prof_sample_assert(tsd_t *tsd, size_t batch, size_t usize) {
 	assert(config_prof && opt_prof);
-	bool prof_sample_event = te_prof_sample_event_lookahead(tsd,
-	    batch * usize);
+	bool prof_sample_event = te_prof_sample_event_lookahead(
+	    tsd, batch * usize);
 	assert(!prof_sample_event);
 	size_t surplus;
-	prof_sample_event = te_prof_sample_event_lookahead_surplus(tsd,
-	    (batch + 1) * usize, &surplus);
+	prof_sample_event = te_prof_sample_event_lookahead_surplus(
+	    tsd, (batch + 1) * usize, &surplus);
 	assert(prof_sample_event);
 	assert(surplus < usize);
 }
@@ -4148,14 +3130,14 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) {
 		goto label_done;
 	}
 	szind_t ind = sz_size2index(usize);
-	bool zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true);
+	bool    zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true);
 
 	/*
 	 * The cache bin and arena will be lazily initialized; it's hard to
 	 * know in advance whether each of them needs to be initialized.
 	 */
 	cache_bin_t *bin = NULL;
-	arena_t *arena = NULL;
+	arena_t     *arena = NULL;
 
 	size_t nregs = 0;
 	if (likely(ind < SC_NBINS)) {
@@ -4166,10 +3148,10 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) {
 	while (filled < num) {
 		size_t batch = num - filled;
 		size_t surplus = SIZE_MAX; /* Dead store. */
-		bool prof_sample_event = config_prof && opt_prof
+		bool   prof_sample_event = config_prof && opt_prof
 		    && prof_active_get_unlocked()
-		    && te_prof_sample_event_lookahead_surplus(tsd,
-		    batch * usize, &surplus);
+		    && te_prof_sample_event_lookahead_surplus(
+		        tsd, batch * usize, &surplus);
 
 		if (prof_sample_event) {
 			/*
@@ -4185,8 +3167,8 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) {
 		if (likely(ind < SC_NBINS) && batch >= nregs) {
 			if (arena == NULL) {
 				unsigned arena_ind = mallocx_arena_get(flags);
-				if (arena_get_from_ind(tsd, arena_ind,
-				    &arena)) {
+				if (arena_get_from_ind(
+				        tsd, arena_ind, &arena)) {
 					goto label_done;
 				}
 				if (arena == NULL) {
@@ -4203,15 +3185,16 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) {
 			filled += n;
 		}
 
-		if (likely(ind < nhbins) && progress < batch) {
+		unsigned  tcache_ind = mallocx_tcache_get(flags);
+		tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind,
+		    /* slow */ true, /* is_alloc */ true);
+		if (likely(tcache != NULL
+		        && ind < tcache_nbins_get(tcache->tcache_slow)
+		        && !tcache_bin_disabled(
+		            ind, &tcache->bins[ind], tcache->tcache_slow))
+		    && progress < batch) {
 			if (bin == NULL) {
-				unsigned tcache_ind = mallocx_tcache_get(flags);
-				tcache_t *tcache = tcache_get_from_ind(tsd,
-				    tcache_ind, /* slow */ true,
-				    /* is_alloc */ true);
-				if (tcache != NULL) {
-					bin = &tcache->bins[ind];
-				}
+				bin = &tcache->bins[ind];
 			}
 			/*
 			 * If we don't have a tcache bin, we don't want to
@@ -4240,22 +3223,22 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) {
 				 * additional benefit is that the tcache will
 				 * not be empty for the next allocation request.
 				 */
-				size_t n = cache_bin_alloc_batch(bin, bin_batch,
-				    ptrs + filled);
+				size_t n = cache_bin_alloc_batch(
+				    bin, bin_batch, ptrs + filled);
 				if (config_stats) {
 					bin->tstats.nrequests += n;
 				}
 				if (zero) {
 					for (size_t i = 0; i < n; ++i) {
-						memset(ptrs[filled + i], 0,
-						    usize);
+						memset(
+						    ptrs[filled + i], 0, usize);
 					}
 				}
 				if (config_prof && opt_prof
 				    && unlikely(ind >= SC_NBINS)) {
 					for (size_t i = 0; i < n; ++i) {
-						prof_tctx_reset_sampled(tsd,
-						    ptrs[filled + i]);
+						prof_tctx_reset_sampled(
+						    tsd, ptrs[filled + i]);
 					}
 				}
 				progress += n;
@@ -4331,7 +3314,7 @@ JEMALLOC_EXPORT void
 _malloc_prefork(void)
 #endif
 {
-	tsd_t *tsd;
+	tsd_t   *tsd;
 	unsigned i, j, narenas;
 	arena_t *arena;
 
@@ -4361,8 +3344,8 @@ _malloc_prefork(void)
 	/* Break arena prefork into stages to preserve lock order. */
 	for (i = 0; i < 9; i++) {
 		for (j = 0; j < narenas; j++) {
-			if ((arena = arena_get(tsd_tsdn(tsd), j, false)) !=
-			    NULL) {
+			if ((arena = arena_get(tsd_tsdn(tsd), j, false))
+			    != NULL) {
 				switch (i) {
 				case 0:
 					arena_prefork0(tsd_tsdn(tsd), arena);
@@ -4391,11 +3374,11 @@ _malloc_prefork(void)
 				case 8:
 					arena_prefork8(tsd_tsdn(tsd), arena);
 					break;
-				default: not_reached();
+				default:
+					not_reached();
 				}
 			}
 		}
-
 	}
 	prof_prefork1(tsd_tsdn(tsd));
 	stats_prefork(tsd_tsdn(tsd));
@@ -4410,7 +3393,7 @@ JEMALLOC_EXPORT void
 _malloc_postfork(void)
 #endif
 {
-	tsd_t *tsd;
+	tsd_t   *tsd;
 	unsigned i, narenas;
 
 #ifdef JEMALLOC_MUTEX_INIT_CB
@@ -4445,7 +3428,7 @@ _malloc_postfork(void)
 
 void
 jemalloc_postfork_child(void) {
-	tsd_t *tsd;
+	tsd_t   *tsd;
 	unsigned i, narenas;
 
 	assert(malloc_initialized());
diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp
index 451655f1..4e838d3b 100644
--- a/src/jemalloc_cpp.cpp
+++ b/src/jemalloc_cpp.cpp
@@ -1,7 +1,7 @@
 #include <mutex>
 #include <new>
+// NOLINTBEGIN(misc-use-anonymous-namespace)
 
-#define JEMALLOC_CPP_CPP_
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -24,41 +24,54 @@ extern "C" {
 //
 // ... but it needs to work with jemalloc namespaces.
 
-void	*operator new(std::size_t size);
-void	*operator new[](std::size_t size);
-void	*operator new(std::size_t size, const std::nothrow_t &) noexcept;
-void	*operator new[](std::size_t size, const std::nothrow_t &) noexcept;
-void	operator delete(void *ptr) noexcept;
-void	operator delete[](void *ptr) noexcept;
-void	operator delete(void *ptr, const std::nothrow_t &) noexcept;
-void	operator delete[](void *ptr, const std::nothrow_t &) noexcept;
+void *operator new(std::size_t size);
+void *operator new[](std::size_t size);
+void *operator new(std::size_t size, const std::nothrow_t &) noexcept;
+void *operator new[](std::size_t size, const std::nothrow_t &) noexcept;
+void  operator delete(void *ptr) noexcept;
+void  operator delete[](void *ptr) noexcept;
+void  operator delete(void *ptr, const std::nothrow_t &) noexcept;
+void  operator delete[](void *ptr, const std::nothrow_t &) noexcept;
 
 #if __cpp_sized_deallocation >= 201309
 /* C++14's sized-delete operators. */
-void	operator delete(void *ptr, std::size_t size) noexcept;
-void	operator delete[](void *ptr, std::size_t size) noexcept;
+void operator delete(void *ptr, std::size_t size) noexcept;
+void operator delete[](void *ptr, std::size_t size) noexcept;
 #endif
 
 #if __cpp_aligned_new >= 201606
 /* C++17's over-aligned operators. */
-void	*operator new(std::size_t size, std::align_val_t);
-void	*operator new(std::size_t size, std::align_val_t, const std::nothrow_t &) noexcept;
-void	*operator new[](std::size_t size, std::align_val_t);
-void	*operator new[](std::size_t size, std::align_val_t, const std::nothrow_t &) noexcept;
-void	operator delete(void* ptr, std::align_val_t) noexcept;
-void	operator delete(void* ptr, std::align_val_t, const std::nothrow_t &) noexcept;
-void	operator delete(void* ptr, std::size_t size, std::align_val_t al) noexcept;
-void	operator delete[](void* ptr, std::align_val_t) noexcept;
-void	operator delete[](void* ptr, std::align_val_t, const std::nothrow_t &) noexcept;
-void	operator delete[](void* ptr, std::size_t size, std::align_val_t al) noexcept;
+void *operator new(std::size_t size, std::align_val_t);
+void *operator new(
+    std::size_t size, std::align_val_t, const std::nothrow_t &) noexcept;
+void *operator new[](std::size_t size, std::align_val_t);
+void *operator new[](
+    std::size_t size, std::align_val_t, const std::nothrow_t &) noexcept;
+void operator delete(void *ptr, std::align_val_t) noexcept;
+void operator delete(
+    void *ptr, std::align_val_t, const std::nothrow_t &) noexcept;
+void operator delete(void *ptr, std::size_t size, std::align_val_t al) noexcept;
+void operator delete[](void *ptr, std::align_val_t) noexcept;
+void operator delete[](
+    void *ptr, std::align_val_t, const std::nothrow_t &) noexcept;
+void operator delete[](
+    void *ptr, std::size_t size, std::align_val_t al) noexcept;
 #endif
 
 JEMALLOC_NOINLINE
 static void *
 handleOOM(std::size_t size, bool nothrow) {
 	if (opt_experimental_infallible_new) {
-		safety_check_fail("<jemalloc>: Allocation failed and "
-		    "opt.experimental_infallible_new is true. Aborting.\n");
+		const char *huge_warning = (size >= ((std::size_t)1 << 30))
+		    ? "This may be caused by heap corruption, if the large size "
+		      "is unexpected (suggest building with sanitizers for "
+		      "debugging)."
+		    : "";
+
+		safety_check_fail(
+		    "<jemalloc>: Allocation of size %zu failed. "
+		    "%s opt.experimental_infallible_new is true. Aborting.\n",
+		    size, huge_warning);
 		return nullptr;
 	}
 
@@ -68,7 +81,7 @@ handleOOM(std::size_t size, bool nothrow) {
 		std::new_handler handler;
 		// GCC-4.8 and clang 4.0 do not have std::get_new_handler.
 		{
-			static std::mutex mtx;
+			static std::mutex           mtx;
 			std::lock_guard<std::mutex> lock(mtx);
 
 			handler = std::set_new_handler(nullptr);
@@ -92,9 +105,8 @@ handleOOM(std::size_t size, bool nothrow) {
 }
 
 template <bool IsNoExcept>
-JEMALLOC_NOINLINE
-static void *
-fallback_impl(std::size_t size) noexcept(IsNoExcept) {
+JEMALLOC_NOINLINE static void *
+fallbackNewImpl(std::size_t size) noexcept(IsNoExcept) {
 	void *ptr = malloc_default(size);
 	if (likely(ptr != nullptr)) {
 		return ptr;
@@ -103,10 +115,14 @@ fallback_impl(std::size_t size) noexcept(IsNoExcept) {
 }
 
 template <bool IsNoExcept>
-JEMALLOC_ALWAYS_INLINE
-void *
+JEMALLOC_ALWAYS_INLINE void *
 newImpl(std::size_t size) noexcept(IsNoExcept) {
-	return imalloc_fastpath(size, &fallback_impl<IsNoExcept>);
+	LOG("core.operator_new.entry", "size: %zu", size);
+
+	void *ret = imalloc_fastpath(size, &fallbackNewImpl<IsNoExcept>);
+
+	LOG("core.operator_new.exit", "result: %p", ret);
+	return ret;
 }
 
 void *
@@ -132,9 +148,9 @@ operator new[](std::size_t size, const std::nothrow_t &) noexcept {
 #if __cpp_aligned_new >= 201606
 
 template <bool IsNoExcept>
-JEMALLOC_ALWAYS_INLINE
-void *
-alignedNewImpl(std::size_t size, std::align_val_t alignment) noexcept(IsNoExcept) {
+JEMALLOC_ALWAYS_INLINE void *
+alignedNewImpl(std::size_t size, std::align_val_t alignment) noexcept(
+    IsNoExcept) {
 	void *ptr = je_aligned_alloc(static_cast<std::size_t>(alignment), size);
 	if (likely(ptr != nullptr)) {
 		return ptr;
@@ -154,45 +170,68 @@ operator new[](std::size_t size, std::align_val_t alignment) {
 }
 
 void *
-operator new(std::size_t size, std::align_val_t alignment, const std::nothrow_t &) noexcept {
+operator new(std::size_t size, std::align_val_t alignment,
+    const std::nothrow_t &) noexcept {
 	return alignedNewImpl<true>(size, alignment);
 }
 
 void *
-operator new[](std::size_t size, std::align_val_t alignment, const std::nothrow_t &) noexcept {
+operator new[](std::size_t size, std::align_val_t alignment,
+    const std::nothrow_t &) noexcept {
 	return alignedNewImpl<true>(size, alignment);
 }
 
-#endif  // __cpp_aligned_new
+#endif // __cpp_aligned_new
 
 void
 operator delete(void *ptr) noexcept {
-	je_free(ptr);
+	LOG("core.operator_delete.entry", "ptr: %p", ptr);
+
+	je_free_impl(ptr);
+
+	LOG("core.operator_delete.exit", "");
 }
 
 void
 operator delete[](void *ptr) noexcept {
-	je_free(ptr);
+	LOG("core.operator_delete.entry", "ptr: %p", ptr);
+
+	je_free_impl(ptr);
+
+	LOG("core.operator_delete.exit", "");
 }
 
 void
 operator delete(void *ptr, const std::nothrow_t &) noexcept {
-	je_free(ptr);
+	LOG("core.operator_delete.entry", "ptr: %p", ptr);
+
+	je_free_impl(ptr);
+
+	LOG("core.operator_delete.exit", "");
 }
 
-void operator delete[](void *ptr, const std::nothrow_t &) noexcept {
-	je_free(ptr);
+void
+operator delete[](void *ptr, const std::nothrow_t &) noexcept {
+	LOG("core.operator_delete.entry", "ptr: %p", ptr);
+
+	je_free_impl(ptr);
+
+	LOG("core.operator_delete.exit", "");
 }
 
 #if __cpp_sized_deallocation >= 201309
 
 JEMALLOC_ALWAYS_INLINE
 void
-sizedDeleteImpl(void* ptr, std::size_t size) noexcept {
+sizedDeleteImpl(void *ptr, std::size_t size) noexcept {
 	if (unlikely(ptr == nullptr)) {
 		return;
 	}
+	LOG("core.operator_delete.entry", "ptr: %p, size: %zu", ptr, size);
+
 	je_sdallocx_noflags(ptr, size);
+
+	LOG("core.operator_delete.exit", "");
 }
 
 void
@@ -205,50 +244,76 @@ operator delete[](void *ptr, std::size_t size) noexcept {
 	sizedDeleteImpl(ptr, size);
 }
 
-#endif  // __cpp_sized_deallocation
+#endif // __cpp_sized_deallocation
 
 #if __cpp_aligned_new >= 201606
 
 JEMALLOC_ALWAYS_INLINE
 void
-alignedSizedDeleteImpl(void* ptr, std::size_t size, std::align_val_t alignment) noexcept {
+alignedSizedDeleteImpl(
+    void *ptr, std::size_t size, std::align_val_t alignment) noexcept {
 	if (config_debug) {
 		assert(((size_t)alignment & ((size_t)alignment - 1)) == 0);
 	}
 	if (unlikely(ptr == nullptr)) {
 		return;
 	}
-	je_sdallocx(ptr, size, MALLOCX_ALIGN(alignment));
+	LOG("core.operator_delete.entry", "ptr: %p, size: %zu, alignment: %zu",
+	    ptr, size, alignment);
+
+	je_sdallocx_impl(ptr, size, MALLOCX_ALIGN(alignment));
+
+	LOG("core.operator_delete.exit", "");
 }
 
 void
-operator delete(void* ptr, std::align_val_t) noexcept {
-	je_free(ptr);
+operator delete(void *ptr, std::align_val_t) noexcept {
+	LOG("core.operator_delete.entry", "ptr: %p", ptr);
+
+	je_free_impl(ptr);
+
+	LOG("core.operator_delete.exit", "");
 }
 
 void
-operator delete[](void* ptr, std::align_val_t) noexcept {
-	je_free(ptr);
+operator delete[](void *ptr, std::align_val_t) noexcept {
+	LOG("core.operator_delete.entry", "ptr: %p", ptr);
+
+	je_free_impl(ptr);
+
+	LOG("core.operator_delete.exit", "");
 }
 
 void
-operator delete(void* ptr, std::align_val_t, const std::nothrow_t&) noexcept {
-	je_free(ptr);
+operator delete(void *ptr, std::align_val_t, const std::nothrow_t &) noexcept {
+	LOG("core.operator_delete.entry", "ptr: %p", ptr);
+
+	je_free_impl(ptr);
+
+	LOG("core.operator_delete.exit", "");
 }
 
 void
-operator delete[](void* ptr, std::align_val_t, const std::nothrow_t&) noexcept {
-	je_free(ptr);
+operator delete[](
+    void *ptr, std::align_val_t, const std::nothrow_t &) noexcept {
+	LOG("core.operator_delete.entry", "ptr: %p", ptr);
+
+	je_free_impl(ptr);
+
+	LOG("core.operator_delete.exit", "");
 }
 
 void
-operator delete(void* ptr, std::size_t size, std::align_val_t alignment) noexcept {
+operator delete(
+    void *ptr, std::size_t size, std::align_val_t alignment) noexcept {
 	alignedSizedDeleteImpl(ptr, size, alignment);
 }
 
 void
-operator delete[](void* ptr, std::size_t size, std::align_val_t alignment) noexcept {
+operator delete[](
+    void *ptr, std::size_t size, std::align_val_t alignment) noexcept {
 	alignedSizedDeleteImpl(ptr, size, alignment);
 }
 
-#endif  // __cpp_aligned_new
+#endif // __cpp_aligned_new
+// NOLINTEND(misc-use-anonymous-namespace)
diff --git a/src/large.c b/src/large.c
index 5fc4bf58..6ccf49d7 100644
--- a/src/large.c
+++ b/src/large.c
@@ -18,10 +18,10 @@ large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero) {
 }
 
 void *
-large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
-    bool zero) {
-	size_t ausize;
-	edata_t *edata;
+large_palloc(
+    tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) {
+	size_t            ausize;
+	edata_t          *edata;
 	UNUSED bool idump JEMALLOC_CC_SILENCE_INIT(false);
 
 	assert(!tsdn_null(tsdn) || arena != NULL);
@@ -34,12 +34,14 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
 	if (likely(!tsdn_null(tsdn))) {
 		arena = arena_choose_maybe_huge(tsdn_tsd(tsdn), arena, usize);
 	}
-	if (unlikely(arena == NULL) || (edata = arena_extent_alloc_large(tsdn,
-	    arena, usize, alignment, zero)) == NULL) {
+	if (unlikely(arena == NULL)
+	    || (edata = arena_extent_alloc_large(
+	            tsdn, arena, usize, alignment, zero))
+	        == NULL) {
 		return NULL;
 	}
 
-	/* See comments in arena_bin_slabs_full_insert(). */
+	/* See comments in bin_slabs_full_insert(). */
 	if (!arena_is_auto(arena)) {
 		/* Insert edata into large. */
 		malloc_mutex_lock(tsdn, &arena->large_mtx);
@@ -53,10 +55,10 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
 
 static bool
 large_ralloc_no_move_shrink(tsdn_t *tsdn, edata_t *edata, size_t usize) {
-	arena_t *arena = arena_get_from_edata(edata);
+	arena_t  *arena = arena_get_from_edata(edata);
 	ehooks_t *ehooks = arena_get_ehooks(arena);
-	size_t old_size = edata_size_get(edata);
-	size_t old_usize = edata_usize_get(edata);
+	size_t    old_size = edata_size_get(edata);
+	size_t    old_usize = edata_usize_get(edata);
 
 	assert(old_usize > usize);
 
@@ -80,8 +82,8 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, edata_t *edata, size_t usize) {
 }
 
 static bool
-large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize,
-    bool zero) {
+large_ralloc_no_move_expand(
+    tsdn_t *tsdn, edata_t *edata, size_t usize, bool zero) {
 	arena_t *arena = arena_get_from_edata(edata);
 
 	size_t old_size = edata_size_get(edata);
@@ -112,11 +114,11 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize,
 			 * offset from the beginning of the extent is a multiple
 			 * of CACHELINE in [0 .. PAGE).
 			 */
-			void *zbase = (void *)
-			    ((uintptr_t)edata_addr_get(edata) + old_usize);
-			void *zpast = PAGE_ADDR2BASE((void *)((uintptr_t)zbase +
-			    PAGE));
-			size_t nzero = (uintptr_t)zpast - (uintptr_t)zbase;
+			void *zbase = (void *)((byte_t *)edata_addr_get(edata)
+			    + old_usize);
+			void *zpast = PAGE_ADDR2BASE(
+			    (void *)((byte_t *)zbase + PAGE));
+			size_t nzero = (byte_t *)zpast - (byte_t *)zbase;
 			assert(nzero > 0);
 			memset(zbase, 0, nzero);
 		}
@@ -134,19 +136,19 @@ large_ralloc_no_move(tsdn_t *tsdn, edata_t *edata, size_t usize_min,
 	/* The following should have been caught by callers. */
 	assert(usize_min > 0 && usize_max <= SC_LARGE_MAXCLASS);
 	/* Both allocation sizes must be large to avoid a move. */
-	assert(oldusize >= SC_LARGE_MINCLASS
-	    && usize_max >= SC_LARGE_MINCLASS);
+	assert(oldusize >= SC_LARGE_MINCLASS && usize_max >= SC_LARGE_MINCLASS);
 
 	if (usize_max > oldusize) {
 		/* Attempt to expand the allocation in-place. */
-		if (!large_ralloc_no_move_expand(tsdn, edata, usize_max,
-		    zero)) {
+		if (!large_ralloc_no_move_expand(
+		        tsdn, edata, usize_max, zero)) {
 			arena_decay_tick(tsdn, arena_get_from_edata(edata));
 			return false;
 		}
 		/* Try again, this time with usize_min. */
-		if (usize_min < usize_max && usize_min > oldusize &&
-		    large_ralloc_no_move_expand(tsdn, edata, usize_min, zero)) {
+		if (usize_min < usize_max && usize_min > oldusize
+		    && !large_ralloc_no_move_expand(
+		        tsdn, edata, usize_min, zero)) {
 			arena_decay_tick(tsdn, arena_get_from_edata(edata));
 			return false;
 		}
@@ -172,8 +174,8 @@ large_ralloc_no_move(tsdn_t *tsdn, edata_t *edata, size_t usize_min,
 }
 
 static void *
-large_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize,
-    size_t alignment, bool zero) {
+large_ralloc_move_helper(
+    tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) {
 	if (alignment <= CACHELINE) {
 		return large_malloc(tsdn, arena, usize, zero);
 	}
@@ -190,14 +192,13 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize,
 	/* The following should have been caught by callers. */
 	assert(usize > 0 && usize <= SC_LARGE_MAXCLASS);
 	/* Both allocation sizes must be large to avoid a move. */
-	assert(oldusize >= SC_LARGE_MINCLASS
-	    && usize >= SC_LARGE_MINCLASS);
+	assert(oldusize >= SC_LARGE_MINCLASS && usize >= SC_LARGE_MINCLASS);
 
 	/* Try to avoid moving the allocation. */
 	if (!large_ralloc_no_move(tsdn, edata, usize, usize, zero)) {
-		hook_invoke_expand(hook_args->is_realloc
-		    ? hook_expand_realloc : hook_expand_rallocx, ptr, oldusize,
-		    usize, (uintptr_t)ptr, hook_args->args);
+		hook_invoke_expand(hook_args->is_realloc ? hook_expand_realloc
+		                                         : hook_expand_rallocx,
+		    ptr, oldusize, usize, (uintptr_t)ptr, hook_args->args);
 		return edata_addr_get(edata);
 	}
 
@@ -206,17 +207,18 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize,
 	 * different size class.  In that case, fall back to allocating new
 	 * space and copying.
 	 */
-	void *ret = large_ralloc_move_helper(tsdn, arena, usize, alignment,
-	    zero);
+	void *ret = large_ralloc_move_helper(
+	    tsdn, arena, usize, alignment, zero);
 	if (ret == NULL) {
 		return NULL;
 	}
 
-	hook_invoke_alloc(hook_args->is_realloc
-	    ? hook_alloc_realloc : hook_alloc_rallocx, ret, (uintptr_t)ret,
-	    hook_args->args);
-	hook_invoke_dalloc(hook_args->is_realloc
-	    ? hook_dalloc_realloc : hook_dalloc_rallocx, ptr, hook_args->args);
+	hook_invoke_alloc(
+	    hook_args->is_realloc ? hook_alloc_realloc : hook_alloc_rallocx,
+	    ret, (uintptr_t)ret, hook_args->args);
+	hook_invoke_dalloc(
+	    hook_args->is_realloc ? hook_dalloc_realloc : hook_dalloc_rallocx,
+	    ptr, hook_args->args);
 
 	size_t copysize = (usize < oldusize) ? usize : oldusize;
 	memcpy(ret, edata_addr_get(edata), copysize);
@@ -228,10 +230,10 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize,
  * locked indicates whether the arena's large_mtx is currently held.
  */
 static void
-large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata,
-    bool locked) {
+large_dalloc_prep_impl(
+    tsdn_t *tsdn, arena_t *arena, edata_t *edata, bool locked) {
 	if (!locked) {
-		/* See comments in arena_bin_slabs_full_insert(). */
+		/* See comments in bin_slabs_full_insert(). */
 		if (!arena_is_auto(arena)) {
 			malloc_mutex_lock(tsdn, &arena->large_mtx);
 			edata_list_active_remove(&arena->large, edata);
@@ -274,22 +276,17 @@ large_dalloc(tsdn_t *tsdn, edata_t *edata) {
 	arena_decay_tick(tsdn, arena);
 }
 
-size_t
-large_salloc(tsdn_t *tsdn, const edata_t *edata) {
-	return edata_usize_get(edata);
-}
-
 void
-large_prof_info_get(tsd_t *tsd, edata_t *edata, prof_info_t *prof_info,
-    bool reset_recent) {
+large_prof_info_get(
+    tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, bool reset_recent) {
 	assert(prof_info != NULL);
 
 	prof_tctx_t *alloc_tctx = edata_prof_tctx_get(edata);
 	prof_info->alloc_tctx = alloc_tctx;
 
-	if ((uintptr_t)alloc_tctx > (uintptr_t)1U) {
-		nstime_copy(&prof_info->alloc_time,
-		    edata_prof_alloc_time_get(edata));
+	if (prof_tctx_is_valid(alloc_tctx)) {
+		nstime_copy(
+		    &prof_info->alloc_time, edata_prof_alloc_time_get(edata));
 		prof_info->alloc_size = edata_prof_alloc_size_get(edata);
 		if (reset_recent) {
 			/*
@@ -308,7 +305,7 @@ large_prof_tctx_set(edata_t *edata, prof_tctx_t *tctx) {
 
 void
 large_prof_tctx_reset(edata_t *edata) {
-	large_prof_tctx_set(edata, (prof_tctx_t *)(uintptr_t)1U);
+	large_prof_tctx_set(edata, PROF_TCTX_SENTINEL);
 }
 
 void
diff --git a/src/log.c b/src/log.c
index 778902fb..9b1c6261 100644
--- a/src/log.c
+++ b/src/log.c
@@ -3,7 +3,7 @@
 
 #include "jemalloc/internal/log.h"
 
-char log_var_names[JEMALLOC_LOG_VAR_BUFSIZE];
+char       log_var_names[JEMALLOC_LOG_VAR_BUFSIZE];
 atomic_b_t log_init_done = ATOMIC_INIT(false);
 
 /*
@@ -11,7 +11,7 @@ atomic_b_t log_init_done = ATOMIC_INIT(false);
  * with a pointer to the first character after the end of the string.
  */
 static const char *
-log_var_extract_segment(const char* segment_begin) {
+log_var_extract_segment(const char *segment_begin) {
 	const char *end;
 	for (end = segment_begin; *end != '\0' && *end != '|'; end++) {
 	}
@@ -30,12 +30,12 @@ log_var_matches_segment(const char *segment_begin, const char *segment_end,
 	if (segment_len == 1 && *segment_begin == '.') {
 		return true;
 	}
-        if (segment_len == log_var_len) {
+	if (segment_len == log_var_len) {
 		return strncmp(segment_begin, log_var_begin, segment_len) == 0;
 	} else if (segment_len < log_var_len) {
 		return strncmp(segment_begin, log_var_begin, segment_len) == 0
 		    && log_var_begin[segment_len] == '.';
-        } else {
+	} else {
 		return false;
 	}
 }
@@ -61,9 +61,9 @@ log_var_update_state(log_var_t *log_var) {
 		    segment_begin);
 		assert(segment_end < log_var_names + JEMALLOC_LOG_VAR_BUFSIZE);
 		if (log_var_matches_segment(segment_begin, segment_end,
-		    log_var_begin, log_var_end)) {
-			atomic_store_u(&log_var->state, LOG_ENABLED,
-			    ATOMIC_RELAXED);
+		        log_var_begin, log_var_end)) {
+			atomic_store_u(
+			    &log_var->state, LOG_ENABLED, ATOMIC_RELAXED);
 			return LOG_ENABLED;
 		}
 		if (*segment_end == '\0') {
diff --git a/src/malloc_io.c b/src/malloc_io.c
index b76885cb..9716c668 100644
--- a/src/malloc_io.c
+++ b/src/malloc_io.c
@@ -5,63 +5,68 @@
 #include "jemalloc/internal/util.h"
 
 #ifdef assert
-#  undef assert
+#	undef assert
 #endif
 #ifdef not_reached
-#  undef not_reached
+#	undef not_reached
 #endif
 #ifdef not_implemented
-#  undef not_implemented
+#	undef not_implemented
 #endif
 #ifdef assert_not_implemented
-#  undef assert_not_implemented
+#	undef assert_not_implemented
 #endif
 
 /*
  * Define simple versions of assertion macros that won't recurse in case
  * of assertion failures in malloc_*printf().
  */
-#define assert(e) do {							\
-	if (config_debug && !(e)) {					\
-		malloc_write("<jemalloc>: Failed assertion\n");		\
-		abort();						\
-	}								\
-} while (0)
+#define assert(e)                                                              \
+	do {                                                                   \
+		if (config_debug && !(e)) {                                    \
+			malloc_write("<jemalloc>: Failed assertion\n");        \
+			abort();                                               \
+		}                                                              \
+	} while (0)
 
-#define not_reached() do {						\
-	if (config_debug) {						\
-		malloc_write("<jemalloc>: Unreachable code reached\n");	\
-		abort();						\
-	}								\
-	unreachable();							\
-} while (0)
+#define not_reached()                                                          \
+	do {                                                                   \
+		if (config_debug) {                                            \
+			malloc_write(                                          \
+			    "<jemalloc>: Unreachable code reached\n");         \
+			abort();                                               \
+		}                                                              \
+		unreachable();                                                 \
+	} while (0)
 
-#define not_implemented() do {						\
-	if (config_debug) {						\
-		malloc_write("<jemalloc>: Not implemented\n");		\
-		abort();						\
-	}								\
-} while (0)
+#define not_implemented()                                                      \
+	do {                                                                   \
+		if (config_debug) {                                            \
+			malloc_write("<jemalloc>: Not implemented\n");         \
+			abort();                                               \
+		}                                                              \
+	} while (0)
 
-#define assert_not_implemented(e) do {					\
-	if (unlikely(config_debug && !(e))) {				\
-		not_implemented();					\
-	}								\
-} while (0)
+#define assert_not_implemented(e)                                              \
+	do {                                                                   \
+		if (unlikely(config_debug && !(e))) {                          \
+			not_implemented();                                     \
+		}                                                              \
+	} while (0)
 
 /******************************************************************************/
 /* Function prototypes for non-inline static functions. */
 
 #define U2S_BUFSIZE ((1U << (LG_SIZEOF_INTMAX_T + 3)) + 1)
-static char *u2s(uintmax_t x, unsigned base, bool uppercase, char *s,
-    size_t *slen_p);
+static char *u2s(
+    uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p);
 #define D2S_BUFSIZE (1 + U2S_BUFSIZE)
 static char *d2s(intmax_t x, char sign, char *s, size_t *slen_p);
 #define O2S_BUFSIZE (1 + U2S_BUFSIZE)
 static char *o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p);
 #define X2S_BUFSIZE (2 + U2S_BUFSIZE)
-static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s,
-    size_t *slen_p);
+static char *x2s(
+    uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p);
 
 /******************************************************************************/
 
@@ -71,7 +76,7 @@ wrtmessage(void *cbopaque, const char *s) {
 	malloc_write_fd(STDERR_FILENO, s, strlen(s));
 }
 
-JEMALLOC_EXPORT void	(*je_malloc_message)(void *, const char *s);
+JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s);
 
 /*
  * Wrapper around malloc_message() that avoids the need for
@@ -93,14 +98,15 @@ malloc_write(const char *s) {
 int
 buferror(int err, char *buf, size_t buflen) {
 #ifdef _WIN32
-	FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0,
-	    (LPSTR)buf, (DWORD)buflen, NULL);
+	FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0, (LPSTR)buf,
+	    (DWORD)buflen, NULL);
 	return 0;
-#elif defined(JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE) && defined(_GNU_SOURCE)
+#elif defined(JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE)                \
+    && defined(_GNU_SOURCE)
 	char *b = strerror_r(err, buf, buflen);
 	if (b != buf) {
 		strncpy(buf, b, buflen);
-		buf[buflen-1] = '\0';
+		buf[buflen - 1] = '\0';
 	}
 	return 0;
 #else
@@ -110,9 +116,9 @@ buferror(int err, char *buf, size_t buflen) {
 
 uintmax_t
 malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) {
-	uintmax_t ret, digit;
-	unsigned b;
-	bool neg;
+	uintmax_t   ret, digit;
+	unsigned    b;
+	bool        neg;
 	const char *p, *ns;
 
 	p = nptr;
@@ -128,7 +134,12 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) {
 	neg = false;
 	while (true) {
 		switch (*p) {
-		case '\t': case '\n': case '\v': case '\f': case '\r': case ' ':
+		case '\t':
+		case '\n':
+		case '\v':
+		case '\f':
+		case '\r':
+		case ' ':
 			p++;
 			break;
 		case '-':
@@ -142,8 +153,8 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) {
 		}
 	}
 
-	/* Get prefix, if any. */
-	label_prefix:
+/* Get prefix, if any. */
+label_prefix:
 	/*
 	 * Note where the first non-whitespace/sign character is so that it is
 	 * possible to tell whether any digits are consumed (e.g., "  0" vs.
@@ -152,8 +163,14 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) {
 	ns = p;
 	if (*p == '0') {
 		switch (p[1]) {
-		case '0': case '1': case '2': case '3': case '4': case '5':
-		case '6': case '7':
+		case '0':
+		case '1':
+		case '2':
+		case '3':
+		case '4':
+		case '5':
+		case '6':
+		case '7':
 			if (b == 0) {
 				b = 8;
 			}
@@ -161,13 +178,30 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) {
 				p++;
 			}
 			break;
-		case 'X': case 'x':
+		case 'X':
+		case 'x':
 			switch (p[2]) {
-			case '0': case '1': case '2': case '3': case '4':
-			case '5': case '6': case '7': case '8': case '9':
-			case 'A': case 'B': case 'C': case 'D': case 'E':
+			case '0':
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
+			case 'A':
+			case 'B':
+			case 'C':
+			case 'D':
+			case 'E':
 			case 'F':
-			case 'a': case 'b': case 'c': case 'd': case 'e':
+			case 'a':
+			case 'b':
+			case 'c':
+			case 'd':
+			case 'e':
 			case 'f':
 				if (b == 0) {
 					b = 16;
@@ -244,9 +278,8 @@ u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p) {
 		} while (x > 0);
 		break;
 	case 16: {
-		const char *digits = (uppercase)
-		    ? "0123456789ABCDEF"
-		    : "0123456789abcdef";
+		const char *digits = (uppercase) ? "0123456789ABCDEF"
+		                                 : "0123456789abcdef";
 
 		do {
 			i--;
@@ -254,7 +287,8 @@ u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p) {
 			x >>= 4;
 		} while (x > 0);
 		break;
-	} default: {
+	}
+	default: {
 		const char *digits = (uppercase)
 		    ? "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 		    : "0123456789abcdefghijklmnopqrstuvwxyz";
@@ -265,7 +299,8 @@ u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p) {
 			s[i] = digits[x % (uint64_t)base];
 			x /= (uint64_t)base;
 		} while (x > 0);
-	}}
+	}
+	}
 
 	*slen_p = U2S_BUFSIZE - 1 - i;
 	return &s[i];
@@ -294,7 +329,8 @@ d2s(intmax_t x, char sign, char *s, size_t *slen_p) {
 		(*slen_p)++;
 		*s = sign;
 		break;
-	default: not_reached();
+	default:
+		not_reached();
 	}
 	return s;
 }
@@ -316,7 +352,8 @@ x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p) {
 	if (alt_form) {
 		s -= 2;
 		(*slen_p) += 2;
-		memcpy(s, uppercase ? "0X" : "0x", 2);
+		s[0] = '0';
+		s[1] = uppercase ? 'X' : 'x';
 	}
 	return s;
 }
@@ -324,107 +361,112 @@ x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p) {
 JEMALLOC_COLD
 size_t
 malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) {
-	size_t i;
+	size_t      i;
 	const char *f;
 
-#define APPEND_C(c) do {						\
-	if (i < size) {							\
-		str[i] = (c);						\
-	}								\
-	i++;								\
-} while (0)
-#define APPEND_S(s, slen) do {						\
-	if (i < size) {							\
-		size_t cpylen = (slen <= size - i) ? slen : size - i;	\
-		memcpy(&str[i], s, cpylen);				\
-	}								\
-	i += slen;							\
-} while (0)
-#define APPEND_PADDED_S(s, slen, width, left_justify) do {		\
-	/* Left padding. */						\
-	size_t pad_len = (width == -1) ? 0 : ((slen < (size_t)width) ?	\
-	    (size_t)width - slen : 0);					\
-	if (!left_justify && pad_len != 0) {				\
-		size_t j;						\
-		for (j = 0; j < pad_len; j++) {				\
-			if (pad_zero) {					\
-				APPEND_C('0');				\
-			} else {					\
-				APPEND_C(' ');				\
-			}						\
-		}							\
-	}								\
-	/* Value. */							\
-	APPEND_S(s, slen);						\
-	/* Right padding. */						\
-	if (left_justify && pad_len != 0) {				\
-		size_t j;						\
-		for (j = 0; j < pad_len; j++) {				\
-			APPEND_C(' ');					\
-		}							\
-	}								\
-} while (0)
-#define GET_ARG_NUMERIC(val, len) do {					\
-	switch ((unsigned char)len) {					\
-	case '?':							\
-		val = va_arg(ap, int);					\
-		break;							\
-	case '?' | 0x80:						\
-		val = va_arg(ap, unsigned int);				\
-		break;							\
-	case 'l':							\
-		val = va_arg(ap, long);					\
-		break;							\
-	case 'l' | 0x80:						\
-		val = va_arg(ap, unsigned long);			\
-		break;							\
-	case 'q':							\
-		val = va_arg(ap, long long);				\
-		break;							\
-	case 'q' | 0x80:						\
-		val = va_arg(ap, unsigned long long);			\
-		break;							\
-	case 'j':							\
-		val = va_arg(ap, intmax_t);				\
-		break;							\
-	case 'j' | 0x80:						\
-		val = va_arg(ap, uintmax_t);				\
-		break;							\
-	case 't':							\
-		val = va_arg(ap, ptrdiff_t);				\
-		break;							\
-	case 'z':							\
-		val = va_arg(ap, ssize_t);				\
-		break;							\
-	case 'z' | 0x80:						\
-		val = va_arg(ap, size_t);				\
-		break;							\
-	case 'p': /* Synthetic; used for %p. */				\
-		val = va_arg(ap, uintptr_t);				\
-		break;							\
-	default:							\
-		not_reached();						\
-		val = 0;						\
-	}								\
-} while (0)
+#define APPEND_C(c)                                                            \
+	do {                                                                   \
+		if (i < size) {                                                \
+			str[i] = (c);                                          \
+		}                                                              \
+		i++;                                                           \
+	} while (0)
+#define APPEND_S(s, slen)                                                      \
+	do {                                                                   \
+		if (i < size) {                                                \
+			size_t cpylen = (slen <= size - i) ? slen : size - i;  \
+			memcpy(&str[i], s, cpylen);                            \
+		}                                                              \
+		i += slen;                                                     \
+	} while (0)
+#define APPEND_PADDED_S(s, slen, width, left_justify)                          \
+	do {                                                                   \
+		/* Left padding. */                                            \
+		size_t pad_len = (width == -1)                                 \
+		    ? 0                                                        \
+		    : ((slen < (size_t)width) ? (size_t)width - slen : 0);     \
+		if (!left_justify && pad_len != 0) {                           \
+			size_t j;                                              \
+			for (j = 0; j < pad_len; j++) {                        \
+				if (pad_zero) {                                \
+					APPEND_C('0');                         \
+				} else {                                       \
+					APPEND_C(' ');                         \
+				}                                              \
+			}                                                      \
+		}                                                              \
+		/* Value. */                                                   \
+		APPEND_S(s, slen);                                             \
+		/* Right padding. */                                           \
+		if (left_justify && pad_len != 0) {                            \
+			size_t j;                                              \
+			for (j = 0; j < pad_len; j++) {                        \
+				APPEND_C(' ');                                 \
+			}                                                      \
+		}                                                              \
+	} while (0)
+#define GET_ARG_NUMERIC(val, len)                                              \
+	do {                                                                   \
+		switch ((unsigned char)len) {                                  \
+		case '?':                                                      \
+			val = va_arg(ap, int);                                 \
+			break;                                                 \
+		case '?' | 0x80:                                               \
+			val = va_arg(ap, unsigned int);                        \
+			break;                                                 \
+		case 'l':                                                      \
+			val = va_arg(ap, long);                                \
+			break;                                                 \
+		case 'l' | 0x80:                                               \
+			val = va_arg(ap, unsigned long);                       \
+			break;                                                 \
+		case 'q':                                                      \
+			val = va_arg(ap, long long);                           \
+			break;                                                 \
+		case 'q' | 0x80:                                               \
+			val = va_arg(ap, unsigned long long);                  \
+			break;                                                 \
+		case 'j':                                                      \
+			val = va_arg(ap, intmax_t);                            \
+			break;                                                 \
+		case 'j' | 0x80:                                               \
+			val = va_arg(ap, uintmax_t);                           \
+			break;                                                 \
+		case 't':                                                      \
+			val = va_arg(ap, ptrdiff_t);                           \
+			break;                                                 \
+		case 'z':                                                      \
+			val = va_arg(ap, ssize_t);                             \
+			break;                                                 \
+		case 'z' | 0x80:                                               \
+			val = va_arg(ap, size_t);                              \
+			break;                                                 \
+		case 'p': /* Synthetic; used for %p. */                        \
+			val = va_arg(ap, uintptr_t);                           \
+			break;                                                 \
+		default:                                                       \
+			not_reached();                                         \
+			val = 0;                                               \
+		}                                                              \
+	} while (0)
 
 	i = 0;
 	f = format;
 	while (true) {
 		switch (*f) {
-		case '\0': goto label_out;
+		case '\0':
+			goto label_out;
 		case '%': {
-			bool alt_form = false;
-			bool left_justify = false;
-			bool plus_space = false;
-			bool plus_plus = false;
-			int prec = -1;
-			int width = -1;
+			bool          alt_form = false;
+			bool          left_justify = false;
+			bool          plus_space = false;
+			bool          plus_plus = false;
+			int           prec = -1;
+			int           width = -1;
 			unsigned char len = '?';
-			char *s;
-			size_t slen;
-			bool first_width_digit = true;
-			bool pad_zero = false;
+			char         *s;
+			size_t        slen;
+			bool          pad_zero = false;
 
 			f++;
 			/* Flags. */
@@ -446,12 +488,13 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) {
 					assert(!plus_plus);
 					plus_plus = true;
 					break;
-				default: goto label_width;
+				default:
+					goto label_width;
 				}
 				f++;
 			}
-			/* Width. */
-			label_width:
+		/* Width. */
+		label_width:
 			switch (*f) {
 			case '*':
 				width = va_arg(ap, int);
@@ -462,21 +505,26 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) {
 				}
 				break;
 			case '0':
-				if (first_width_digit) {
-					pad_zero = true;
-				}
+				pad_zero = true;
 				JEMALLOC_FALLTHROUGH;
-			case '1': case '2': case '3': case '4':
-			case '5': case '6': case '7': case '8': case '9': {
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9': {
 				uintmax_t uwidth;
 				set_errno(0);
 				uwidth = malloc_strtoumax(f, (char **)&f, 10);
-				assert(uwidth != UINTMAX_MAX || get_errno() !=
-				    ERANGE);
+				assert(uwidth != UINTMAX_MAX
+				    || get_errno() != ERANGE);
 				width = (int)uwidth;
-				first_width_digit = false;
 				break;
-			} default:
+			}
+			default:
 				break;
 			}
 			/* Width/precision separator. */
@@ -491,20 +539,29 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) {
 				prec = va_arg(ap, int);
 				f++;
 				break;
-			case '0': case '1': case '2': case '3': case '4':
-			case '5': case '6': case '7': case '8': case '9': {
+			case '0':
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9': {
 				uintmax_t uprec;
 				set_errno(0);
 				uprec = malloc_strtoumax(f, (char **)&f, 10);
-				assert(uprec != UINTMAX_MAX || get_errno() !=
-				    ERANGE);
+				assert(uprec != UINTMAX_MAX
+				    || get_errno() != ERANGE);
 				prec = (int)uprec;
 				break;
 			}
-			default: break;
+			default:
+				break;
 			}
-			/* Length. */
-			label_length:
+		/* Length. */
+		label_length:
 			switch (*f) {
 			case 'l':
 				f++;
@@ -515,11 +572,15 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) {
 					len = 'l';
 				}
 				break;
-			case 'q': case 'j': case 't': case 'z':
+			case 'q':
+			case 'j':
+			case 't':
+			case 'z':
 				len = *f;
 				f++;
 				break;
-			default: break;
+			default:
+				break;
 			}
 			/* Conversion specifier. */
 			switch (*f) {
@@ -528,9 +589,10 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) {
 				APPEND_C(*f);
 				f++;
 				break;
-			case 'd': case 'i': {
+			case 'd':
+			case 'i': {
 				intmax_t val JEMALLOC_CC_SILENCE_INIT(0);
-				char buf[D2S_BUFSIZE];
+				char         buf[D2S_BUFSIZE];
 
 				/*
 				 * Outputting negative, zero-padded numbers
@@ -545,41 +607,48 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) {
 				assert(!pad_zero);
 
 				GET_ARG_NUMERIC(val, len);
-				s = d2s(val, (plus_plus ? '+' : (plus_space ?
-				    ' ' : '-')), buf, &slen);
+				s = d2s(val,
+				    (plus_plus ? '+'
+				               : (plus_space ? ' ' : '-')),
+				    buf, &slen);
 				APPEND_PADDED_S(s, slen, width, left_justify);
 				f++;
 				break;
-			} case 'o': {
+			}
+			case 'o': {
 				uintmax_t val JEMALLOC_CC_SILENCE_INIT(0);
-				char buf[O2S_BUFSIZE];
+				char          buf[O2S_BUFSIZE];
 
 				GET_ARG_NUMERIC(val, len | 0x80);
 				s = o2s(val, alt_form, buf, &slen);
 				APPEND_PADDED_S(s, slen, width, left_justify);
 				f++;
 				break;
-			} case 'u': {
+			}
+			case 'u': {
 				uintmax_t val JEMALLOC_CC_SILENCE_INIT(0);
-				char buf[U2S_BUFSIZE];
+				char          buf[U2S_BUFSIZE];
 
 				GET_ARG_NUMERIC(val, len | 0x80);
 				s = u2s(val, 10, false, buf, &slen);
 				APPEND_PADDED_S(s, slen, width, left_justify);
 				f++;
 				break;
-			} case 'x': case 'X': {
+			}
+			case 'x':
+			case 'X': {
 				uintmax_t val JEMALLOC_CC_SILENCE_INIT(0);
-				char buf[X2S_BUFSIZE];
+				char          buf[X2S_BUFSIZE];
 
 				GET_ARG_NUMERIC(val, len | 0x80);
 				s = x2s(val, alt_form, *f == 'X', buf, &slen);
 				APPEND_PADDED_S(s, slen, width, left_justify);
 				f++;
 				break;
-			} case 'c': {
+			}
+			case 'c': {
 				unsigned char val;
-				char buf[2];
+				char          buf[2];
 
 				assert(len == '?' || len == 'l');
 				assert_not_implemented(len != 'l');
@@ -589,7 +658,8 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) {
 				APPEND_PADDED_S(buf, 1, width, left_justify);
 				f++;
 				break;
-			} case 's':
+			}
+			case 's':
 				assert(len == '?' || len == 'l');
 				assert_not_implemented(len != 'l');
 				s = va_arg(ap, char *);
@@ -599,26 +669,30 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) {
 				break;
 			case 'p': {
 				uintmax_t val;
-				char buf[X2S_BUFSIZE];
+				char      buf[X2S_BUFSIZE];
 
 				GET_ARG_NUMERIC(val, 'p');
 				s = x2s(val, true, false, buf, &slen);
 				APPEND_PADDED_S(s, slen, width, left_justify);
 				f++;
 				break;
-			} default: not_reached();
+			}
+			default:
+				not_reached();
 			}
 			break;
-		} default: {
+		}
+		default: {
 			APPEND_C(*f);
 			f++;
 			break;
-		}}
+		}
+		}
 	}
-	label_out:
+label_out:
 	if (i < size) {
 		str[i] = '\0';
-	} else {
+	} else if (size != 0) {
 		str[size - 1] = '\0';
 	}
 
@@ -632,7 +706,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) {
 JEMALLOC_FORMAT_PRINTF(3, 4)
 size_t
 malloc_snprintf(char *str, size_t size, const char *format, ...) {
-	size_t ret;
+	size_t  ret;
 	va_list ap;
 
 	va_start(ap, format);
@@ -643,8 +717,8 @@ malloc_snprintf(char *str, size_t size, const char *format, ...) {
 }
 
 void
-malloc_vcprintf(write_cb_t *write_cb, void *cbopaque, const char *format,
-    va_list ap) {
+malloc_vcprintf(
+    write_cb_t *write_cb, void *cbopaque, const char *format, va_list ap) {
 	char buf[MALLOC_PRINTF_BUFSIZE];
 
 	if (write_cb == NULL) {
@@ -653,8 +727,8 @@ malloc_vcprintf(write_cb_t *write_cb, void *cbopaque, const char *format,
 		 * function, so use the default one.  malloc_write() is an
 		 * inline function, so use malloc_message() directly here.
 		 */
-		write_cb = (je_malloc_message != NULL) ? je_malloc_message :
-		    wrtmessage;
+		write_cb = (je_malloc_message != NULL) ? je_malloc_message
+		                                       : wrtmessage;
 	}
 
 	malloc_vsnprintf(buf, sizeof(buf), format, ap);
@@ -686,6 +760,81 @@ malloc_printf(const char *format, ...) {
 	va_end(ap);
 }
 
+static ssize_t
+malloc_write_fd_syscall(int fd, const void *buf, size_t count) {
+#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write)
+	/*
+	 * Use syscall(2) rather than write(2) when possible in order to avoid
+	 * the possibility of memory allocation within libc.  This is necessary
+	 * on FreeBSD; most operating systems do not have this problem though.
+	 *
+	 * syscall() returns long or int, depending on platform, so capture the
+	 * result in the widest plausible type to avoid compiler warnings.
+	 */
+	return (ssize_t)syscall(SYS_write, fd, buf, count);
+#else
+	return (ssize_t)write(fd, buf,
+#	ifdef _WIN32
+	    (unsigned int)
+#	endif
+	        count);
+#endif
+}
+
+ssize_t
+malloc_write_fd(int fd, const void *buf, size_t count) {
+	size_t bytes_written = 0;
+	do {
+		ssize_t result = malloc_write_fd_syscall(fd,
+		    &((const byte_t *)buf)[bytes_written],
+		    count - bytes_written);
+		if (result < 0) {
+#ifndef _WIN32
+			if (errno == EINTR) {
+				continue;
+			}
+#endif
+			return result;
+		}
+		bytes_written += result;
+	} while (bytes_written < count);
+	return bytes_written;
+}
+
+static ssize_t
+malloc_read_fd_syscall(int fd, void *buf, size_t count) {
+#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read)
+	return (ssize_t)syscall(SYS_read, fd, buf, count);
+#else
+	return (ssize_t)read(fd, buf,
+#	ifdef _WIN32
+	    (unsigned int)
+#	endif
+	        count);
+#endif
+}
+
+ssize_t
+malloc_read_fd(int fd, void *buf, size_t count) {
+	size_t bytes_read = 0;
+	do {
+		ssize_t result = malloc_read_fd_syscall(
+		    fd, &((byte_t *)buf)[bytes_read], count - bytes_read);
+		if (result < 0) {
+#ifndef _WIN32
+			if (errno == EINTR) {
+				continue;
+			}
+#endif
+			return result;
+		} else if (result == 0) {
+			break;
+		}
+		bytes_read += result;
+	} while (bytes_read < count);
+	return bytes_read;
+}
+
 /*
  * Restore normal assertion macros, in order to make it possible to compile all
  * C files as a single concatenation.
diff --git a/src/mutex.c b/src/mutex.c
index 0b3547a8..aa2ab665 100644
--- a/src/mutex.c
+++ b/src/mutex.c
@@ -5,8 +5,8 @@
 #include "jemalloc/internal/malloc_io.h"
 #include "jemalloc/internal/spin.h"
 
-#ifndef _CRT_SPINCOUNT
-#define _CRT_SPINCOUNT 4000
+#if defined(_WIN32) && !defined(_CRT_SPINCOUNT)
+#	define _CRT_SPINCOUNT 4000
 #endif
 
 /*
@@ -22,8 +22,8 @@ int64_t opt_mutex_max_spin = 600;
 bool isthreaded = false;
 #endif
 #ifdef JEMALLOC_MUTEX_INIT_CB
-static bool		postpone_init = true;
-static malloc_mutex_t	*postponed_mutexes = NULL;
+static bool            postpone_init = true;
+static malloc_mutex_t *postponed_mutexes = NULL;
 #endif
 
 /******************************************************************************/
@@ -44,14 +44,14 @@ pthread_create(pthread_t *__restrict thread,
 /******************************************************************************/
 
 #ifdef JEMALLOC_MUTEX_INIT_CB
-JEMALLOC_EXPORT int	_pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
-    void *(calloc_cb)(size_t, size_t));
+JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb(
+    pthread_mutex_t *mutex, void *(calloc_cb)(size_t, size_t));
 #endif
 
 void
 malloc_mutex_lock_slow(malloc_mutex_t *mutex) {
 	mutex_prof_data_t *data = &mutex->prof_data;
-	nstime_t before;
+	nstime_t           before;
 
 	if (ncpus == 1) {
 		goto label_spin_done;
@@ -61,7 +61,7 @@ malloc_mutex_lock_slow(malloc_mutex_t *mutex) {
 	do {
 		spin_cpu_spinwait();
 		if (!atomic_load_b(&mutex->locked, ATOMIC_RELAXED)
-                    && !malloc_mutex_trylock_final(mutex)) {
+		    && !malloc_mutex_trylock_final(mutex)) {
 			data->n_spin_acquired++;
 			return;
 		}
@@ -77,8 +77,9 @@ label_spin_done:
 	/* Copy before to after to avoid clock skews. */
 	nstime_t after;
 	nstime_copy(&after, &before);
-	uint32_t n_thds = atomic_fetch_add_u32(&data->n_waiting_thds, 1,
-	    ATOMIC_RELAXED) + 1;
+	uint32_t n_thds = atomic_fetch_add_u32(
+	                      &data->n_waiting_thds, 1, ATOMIC_RELAXED)
+	    + 1;
 	/* One last try as above two calls may take quite some cycles. */
 	if (!malloc_mutex_trylock_final(mutex)) {
 		atomic_fetch_sub_u32(&data->n_waiting_thds, 1, ATOMIC_RELAXED);
@@ -137,27 +138,28 @@ mutex_addr_comp(const witness_t *witness1, void *mutex1,
 }
 
 bool
-malloc_mutex_init(malloc_mutex_t *mutex, const char *name,
-    witness_rank_t rank, malloc_mutex_lock_order_t lock_order) {
+malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank,
+    malloc_mutex_lock_order_t lock_order) {
 	mutex_prof_data_init(&mutex->prof_data);
 #ifdef _WIN32
-#  if _WIN32_WINNT >= 0x0600
+#	if _WIN32_WINNT >= 0x0600
 	InitializeSRWLock(&mutex->lock);
-#  else
-	if (!InitializeCriticalSectionAndSpinCount(&mutex->lock,
-	    _CRT_SPINCOUNT)) {
+#	else
+	if (!InitializeCriticalSectionAndSpinCount(
+	        &mutex->lock, _CRT_SPINCOUNT)) {
 		return true;
 	}
-#  endif
+#	endif
 #elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
-       mutex->lock = OS_UNFAIR_LOCK_INIT;
+	mutex->lock = OS_UNFAIR_LOCK_INIT;
 #elif (defined(JEMALLOC_MUTEX_INIT_CB))
 	if (postpone_init) {
 		mutex->postponed_next = postponed_mutexes;
 		postponed_mutexes = mutex;
 	} else {
-		if (_pthread_mutex_init_calloc_cb(&mutex->lock,
-		    bootstrap_calloc) != 0) {
+		if (_pthread_mutex_init_calloc_cb(
+		        &mutex->lock, bootstrap_calloc)
+		    != 0) {
 			return true;
 		}
 	}
@@ -201,9 +203,10 @@ malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex) {
 #ifdef JEMALLOC_MUTEX_INIT_CB
 	malloc_mutex_unlock(tsdn, mutex);
 #else
-	if (malloc_mutex_init(mutex, mutex->witness.name,
-	    mutex->witness.rank, mutex->lock_order)) {
-		malloc_printf("<jemalloc>: Error re-initializing mutex in "
+	if (malloc_mutex_init(mutex, mutex->witness.name, mutex->witness.rank,
+	        mutex->lock_order)) {
+		malloc_printf(
+		    "<jemalloc>: Error re-initializing mutex in "
 		    "child\n");
 		if (opt_abort) {
 			abort();
@@ -217,8 +220,9 @@ malloc_mutex_boot(void) {
 #ifdef JEMALLOC_MUTEX_INIT_CB
 	postpone_init = false;
 	while (postponed_mutexes != NULL) {
-		if (_pthread_mutex_init_calloc_cb(&postponed_mutexes->lock,
-		    bootstrap_calloc) != 0) {
+		if (_pthread_mutex_init_calloc_cb(
+		        &postponed_mutexes->lock, bootstrap_calloc)
+		    != 0) {
 			return true;
 		}
 		postponed_mutexes = postponed_mutexes->postponed_next;
diff --git a/src/nstime.c b/src/nstime.c
index a1a53777..0dfbeda1 100644
--- a/src/nstime.c
+++ b/src/nstime.c
@@ -5,8 +5,8 @@
 
 #include "jemalloc/internal/assert.h"
 
-#define BILLION	UINT64_C(1000000000)
-#define MILLION	UINT64_C(1000000)
+#define BILLION UINT64_C(1000000000)
+#define MILLION UINT64_C(1000000)
 
 static void
 nstime_set_initialized(nstime_t *time) {
@@ -22,8 +22,8 @@ nstime_assert_initialized(const nstime_t *time) {
 	 * Some parts (e.g. stats) rely on memset to zero initialize.  Treat
 	 * these as valid initialization.
 	 */
-	assert(time->magic == NSTIME_MAGIC ||
-	    (time->magic == 0 && time->ns == 0));
+	assert(
+	    time->magic == NSTIME_MAGIC || (time->magic == 0 && time->ns == 0));
 #endif
 }
 
@@ -63,7 +63,7 @@ nstime_ns(const nstime_t *time) {
 }
 
 uint64_t
-nstime_msec(const nstime_t *time) {
+nstime_ms(const nstime_t *time) {
 	nstime_assert_initialized(time);
 	return time->ns / MILLION;
 }
@@ -133,8 +133,10 @@ nstime_isubtract(nstime_t *time, uint64_t subtrahend) {
 void
 nstime_imultiply(nstime_t *time, uint64_t multiplier) {
 	nstime_assert_initialized(time);
-	assert((((time->ns | multiplier) & (UINT64_MAX << (sizeof(uint64_t) <<
-	    2))) == 0) || ((time->ns * multiplier) / multiplier == time->ns));
+	assert(
+	    (((time->ns | multiplier) & (UINT64_MAX << (sizeof(uint64_t) << 2)))
+	        == 0)
+	    || ((time->ns * multiplier) / multiplier == time->ns));
 
 	nstime_initialize_operand(time);
 	time->ns *= multiplier;
@@ -158,7 +160,20 @@ nstime_divide(const nstime_t *time, const nstime_t *divisor) {
 	return time->ns / divisor->ns;
 }
 
-/* Returns time since *past, w/o updating *past. */
+uint64_t
+nstime_ns_between(const nstime_t *earlier, const nstime_t *later) {
+	nstime_assert_initialized(earlier);
+	nstime_assert_initialized(later);
+	assert(nstime_compare(later, earlier) >= 0);
+	return later->ns - earlier->ns;
+}
+
+uint64_t
+nstime_ms_between(const nstime_t *earlier, const nstime_t *later) {
+	return nstime_ns_between(earlier, later) / MILLION;
+}
+
+/* Returns time since *past in nanoseconds, w/o updating *past. */
 uint64_t
 nstime_ns_since(const nstime_t *past) {
 	nstime_assert_initialized(past);
@@ -166,13 +181,17 @@ nstime_ns_since(const nstime_t *past) {
 	nstime_t now;
 	nstime_copy(&now, past);
 	nstime_update(&now);
+	return nstime_ns_between(past, &now);
+}
 
-	assert(nstime_compare(&now, past) >= 0);
-	return now.ns - past->ns;
+/* Returns time since *past in milliseconds, w/o updating *past. */
+uint64_t
+nstime_ms_since(const nstime_t *past) {
+	return nstime_ns_since(past) / MILLION;
 }
 
 #ifdef _WIN32
-#  define NSTIME_MONOTONIC true
+#	define NSTIME_MONOTONIC false
 static void
 nstime_get(nstime_t *time) {
 	FILETIME ft;
@@ -184,7 +203,7 @@ nstime_get(nstime_t *time) {
 	nstime_init(time, ticks_100ns * 100);
 }
 #elif defined(JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE)
-#  define NSTIME_MONOTONIC true
+#	define NSTIME_MONOTONIC true
 static void
 nstime_get(nstime_t *time) {
 	struct timespec ts;
@@ -193,7 +212,7 @@ nstime_get(nstime_t *time) {
 	nstime_init2(time, ts.tv_sec, ts.tv_nsec);
 }
 #elif defined(JEMALLOC_HAVE_CLOCK_MONOTONIC)
-#  define NSTIME_MONOTONIC true
+#	define NSTIME_MONOTONIC true
 static void
 nstime_get(nstime_t *time) {
 	struct timespec ts;
@@ -201,14 +220,25 @@ nstime_get(nstime_t *time) {
 	clock_gettime(CLOCK_MONOTONIC, &ts);
 	nstime_init2(time, ts.tv_sec, ts.tv_nsec);
 }
-#elif defined(JEMALLOC_HAVE_MACH_ABSOLUTE_TIME)
-#  define NSTIME_MONOTONIC true
+#elif defined(JEMALLOC_HAVE_CLOCK_GETTIME_NSEC_NP)
+#	define NSTIME_MONOTONIC true
 static void
 nstime_get(nstime_t *time) {
-	nstime_init(time, mach_absolute_time());
+	nstime_init(time, clock_gettime_nsec_np(CLOCK_UPTIME_RAW));
+}
+#elif defined(JEMALLOC_HAVE_MACH_ABSOLUTE_TIME)
+#	define NSTIME_MONOTONIC true
+static void
+nstime_get(nstime_t *time) {
+	static mach_timebase_info_data_t sTimebaseInfo;
+	if (sTimebaseInfo.denom == 0) {
+		(void)mach_timebase_info(&sTimebaseInfo);
+	}
+	nstime_init(time,
+	    mach_absolute_time() * sTimebaseInfo.numer / sTimebaseInfo.denom);
 }
 #else
-#  define NSTIME_MONOTONIC false
+#	define NSTIME_MONOTONIC false
 static void
 nstime_get(nstime_t *time) {
 	struct timeval tv;
@@ -225,15 +255,13 @@ nstime_monotonic_impl(void) {
 }
 nstime_monotonic_t *JET_MUTABLE nstime_monotonic = nstime_monotonic_impl;
 
-prof_time_res_t opt_prof_time_res =
-	prof_time_res_default;
+prof_time_res_t opt_prof_time_res = prof_time_res_default;
 
-const char *prof_time_res_mode_names[] = {
-	"default",
-	"high",
+const char *const prof_time_res_mode_names[] = {
+    "default",
+    "high",
 };
 
-
 static void
 nstime_get_realtime(nstime_t *time) {
 #if defined(JEMALLOC_HAVE_CLOCK_REALTIME) && !defined(_WIN32)
@@ -285,5 +313,3 @@ nstime_prof_init_update(nstime_t *time) {
 	nstime_init_zero(time);
 	nstime_prof_update(time);
 }
-
-
diff --git a/src/pa.c b/src/pa.c
index eb7e4620..a03b0c1c 100644
--- a/src/pa.c
+++ b/src/pa.c
@@ -11,13 +11,13 @@ pa_nactive_add(pa_shard_t *shard, size_t add_pages) {
 
 static void
 pa_nactive_sub(pa_shard_t *shard, size_t sub_pages) {
-	assert(atomic_load_zu(&shard->nactive, ATOMIC_RELAXED) >= sub_pages);
+	assert(pa_shard_nactive(shard) >= sub_pages);
 	atomic_fetch_sub_zu(&shard->nactive, sub_pages, ATOMIC_RELAXED);
 }
 
 bool
 pa_central_init(pa_central_t *central, base_t *base, bool hpa,
-    hpa_hooks_t *hpa_hooks) {
+    const hpa_hooks_t *hpa_hooks) {
 	bool err;
 	if (hpa) {
 		err = hpa_central_init(&central->hpa, base, hpa_hooks);
@@ -41,8 +41,8 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, pa_central_t *central,
 	}
 
 	if (pac_init(tsdn, &shard->pac, base, emap, &shard->edata_cache,
-	    cur_time, pac_oversize_threshold, dirty_decay_ms, muzzy_decay_ms,
-	    &stats->pac_stats, stats_mtx)) {
+	        cur_time, pac_oversize_threshold, dirty_decay_ms,
+	        muzzy_decay_ms, &stats->pac_stats, stats_mtx)) {
 		return true;
 	}
 
@@ -67,12 +67,9 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, pa_central_t *central,
 bool
 pa_shard_enable_hpa(tsdn_t *tsdn, pa_shard_t *shard,
     const hpa_shard_opts_t *hpa_opts, const sec_opts_t *hpa_sec_opts) {
-	if (hpa_shard_init(&shard->hpa_shard, &shard->central->hpa, shard->emap,
-	    shard->base, &shard->edata_cache, shard->ind, hpa_opts)) {
-		return true;
-	}
-	if (sec_init(tsdn, &shard->hpa_sec, shard->base, &shard->hpa_shard.pai,
-	    hpa_sec_opts)) {
+	if (hpa_shard_init(tsdn, &shard->hpa_shard, &shard->central->hpa,
+	        shard->emap, shard->base, &shard->edata_cache, shard->ind,
+	        hpa_opts, hpa_sec_opts)) {
 		return true;
 	}
 	shard->ever_used_hpa = true;
@@ -85,7 +82,6 @@ void
 pa_shard_disable_hpa(tsdn_t *tsdn, pa_shard_t *shard) {
 	atomic_store_b(&shard->use_hpa, false, ATOMIC_RELAXED);
 	if (shard->ever_used_hpa) {
-		sec_disable(tsdn, &shard->hpa_sec);
 		hpa_shard_disable(tsdn, &shard->hpa_shard);
 	}
 }
@@ -93,8 +89,13 @@ pa_shard_disable_hpa(tsdn_t *tsdn, pa_shard_t *shard) {
 void
 pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard) {
 	atomic_store_zu(&shard->nactive, 0, ATOMIC_RELAXED);
+	pa_shard_flush(tsdn, shard);
+}
+
+void
+pa_shard_flush(tsdn_t *tsdn, pa_shard_t *shard) {
 	if (shard->ever_used_hpa) {
-		sec_flush(tsdn, &shard->hpa_sec);
+		hpa_shard_flush(tsdn, &shard->hpa_shard);
 	}
 }
 
@@ -107,28 +108,27 @@ void
 pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) {
 	pac_destroy(tsdn, &shard->pac);
 	if (shard->ever_used_hpa) {
-		sec_flush(tsdn, &shard->hpa_sec);
-		hpa_shard_disable(tsdn, &shard->hpa_shard);
+		hpa_shard_destroy(tsdn, &shard->hpa_shard);
 	}
 }
 
 static pai_t *
 pa_get_pai(pa_shard_t *shard, edata_t *edata) {
-	return (edata_pai_get(edata) == EXTENT_PAI_PAC
-	    ? &shard->pac.pai : &shard->hpa_sec.pai);
+	return (edata_pai_get(edata) == EXTENT_PAI_PAC ? &shard->pac.pai
+	                                               : &shard->hpa_shard.pai);
 }
 
 edata_t *
 pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment,
     bool slab, szind_t szind, bool zero, bool guarded,
     bool *deferred_work_generated) {
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
+	witness_assert_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0);
 	assert(!guarded || alignment <= PAGE);
 
 	edata_t *edata = NULL;
 	if (!guarded && pa_shard_uses_hpa(shard)) {
-		edata = pai_alloc(tsdn, &shard->hpa_sec.pai, size, alignment,
+		edata = pai_alloc(tsdn, &shard->hpa_shard.pai, size, alignment,
 		    zero, /* guarded */ false, slab, deferred_work_generated);
 	}
 	/*
@@ -190,8 +190,8 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size,
 	size_t shrink_amount = old_size - new_size;
 
 	pai_t *pai = pa_get_pai(shard, edata);
-	bool error = pai_shrink(tsdn, pai, edata, old_size, new_size,
-	    deferred_work_generated);
+	bool   error = pai_shrink(
+            tsdn, pai, edata, old_size, new_size, deferred_work_generated);
 	if (error) {
 		return true;
 	}
@@ -220,13 +220,6 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata,
 	pai_dalloc(tsdn, pai, edata, deferred_work_generated);
 }
 
-bool
-pa_shard_retain_grow_limit_get_set(tsdn_t *tsdn, pa_shard_t *shard,
-    size_t *old_limit, size_t *new_limit) {
-	return pac_retain_grow_limit_get_set(tsdn, &shard->pac, old_limit,
-	    new_limit);
-}
-
 bool
 pa_decay_ms_set(tsdn_t *tsdn, pa_shard_t *shard, extent_state_t state,
     ssize_t decay_ms, pac_purge_eagerness_t eagerness) {
@@ -239,11 +232,11 @@ pa_decay_ms_get(pa_shard_t *shard, extent_state_t state) {
 }
 
 void
-pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard,
-    bool deferral_allowed) {
+pa_shard_set_deferral_allowed(
+    tsdn_t *tsdn, pa_shard_t *shard, bool deferral_allowed) {
 	if (pa_shard_uses_hpa(shard)) {
-		hpa_shard_set_deferral_allowed(tsdn, &shard->hpa_shard,
-		    deferral_allowed);
+		hpa_shard_set_deferral_allowed(
+		    tsdn, &shard->hpa_shard, deferral_allowed);
 	}
 }
 
@@ -267,8 +260,8 @@ pa_shard_time_until_deferred_work(tsdn_t *tsdn, pa_shard_t *shard) {
 	}
 
 	if (pa_shard_uses_hpa(shard)) {
-		uint64_t hpa =
-		    pai_time_until_deferred_work(tsdn, &shard->hpa_shard.pai);
+		uint64_t hpa = pai_time_until_deferred_work(
+		    tsdn, &shard->hpa_shard.pai);
 		if (hpa < time) {
 			time = hpa;
 		}
diff --git a/src/pa_extra.c b/src/pa_extra.c
index 0f488be6..ff45674f 100644
--- a/src/pa_extra.c
+++ b/src/pa_extra.c
@@ -17,7 +17,7 @@ pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) {
 void
 pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) {
 	if (shard->ever_used_hpa) {
-		sec_prefork2(tsdn, &shard->hpa_sec);
+		hpa_shard_prefork2(tsdn, &shard->hpa_shard);
 	}
 }
 
@@ -54,7 +54,6 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) {
 	malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_dirty.mtx);
 	malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_muzzy.mtx);
 	if (shard->ever_used_hpa) {
-		sec_postfork_parent(tsdn, &shard->hpa_sec);
 		hpa_shard_postfork_parent(tsdn, &shard->hpa_shard);
 	}
 }
@@ -69,24 +68,41 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) {
 	malloc_mutex_postfork_child(tsdn, &shard->pac.decay_dirty.mtx);
 	malloc_mutex_postfork_child(tsdn, &shard->pac.decay_muzzy.mtx);
 	if (shard->ever_used_hpa) {
-		sec_postfork_child(tsdn, &shard->hpa_sec);
 		hpa_shard_postfork_child(tsdn, &shard->hpa_shard);
 	}
 }
 
+size_t
+pa_shard_nactive(pa_shard_t *shard) {
+	return atomic_load_zu(&shard->nactive, ATOMIC_RELAXED);
+}
+
+size_t
+pa_shard_ndirty(pa_shard_t *shard) {
+	size_t ndirty = ecache_npages_get(&shard->pac.ecache_dirty);
+	if (shard->ever_used_hpa) {
+		ndirty += psset_ndirty(&shard->hpa_shard.psset);
+	}
+	return ndirty;
+}
+
+size_t
+pa_shard_nmuzzy(pa_shard_t *shard) {
+	return ecache_npages_get(&shard->pac.ecache_muzzy);
+}
+
 void
-pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, size_t *ndirty,
-    size_t *nmuzzy) {
-	*nactive += atomic_load_zu(&shard->nactive, ATOMIC_RELAXED);
-	*ndirty += ecache_npages_get(&shard->pac.ecache_dirty);
-	*nmuzzy += ecache_npages_get(&shard->pac.ecache_muzzy);
+pa_shard_basic_stats_merge(
+    pa_shard_t *shard, size_t *nactive, size_t *ndirty, size_t *nmuzzy) {
+	*nactive += pa_shard_nactive(shard);
+	*ndirty += pa_shard_ndirty(shard);
+	*nmuzzy += pa_shard_nmuzzy(shard);
 }
 
 void
 pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard,
     pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out,
-    hpa_shard_stats_t *hpa_stats_out, sec_stats_t *sec_stats_out,
-    size_t *resident) {
+    hpa_shard_stats_t *hpa_stats_out, size_t *resident) {
 	cassert(config_stats);
 
 	pa_shard_stats_out->pac_stats.retained +=
@@ -95,37 +111,37 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard,
 	    &shard->edata_cache.count, ATOMIC_RELAXED);
 
 	size_t resident_pgs = 0;
-	resident_pgs += atomic_load_zu(&shard->nactive, ATOMIC_RELAXED);
-	resident_pgs += ecache_npages_get(&shard->pac.ecache_dirty);
+	resident_pgs += pa_shard_nactive(shard);
+	resident_pgs += pa_shard_ndirty(shard);
 	*resident += (resident_pgs << LG_PAGE);
 
 	/* Dirty decay stats */
 	locked_inc_u64_unsynchronized(
 	    &pa_shard_stats_out->pac_stats.decay_dirty.npurge,
 	    locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx),
-	    &shard->pac.stats->decay_dirty.npurge));
+	        &shard->pac.stats->decay_dirty.npurge));
 	locked_inc_u64_unsynchronized(
 	    &pa_shard_stats_out->pac_stats.decay_dirty.nmadvise,
 	    locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx),
-	    &shard->pac.stats->decay_dirty.nmadvise));
+	        &shard->pac.stats->decay_dirty.nmadvise));
 	locked_inc_u64_unsynchronized(
 	    &pa_shard_stats_out->pac_stats.decay_dirty.purged,
 	    locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx),
-	    &shard->pac.stats->decay_dirty.purged));
+	        &shard->pac.stats->decay_dirty.purged));
 
 	/* Muzzy decay stats */
 	locked_inc_u64_unsynchronized(
 	    &pa_shard_stats_out->pac_stats.decay_muzzy.npurge,
 	    locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx),
-	    &shard->pac.stats->decay_muzzy.npurge));
+	        &shard->pac.stats->decay_muzzy.npurge));
 	locked_inc_u64_unsynchronized(
 	    &pa_shard_stats_out->pac_stats.decay_muzzy.nmadvise,
 	    locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx),
-	    &shard->pac.stats->decay_muzzy.nmadvise));
+	        &shard->pac.stats->decay_muzzy.nmadvise));
 	locked_inc_u64_unsynchronized(
 	    &pa_shard_stats_out->pac_stats.decay_muzzy.purged,
 	    locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx),
-	    &shard->pac.stats->decay_muzzy.purged));
+	        &shard->pac.stats->decay_muzzy.purged));
 
 	atomic_load_add_store_zu(&pa_shard_stats_out->pac_stats.abandoned_vm,
 	    atomic_load_zu(&shard->pac.stats->abandoned_vm, ATOMIC_RELAXED));
@@ -138,8 +154,8 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard,
 		retained = ecache_nextents_get(&shard->pac.ecache_retained, i);
 		dirty_bytes = ecache_nbytes_get(&shard->pac.ecache_dirty, i);
 		muzzy_bytes = ecache_nbytes_get(&shard->pac.ecache_muzzy, i);
-		retained_bytes = ecache_nbytes_get(&shard->pac.ecache_retained,
-		    i);
+		retained_bytes = ecache_nbytes_get(
+		    &shard->pac.ecache_retained, i);
 
 		estats_out[i].ndirty = dirty;
 		estats_out[i].nmuzzy = muzzy;
@@ -151,7 +167,6 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard,
 
 	if (shard->ever_used_hpa) {
 		hpa_shard_stats_merge(tsdn, &shard->hpa_shard, hpa_stats_out);
-		sec_stats_merge(tsdn, &shard->hpa_sec, sec_stats_out);
 	}
 }
 
@@ -185,7 +200,7 @@ pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard,
 		pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data,
 		    &shard->hpa_shard.grow_mtx,
 		    arena_prof_mutex_hpa_shard_grow);
-		sec_mutex_stats_read(tsdn, &shard->hpa_sec,
+		sec_mutex_stats_read(tsdn, &shard->hpa_shard.sec,
 		    &mutex_prof_data[arena_prof_mutex_hpa_sec]);
 	}
 }
diff --git a/src/pac.c b/src/pac.c
index 53e3d823..ed0f77c2 100644
--- a/src/pac.c
+++ b/src/pac.c
@@ -7,18 +7,18 @@
 static edata_t *pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size,
     size_t alignment, bool zero, bool guarded, bool frequent_reuse,
     bool *deferred_work_generated);
-static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated);
-static bool pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    size_t old_size, size_t new_size, bool *deferred_work_generated);
-static void pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    bool *deferred_work_generated);
+static bool     pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata,
+        size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated);
+static bool     pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata,
+        size_t old_size, size_t new_size, bool *deferred_work_generated);
+static void     pac_dalloc_impl(
+        tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated);
 static uint64_t pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self);
 
 static inline void
-pac_decay_data_get(pac_t *pac, extent_state_t state,
-    decay_t **r_decay, pac_decay_stats_t **r_decay_stats, ecache_t **r_ecache) {
-	switch(state) {
+pac_decay_data_get(pac_t *pac, extent_state_t state, decay_t **r_decay,
+    pac_decay_stats_t **r_decay_stats, ecache_t **r_ecache) {
+	switch (state) {
 	case extent_state_dirty:
 		*r_decay = &pac->decay_dirty;
 		*r_decay_stats = &pac->stats->decay_dirty;
@@ -29,6 +29,10 @@ pac_decay_data_get(pac_t *pac, extent_state_t state,
 		*r_decay_stats = &pac->stats->decay_muzzy;
 		*r_ecache = &pac->ecache_muzzy;
 		return;
+	case extent_state_active:
+	case extent_state_retained:
+	case extent_state_transition:
+	case extent_state_merging:
 	default:
 		unreachable();
 	}
@@ -47,7 +51,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap,
 	 * merging/splitting extents is non-trivial.
 	 */
 	if (ecache_init(tsdn, &pac->ecache_dirty, extent_state_dirty, ind,
-	    /* delay_coalesce */ true)) {
+	        /* delay_coalesce */ true)) {
 		return true;
 	}
 	/*
@@ -55,7 +59,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap,
 	 * the critical path much less often than for dirty extents.
 	 */
 	if (ecache_init(tsdn, &pac->ecache_muzzy, extent_state_muzzy, ind,
-	    /* delay_coalesce */ false)) {
+	        /* delay_coalesce */ false)) {
 		return true;
 	}
 	/*
@@ -64,17 +68,17 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap,
 	 * coalescing), but also because operations on retained extents are not
 	 * in the critical path.
 	 */
-	if (ecache_init(tsdn, &pac->ecache_retained, extent_state_retained,
-	    ind, /* delay_coalesce */ false)) {
+	if (ecache_init(tsdn, &pac->ecache_retained, extent_state_retained, ind,
+	        /* delay_coalesce */ false)) {
 		return true;
 	}
 	exp_grow_init(&pac->exp_grow);
 	if (malloc_mutex_init(&pac->grow_mtx, "extent_grow",
-	    WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) {
+	        WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) {
 		return true;
 	}
-	atomic_store_zu(&pac->oversize_threshold, pac_oversize_threshold,
-	    ATOMIC_RELAXED);
+	atomic_store_zu(
+	    &pac->oversize_threshold, pac_oversize_threshold, ATOMIC_RELAXED);
 	if (decay_init(&pac->decay_dirty, cur_time, dirty_decay_ms)) {
 		return true;
 	}
@@ -93,11 +97,9 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap,
 	atomic_store_zu(&pac->extent_sn_next, 0, ATOMIC_RELAXED);
 
 	pac->pai.alloc = &pac_alloc_impl;
-	pac->pai.alloc_batch = &pai_alloc_batch_default;
 	pac->pai.expand = &pac_expand_impl;
 	pac->pai.shrink = &pac_shrink_impl;
 	pac->pai.dalloc = &pac_dalloc_impl;
-	pac->pai.dalloc_batch = &pai_dalloc_batch_default;
 	pac->pai.time_until_deferred_work = &pac_time_until_deferred_work;
 
 	return false;
@@ -108,10 +110,28 @@ pac_may_have_muzzy(pac_t *pac) {
 	return pac_decay_ms_get(pac, extent_state_muzzy) != 0;
 }
 
+static size_t
+pac_alloc_retained_batched_size(size_t size) {
+	if (size > SC_LARGE_MAXCLASS) {
+		/*
+		 * A valid input with usize SC_LARGE_MAXCLASS could still
+		 * reach here because of sz_large_pad.  Such a request is valid
+		 * but we should not further increase it.  Thus, directly
+		 * return size for such cases.
+		 */
+		return size;
+	}
+	size_t batched_size = sz_s2u_compute_using_delta(size);
+	size_t next_hugepage_size = HUGEPAGE_CEILING(size);
+	return batched_size > next_hugepage_size ? next_hugepage_size
+	                                         : batched_size;
+}
+
 static edata_t *
 pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size,
     size_t alignment, bool zero, bool guarded) {
 	assert(!guarded || alignment <= PAGE);
+	size_t newly_mapped_size = 0;
 
 	edata_t *edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_dirty,
 	    NULL, size, alignment, zero, guarded);
@@ -120,16 +140,74 @@ pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size,
 		edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_muzzy,
 		    NULL, size, alignment, zero, guarded);
 	}
+
+	/*
+	 * We batched allocate a larger extent with large size classes disabled
+	 * because the reuse of extents in the dirty pool is worse without size
+	 * classes for large allocs.  For instance, when
+	 * disable_large_size_classes is false, 1.1MB, 1.15MB, and 1.2MB allocs
+	 * will all be ceiled to 1.25MB and can reuse the same buffer if they
+	 * are alloc & dalloc sequentially.  However, with
+	 * disable_large_size_classes being true, they cannot reuse the same
+	 * buffer and their sequential allocs & dallocs will result in three
+	 * different extents.  Thus, we cache extra mergeable extents in the
+	 * dirty pool to improve the reuse.  We skip this optimization if both
+	 * maps_coalesce and opt_retain are disabled because VM is not cheap
+	 * enough in such cases to be used aggressively and extents cannot be
+	 * merged at will (only extents from the same VirtualAlloc can be
+	 * merged).  Note that it could still be risky to cache more extents
+	 * when either mpas_coalesce or opt_retain is enabled.  Yet doing
+	 * so is still beneficial in improving the reuse of extents with some
+	 * limits.  This choice should be reevaluated if
+	 * pac_alloc_retained_batched_size is changed to be more aggressive.
+	 */
+	if (sz_large_size_classes_disabled() && edata == NULL
+	    && (maps_coalesce || opt_retain)) {
+		size_t batched_size = pac_alloc_retained_batched_size(size);
+		/*
+		 * Note that ecache_alloc_grow will try to retrieve virtual
+		 * memory from both retained pool and directly from OS through
+		 * extent_alloc_wrapper if the retained pool has no qualified
+		 * extents.  This is also why the overcaching still works even
+		 * with opt_retain off.
+		 */
+		edata = ecache_alloc_grow(tsdn, pac, ehooks,
+		    &pac->ecache_retained, NULL, batched_size, alignment, zero,
+		    guarded);
+
+		if (edata != NULL && batched_size > size) {
+			edata_t *trail = extent_split_wrapper(tsdn, pac, ehooks,
+			    edata, size, batched_size - size,
+			    /* holding_core_locks */ false);
+			if (trail == NULL) {
+				ecache_dalloc(tsdn, pac, ehooks,
+				    &pac->ecache_retained, edata);
+				edata = NULL;
+			} else {
+				ecache_dalloc(tsdn, pac, ehooks,
+				    &pac->ecache_dirty, trail);
+			}
+		}
+
+		if (edata != NULL) {
+			newly_mapped_size = batched_size;
+		}
+	}
+
 	if (edata == NULL) {
 		edata = ecache_alloc_grow(tsdn, pac, ehooks,
 		    &pac->ecache_retained, NULL, size, alignment, zero,
 		    guarded);
-		if (config_stats && edata != NULL) {
-			atomic_fetch_add_zu(&pac->stats->pac_mapped, size,
-			    ATOMIC_RELAXED);
+		if (edata != NULL) {
+			newly_mapped_size = size;
 		}
 	}
 
+	if (config_stats && newly_mapped_size != 0) {
+		atomic_fetch_add_zu(
+		    &pac->stats->pac_mapped, newly_mapped_size, ATOMIC_RELAXED);
+	}
+
 	return edata;
 }
 
@@ -140,8 +218,8 @@ pac_alloc_new_guarded(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size,
 
 	edata_t *edata;
 	if (san_bump_enabled() && frequent_reuse) {
-		edata = san_bump_alloc(tsdn, &pac->sba, pac, ehooks, size,
-		    zero);
+		edata = san_bump_alloc(
+		    tsdn, &pac->sba, pac, ehooks, size, zero);
 	} else {
 		size_t size_with_guards = san_two_side_guarded_sz(size);
 		/* Alloc a non-guarded extent first.*/
@@ -150,12 +228,12 @@ pac_alloc_new_guarded(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size,
 		if (edata != NULL) {
 			/* Add guards around it. */
 			assert(edata_size_get(edata) == size_with_guards);
-			san_guard_pages_two_sided(tsdn, ehooks, edata,
-			    pac->emap, true);
+			san_guard_pages_two_sided(
+			    tsdn, ehooks, edata, pac->emap, true);
 		}
 	}
-	assert(edata == NULL || (edata_guarded_get(edata) &&
-	    edata_size_get(edata) == size));
+	assert(edata == NULL
+	    || (edata_guarded_get(edata) && edata_size_get(edata) == size));
 
 	return edata;
 }
@@ -164,7 +242,7 @@ static edata_t *
 pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment,
     bool zero, bool guarded, bool frequent_reuse,
     bool *deferred_work_generated) {
-	pac_t *pac = (pac_t *)self;
+	pac_t    *pac = (pac_t *)self;
 	ehooks_t *ehooks = pac_ehooks_get(pac);
 
 	edata_t *edata = NULL;
@@ -175,13 +253,13 @@ pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment,
 	 * for such allocations would always return NULL.
 	 * */
 	if (!guarded || frequent_reuse) {
-		edata =	pac_alloc_real(tsdn, pac, ehooks, size, alignment,
-		    zero, guarded);
+		edata = pac_alloc_real(
+		    tsdn, pac, ehooks, size, alignment, zero, guarded);
 	}
 	if (edata == NULL && guarded) {
 		/* No cached guarded extents; creating a new one. */
-		edata = pac_alloc_new_guarded(tsdn, pac, ehooks, size,
-		    alignment, zero, frequent_reuse);
+		edata = pac_alloc_new_guarded(
+		    tsdn, pac, ehooks, size, alignment, zero, frequent_reuse);
 	}
 
 	return edata;
@@ -190,7 +268,7 @@ pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment,
 static bool
 pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
     size_t new_size, bool zero, bool *deferred_work_generated) {
-	pac_t *pac = (pac_t *)self;
+	pac_t    *pac = (pac_t *)self;
 	ehooks_t *ehooks = pac_ehooks_get(pac);
 
 	size_t mapped_add = 0;
@@ -219,8 +297,8 @@ pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
 		return true;
 	}
 	if (config_stats && mapped_add > 0) {
-		atomic_fetch_add_zu(&pac->stats->pac_mapped, mapped_add,
-		    ATOMIC_RELAXED);
+		atomic_fetch_add_zu(
+		    &pac->stats->pac_mapped, mapped_add, ATOMIC_RELAXED);
 	}
 	return false;
 }
@@ -228,7 +306,7 @@ pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
 static bool
 pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
     size_t new_size, bool *deferred_work_generated) {
-	pac_t *pac = (pac_t *)self;
+	pac_t    *pac = (pac_t *)self;
 	ehooks_t *ehooks = pac_ehooks_get(pac);
 
 	size_t shrink_amount = old_size - new_size;
@@ -248,9 +326,9 @@ pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
 }
 
 static void
-pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    bool *deferred_work_generated) {
-	pac_t *pac = (pac_t *)self;
+pac_dalloc_impl(
+    tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) {
+	pac_t    *pac = (pac_t *)self;
 	ehooks_t *ehooks = pac_ehooks_get(pac);
 
 	if (edata_guarded_get(edata)) {
@@ -267,10 +345,10 @@ pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata,
 		 * guarded).
 		 */
 		if (!edata_slab_get(edata) || !maps_coalesce) {
-			assert(edata_size_get(edata) >= SC_LARGE_MINCLASS ||
-			    !maps_coalesce);
-			san_unguard_pages_two_sided(tsdn, ehooks, edata,
-			    pac->emap);
+			assert(edata_size_get(edata) >= SC_LARGE_MINCLASS
+			    || !maps_coalesce);
+			san_unguard_pages_two_sided(
+			    tsdn, ehooks, edata, pac->emap);
 		}
 	}
 
@@ -285,8 +363,8 @@ pac_ns_until_purge(tsdn_t *tsdn, decay_t *decay, size_t npages) {
 		/* Use minimal interval if decay is contended. */
 		return BACKGROUND_THREAD_DEFERRED_MIN;
 	}
-	uint64_t result = decay_ns_until_purge(decay, npages,
-	    ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD);
+	uint64_t result = decay_ns_until_purge(
+	    decay, npages, ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD);
 
 	malloc_mutex_unlock(tsdn, &decay->mtx);
 	return result;
@@ -295,18 +373,16 @@ pac_ns_until_purge(tsdn_t *tsdn, decay_t *decay, size_t npages) {
 static uint64_t
 pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) {
 	uint64_t time;
-	pac_t *pac = (pac_t *)self;
+	pac_t   *pac = (pac_t *)self;
 
-	time = pac_ns_until_purge(tsdn,
-	    &pac->decay_dirty,
-	    ecache_npages_get(&pac->ecache_dirty));
+	time = pac_ns_until_purge(
+	    tsdn, &pac->decay_dirty, ecache_npages_get(&pac->ecache_dirty));
 	if (time == BACKGROUND_THREAD_DEFERRED_MIN) {
 		return time;
 	}
 
-	uint64_t muzzy = pac_ns_until_purge(tsdn,
-	    &pac->decay_muzzy,
-	    ecache_npages_get(&pac->ecache_muzzy));
+	uint64_t muzzy = pac_ns_until_purge(
+	    tsdn, &pac->decay_muzzy, ecache_npages_get(&pac->ecache_muzzy));
 	if (muzzy < time) {
 		time = muzzy;
 	}
@@ -314,8 +390,8 @@ pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) {
 }
 
 bool
-pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit,
-    size_t *new_limit) {
+pac_retain_grow_limit_get_set(
+    tsdn_t *tsdn, pac_t *pac, size_t *old_limit, size_t *new_limit) {
 	pszind_t new_ind JEMALLOC_CC_SILENCE_INIT(0);
 	if (new_limit != NULL) {
 		size_t limit = *new_limit;
@@ -341,15 +417,15 @@ static size_t
 pac_stash_decayed(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache,
     size_t npages_limit, size_t npages_decay_max,
     edata_list_inactive_t *result) {
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
+	witness_assert_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0);
 	ehooks_t *ehooks = pac_ehooks_get(pac);
 
 	/* Stash extents according to npages_limit. */
 	size_t nstashed = 0;
 	while (nstashed < npages_decay_max) {
-		edata_t *edata = ecache_evict(tsdn, pac, ehooks, ecache,
-		    npages_limit);
+		edata_t *edata = ecache_evict(
+		    tsdn, pac, ehooks, ecache, npages_limit);
 		if (edata == NULL) {
 			break;
 		}
@@ -359,6 +435,44 @@ pac_stash_decayed(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache,
 	return nstashed;
 }
 
+static bool
+decay_with_process_madvise(edata_list_inactive_t *decay_extents) {
+	cassert(have_process_madvise);
+	assert(opt_process_madvise_max_batch > 0);
+#ifndef JEMALLOC_HAVE_PROCESS_MADVISE
+	return true;
+#else
+	assert(
+	    opt_process_madvise_max_batch <= PROCESS_MADVISE_MAX_BATCH_LIMIT);
+	size_t len = opt_process_madvise_max_batch;
+	VARIABLE_ARRAY(struct iovec, vec, len);
+
+	size_t cur = 0, total_bytes = 0;
+	for (edata_t *edata = edata_list_inactive_first(decay_extents);
+	    edata != NULL;
+	    edata = edata_list_inactive_next(decay_extents, edata)) {
+		size_t pages_bytes = edata_size_get(edata);
+		vec[cur].iov_base = edata_base_get(edata);
+		vec[cur].iov_len = pages_bytes;
+		total_bytes += pages_bytes;
+		cur++;
+		if (cur == len) {
+			bool err = pages_purge_process_madvise(
+			    vec, len, total_bytes);
+			if (err) {
+				return true;
+			}
+			cur = 0;
+			total_bytes = 0;
+		}
+	}
+	if (cur > 0) {
+		return pages_purge_process_madvise(vec, cur, total_bytes);
+	}
+	return false;
+#endif
+}
+
 static size_t
 pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay,
     pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay,
@@ -374,8 +488,30 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay,
 	bool try_muzzy = !fully_decay
 	    && pac_decay_ms_get(pac, extent_state_muzzy) != 0;
 
-	for (edata_t *edata = edata_list_inactive_first(decay_extents); edata !=
-	    NULL; edata = edata_list_inactive_first(decay_extents)) {
+	bool purge_to_retained = !try_muzzy
+	    || ecache->state == extent_state_muzzy;
+	/*
+	 * Attempt process_madvise only if 1) enabled, 2) purging to retained,
+	 * and 3) not using custom hooks.
+	 */
+	bool try_process_madvise = (opt_process_madvise_max_batch > 0)
+	    && purge_to_retained && ehooks_dalloc_will_fail(ehooks);
+
+	bool already_purged;
+	if (try_process_madvise) {
+		/*
+		 * If anything unexpected happened during process_madvise
+		 * (e.g. not supporting MADV_DONTNEED, or partial success for
+		 * some reason), we will consider nothing is purged and fallback
+		 * to the regular madvise.
+		 */
+		already_purged = !decay_with_process_madvise(decay_extents);
+	} else {
+		already_purged = false;
+	}
+
+	for (edata_t *edata = edata_list_inactive_first(decay_extents);
+	    edata != NULL; edata = edata_list_inactive_first(decay_extents)) {
 		edata_list_inactive_remove(decay_extents, edata);
 
 		size_t size = edata_size_get(edata);
@@ -385,12 +521,10 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay,
 		npurged += npages;
 
 		switch (ecache->state) {
-		case extent_state_active:
-			not_reached();
 		case extent_state_dirty:
 			if (try_muzzy) {
-				err = extent_purge_lazy_wrapper(tsdn, ehooks,
-				    edata, /* offset */ 0, size);
+				err = extent_purge_lazy_wrapper(
+				    tsdn, ehooks, edata, /* offset */ 0, size);
 				if (!err) {
 					ecache_dalloc(tsdn, pac, ehooks,
 					    &pac->ecache_muzzy, edata);
@@ -399,10 +533,18 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay,
 			}
 			JEMALLOC_FALLTHROUGH;
 		case extent_state_muzzy:
-			extent_dalloc_wrapper(tsdn, pac, ehooks, edata);
+			if (already_purged) {
+				extent_dalloc_wrapper_purged(
+				    tsdn, pac, ehooks, edata);
+			} else {
+				extent_dalloc_wrapper(tsdn, pac, ehooks, edata);
+			}
 			nunmapped += npages;
 			break;
+		case extent_state_active:
 		case extent_state_retained:
+		case extent_state_transition:
+		case extent_state_merging:
 		default:
 			not_reached();
 		}
@@ -435,8 +577,8 @@ static void
 pac_decay_to_limit(tsdn_t *tsdn, pac_t *pac, decay_t *decay,
     pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay,
     size_t npages_limit, size_t npages_decay_max) {
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 1);
+	witness_assert_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 1);
 
 	if (decay->purging || npages_decay_max == 0) {
 		return;
@@ -446,8 +588,8 @@ pac_decay_to_limit(tsdn_t *tsdn, pac_t *pac, decay_t *decay,
 
 	edata_list_inactive_t decay_extents;
 	edata_list_inactive_init(&decay_extents);
-	size_t npurge = pac_stash_decayed(tsdn, pac, ecache, npages_limit,
-	    npages_decay_max, &decay_extents);
+	size_t npurge = pac_stash_decayed(
+	    tsdn, pac, ecache, npages_limit, npages_decay_max, &decay_extents);
 	if (npurge != 0) {
 		size_t npurged = pac_decay_stashed(tsdn, pac, decay,
 		    decay_stats, ecache, fully_decay, &decay_extents);
@@ -468,8 +610,8 @@ pac_decay_all(tsdn_t *tsdn, pac_t *pac, decay_t *decay,
 
 static void
 pac_decay_try_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay,
-    pac_decay_stats_t *decay_stats, ecache_t *ecache,
-    size_t current_npages, size_t npages_limit) {
+    pac_decay_stats_t *decay_stats, ecache_t *ecache, size_t current_npages,
+    size_t npages_limit) {
 	if (current_npages > npages_limit) {
 		pac_decay_to_limit(tsdn, pac, decay, decay_stats, ecache,
 		    /* fully_decay */ false, npages_limit,
@@ -504,8 +646,8 @@ pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay,
 	nstime_t time;
 	nstime_init_update(&time);
 	size_t npages_current = ecache_npages_get(ecache);
-	bool epoch_advanced = decay_maybe_advance_epoch(decay, &time,
-	    npages_current);
+	bool   epoch_advanced = decay_maybe_advance_epoch(
+            decay, &time, npages_current);
 	if (eagerness == PAC_PURGE_ALWAYS
 	    || (epoch_advanced && eagerness == PAC_PURGE_ON_EPOCH_ADVANCE)) {
 		size_t npages_limit = decay_npages_limit_get(decay);
@@ -519,9 +661,9 @@ pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay,
 bool
 pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state,
     ssize_t decay_ms, pac_purge_eagerness_t eagerness) {
-	decay_t *decay;
+	decay_t           *decay;
 	pac_decay_stats_t *decay_stats;
-	ecache_t *ecache;
+	ecache_t          *ecache;
 	pac_decay_data_get(pac, state, &decay, &decay_stats, &ecache);
 
 	if (!decay_ms_valid(decay_ms)) {
@@ -548,9 +690,9 @@ pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state,
 
 ssize_t
 pac_decay_ms_get(pac_t *pac, extent_state_t state) {
-	decay_t *decay;
+	decay_t           *decay;
 	pac_decay_stats_t *decay_stats;
-	ecache_t *ecache;
+	ecache_t          *ecache;
 	pac_decay_data_get(pac, state, &decay, &decay_stats, &ecache);
 	return decay_ms_read(decay);
 }
@@ -579,9 +721,10 @@ pac_destroy(tsdn_t *tsdn, pac_t *pac) {
 	 * dss-based extents for later reuse.
 	 */
 	ehooks_t *ehooks = pac_ehooks_get(pac);
-	edata_t *edata;
-	while ((edata = ecache_evict(tsdn, pac, ehooks,
-	    &pac->ecache_retained, 0)) != NULL) {
+	edata_t  *edata;
+	while (
+	    (edata = ecache_evict(tsdn, pac, ehooks, &pac->ecache_retained, 0))
+	    != NULL) {
 		extent_destroy_wrapper(tsdn, pac, ehooks, edata);
 	}
 }
diff --git a/src/pages.c b/src/pages.c
index 8c83a7de..5c12ae42 100644
--- a/src/pages.c
+++ b/src/pages.c
@@ -8,41 +8,46 @@
 #include "jemalloc/internal/malloc_io.h"
 
 #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
-#include <sys/sysctl.h>
-#ifdef __FreeBSD__
-#include <vm/vm_param.h>
-#endif
+#	include <sys/sysctl.h>
+#	ifdef __FreeBSD__
+#		include <vm/vm_param.h>
+#	endif
 #endif
 #ifdef __NetBSD__
-#include <sys/bitops.h>	/* ilog2 */
+#	include <sys/bitops.h> /* ilog2 */
 #endif
 #ifdef JEMALLOC_HAVE_VM_MAKE_TAG
-#define PAGES_FD_TAG VM_MAKE_TAG(101U)
+#	define PAGES_FD_TAG VM_MAKE_TAG(254U)
 #else
-#define PAGES_FD_TAG -1
+#	define PAGES_FD_TAG -1
+#endif
+#if defined(JEMALLOC_HAVE_PRCTL) && defined(JEMALLOC_PAGEID)
+#	include <sys/prctl.h>
+#	ifndef PR_SET_VMA
+#		define PR_SET_VMA 0x53564d41
+#		define PR_SET_VMA_ANON_NAME 0
+#	endif
 #endif
 
 /******************************************************************************/
 /* Data. */
 
 /* Actual operating system page size, detected during bootstrap, <= PAGE. */
-static size_t	os_page;
+size_t os_page;
 
 #ifndef _WIN32
-#  define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE)
-#  define PAGES_PROT_DECOMMIT (PROT_NONE)
-static int	mmap_flags;
+#	define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE)
+#	define PAGES_PROT_DECOMMIT (PROT_NONE)
+static int mmap_flags;
 #endif
-static bool	os_overcommits;
+static bool os_overcommits;
 
-const char *thp_mode_names[] = {
-	"default",
-	"always",
-	"never",
-	"not supported"
-};
-thp_mode_t opt_thp = THP_MODE_DEFAULT;
-thp_mode_t init_system_thp_mode;
+const char *const thp_mode_names[] = {
+    "default", "always", "never", "not supported"};
+const char *const system_thp_mode_names[] = {
+    "madvise", "always", "never", "not supported"};
+thp_mode_t        opt_thp = THP_MODE_DEFAULT;
+system_thp_mode_t init_system_thp_mode;
 
 /* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */
 static bool pages_can_purge_lazy_runtime = true;
@@ -59,16 +64,16 @@ static int madvise_dont_need_zeros_is_faulty = -1;
  *
  *   [1]: https://patchwork.kernel.org/patch/10576637/
  */
-static int madvise_MADV_DONTNEED_zeroes_pages()
-{
-	int works = -1;
+static int
+madvise_MADV_DONTNEED_zeroes_pages(void) {
 	size_t size = PAGE;
 
-	void * addr = mmap(NULL, size, PROT_READ|PROT_WRITE,
-	    MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	void *addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+	    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 
 	if (addr == MAP_FAILED) {
-		malloc_write("<jemalloc>: Cannot allocate memory for "
+		malloc_write(
+		    "<jemalloc>: Cannot allocate memory for "
 		    "MADV_DONTNEED check\n");
 		if (opt_abort) {
 			abort();
@@ -76,6 +81,7 @@ static int madvise_MADV_DONTNEED_zeroes_pages()
 	}
 
 	memset(addr, 'A', size);
+	int works;
 	if (madvise(addr, size, MADV_DONTNEED) == 0) {
 		works = memchr(addr, 'A', size) == NULL;
 	} else {
@@ -87,7 +93,8 @@ static int madvise_MADV_DONTNEED_zeroes_pages()
 	}
 
 	if (munmap(addr, size) != 0) {
-		malloc_write("<jemalloc>: Cannot deallocate memory for "
+		malloc_write(
+		    "<jemalloc>: Cannot deallocate memory for "
 		    "MADV_DONTNEED check\n");
 		if (opt_abort) {
 			abort();
@@ -98,6 +105,26 @@ static int madvise_MADV_DONTNEED_zeroes_pages()
 }
 #endif
 
+#ifdef JEMALLOC_PAGEID
+static int
+os_page_id(void *addr, size_t size, const char *name) {
+#	ifdef JEMALLOC_HAVE_PRCTL
+	/*
+	 * While parsing `/proc/<pid>/maps` file, the block could appear as
+	 * 7f4836000000-7f4836800000 rw-p 00000000 00:00 0 [anon:jemalloc_pg_overcommit]`
+	 */
+	int n;
+	assert(addr != NULL);
+	n = prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)addr, size,
+	    (uintptr_t)name);
+	assert(n == 0 || (n == -1 && get_errno() == EINVAL));
+	return n;
+#	else
+	return 0;
+#	endif
+}
+#endif
+
 /******************************************************************************/
 /*
  * Function prototypes for static functions that are referenced prior to
@@ -132,7 +159,8 @@ os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
 	 * of existing mappings, and we only want to create new mappings.
 	 */
 	{
-#ifdef __NetBSD__
+		int flags = mmap_flags;
+#	ifdef __NetBSD__
 		/*
 		 * On NetBSD PAGE for a platform is defined to the
 		 * maximum page size of all machine architectures
@@ -141,12 +169,12 @@ os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
 		 */
 		if (alignment > os_page || PAGE > os_page) {
 			unsigned int a = ilog2(MAX(alignment, PAGE));
-			mmap_flags |= MAP_ALIGNED(a);
+			flags |= MAP_ALIGNED(a);
 		}
-#endif
+#	endif
 		int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
 
-		ret = mmap(addr, size, prot, mmap_flags, PAGES_FD_TAG, 0);
+		ret = mmap(addr, size, prot, flags, PAGES_FD_TAG, 0);
 	}
 	assert(ret != NULL);
 
@@ -160,15 +188,21 @@ os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
 		ret = NULL;
 	}
 #endif
-	assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL &&
-	    ret == addr));
+	assert(ret == NULL || (addr == NULL && ret != addr)
+	    || (addr != NULL && ret == addr));
+#ifdef JEMALLOC_PAGEID
+	if (ret != NULL) {
+		os_page_id(ret, size,
+		    os_overcommits ? "jemalloc_pg_overcommit" : "jemalloc_pg");
+	}
+#endif
 	return ret;
 }
 
 static void *
-os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size,
-    bool *commit) {
-	void *ret = (void *)((uintptr_t)addr + leadsize);
+os_pages_trim(
+    void *addr, size_t alloc_size, size_t leadsize, size_t size, bool *commit) {
+	void *ret = (void *)((byte_t *)addr + leadsize);
 
 	assert(alloc_size >= leadsize + size);
 #ifdef _WIN32
@@ -188,7 +222,7 @@ os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size,
 		os_pages_unmap(addr, leadsize);
 	}
 	if (trailsize != 0) {
-		os_pages_unmap((void *)((uintptr_t)ret + size), trailsize);
+		os_pages_unmap((void *)((byte_t *)ret + size), trailsize);
 	}
 	return ret;
 #endif
@@ -208,13 +242,15 @@ os_pages_unmap(void *addr, size_t size) {
 		char buf[BUFERROR_BUF];
 
 		buferror(get_errno(), buf, sizeof(buf));
-		malloc_printf("<jemalloc>: Error in "
+		malloc_printf(
+		    "<jemalloc>: Error in "
 #ifdef _WIN32
 		    "VirtualFree"
 #else
 		    "munmap"
 #endif
-		    "(): %s\n", buf);
+		    "(): %s\n",
+		    buf);
 		if (opt_abort) {
 			abort();
 		}
@@ -321,13 +357,14 @@ os_pages_commit(void *addr, size_t size, bool commit) {
 	assert(PAGE_CEILING(size) == size);
 
 #ifdef _WIN32
-	return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT,
-	    PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT)));
+	return (commit
+	        ? (addr != VirtualAlloc(addr, size, MEM_COMMIT, PAGE_READWRITE))
+	        : (!VirtualFree(addr, size, MEM_DECOMMIT)));
 #else
 	{
-		int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
-		void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED,
-		    PAGES_FD_TAG, 0);
+		int   prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
+		void *result = mmap(
+		    addr, size, prot, mmap_flags | MAP_FIXED, PAGES_FD_TAG, 0);
 		if (result == MAP_FAILED) {
 			return true;
 		}
@@ -366,8 +403,8 @@ pages_decommit(void *addr, size_t size) {
 void
 pages_mark_guards(void *head, void *tail) {
 	assert(head != NULL || tail != NULL);
-	assert(head == NULL || tail == NULL ||
-	    (uintptr_t)head < (uintptr_t)tail);
+	assert(
+	    head == NULL || tail == NULL || (uintptr_t)head < (uintptr_t)tail);
 #ifdef JEMALLOC_HAVE_MPROTECT
 	if (head != NULL) {
 		mprotect(head, PAGE, PROT_NONE);
@@ -389,13 +426,12 @@ pages_mark_guards(void *head, void *tail) {
 void
 pages_unmark_guards(void *head, void *tail) {
 	assert(head != NULL || tail != NULL);
-	assert(head == NULL || tail == NULL ||
-	    (uintptr_t)head < (uintptr_t)tail);
+	assert(
+	    head == NULL || tail == NULL || (uintptr_t)head < (uintptr_t)tail);
 #ifdef JEMALLOC_HAVE_MPROTECT
-	bool head_and_tail = (head != NULL) && (tail != NULL);
-	size_t range = head_and_tail ?
-	    (uintptr_t)tail - (uintptr_t)head + PAGE :
-	    SIZE_T_MAX;
+	bool   head_and_tail = (head != NULL) && (tail != NULL);
+	size_t range = head_and_tail ? (uintptr_t)tail - (uintptr_t)head + PAGE
+	                             : SIZE_T_MAX;
 	/*
 	 * The amount of work that the kernel does in mprotect depends on the
 	 * range argument.  SC_LARGE_MINCLASS is an arbitrary threshold chosen
@@ -444,17 +480,18 @@ pages_purge_lazy(void *addr, size_t size) {
 	return false;
 #elif defined(JEMALLOC_PURGE_MADVISE_FREE)
 	return (madvise(addr, size,
-#  ifdef MADV_FREE
-	    MADV_FREE
-#  else
-	    JEMALLOC_MADV_FREE
-#  endif
-	    ) != 0);
-#elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
-    !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
+#	ifdef MADV_FREE
+	            MADV_FREE
+#	else
+	            JEMALLOC_MADV_FREE
+#	endif
+	            )
+	    != 0);
+#elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED)                                 \
+    && !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
 	return (madvise(addr, size, MADV_DONTNEED) != 0);
-#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \
-    !defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS)
+#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED)                           \
+    && !defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS)
 	return (posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0);
 #else
 	not_reached();
@@ -470,14 +507,14 @@ pages_purge_forced(void *addr, size_t size) {
 		return true;
 	}
 
-#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
-    defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
-	return (unlikely(madvise_dont_need_zeros_is_faulty) ||
-	    madvise(addr, size, MADV_DONTNEED) != 0);
-#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \
-    defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS)
-	return (unlikely(madvise_dont_need_zeros_is_faulty) ||
-	    posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0);
+#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED)                                   \
+    && defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
+	return (unlikely(madvise_dont_need_zeros_is_faulty)
+	    || madvise(addr, size, MADV_DONTNEED) != 0);
+#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED)                           \
+    && defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS)
+	return (unlikely(madvise_dont_need_zeros_is_faulty)
+	    || posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0);
 #elif defined(JEMALLOC_MAPS_COALESCE)
 	/* Try to overlay a new demand-zeroed mapping. */
 	return pages_commit(addr, size);
@@ -538,6 +575,30 @@ pages_nohuge_unaligned(void *addr, size_t size) {
 	return pages_nohuge_impl(addr, size, false);
 }
 
+bool
+pages_collapse(void *addr, size_t size) {
+	assert(PAGE_ADDR2BASE(addr) == addr);
+	assert(PAGE_CEILING(size) == size);
+	/*
+	 * There is one more MADV_COLLAPSE precondition that is not easy to
+	 * express with assert statement.  In order to madvise(addr, size,
+	 * MADV_COLLAPSE) call to be successful, at least one page in the range
+	 * must currently be backed by physical memory.  In particularly, this
+	 * means we can't call pages_collapse on freshly mapped memory region.
+	 * See madvise(2) man page for more details.
+	 */
+#if defined(JEMALLOC_HAVE_MADVISE_COLLAPSE)                                    \
+    && (defined(MADV_COLLAPSE) || defined(JEMALLOC_MADV_COLLAPSE))
+#	if defined(MADV_COLLAPSE)
+	return (madvise(addr, size, MADV_COLLAPSE) != 0);
+#	elif defined(JEMALLOC_MADV_COLLAPSE)
+	return (madvise(addr, size, JEMALLOC_MADV_COLLAPSE) != 0);
+#	endif
+#else
+	return true;
+#endif
+}
+
 bool
 pages_dontdump(void *addr, size_t size) {
 	assert(PAGE_ADDR2BASE(addr) == addr);
@@ -564,6 +625,83 @@ pages_dodump(void *addr, size_t size) {
 #endif
 }
 
+#ifdef JEMALLOC_HAVE_PROCESS_MADVISE
+#	include <sys/mman.h>
+#	include <sys/syscall.h>
+
+#	ifndef PIDFD_SELF
+#		define PIDFD_SELF -10000
+#	endif
+
+static atomic_b_t process_madvise_gate = ATOMIC_INIT(true);
+
+static bool
+init_process_madvise(void) {
+	if (opt_process_madvise_max_batch == 0) {
+		return false;
+	}
+
+	if (opt_process_madvise_max_batch > PROCESS_MADVISE_MAX_BATCH_LIMIT) {
+		opt_process_madvise_max_batch = PROCESS_MADVISE_MAX_BATCH_LIMIT;
+	}
+
+	return false;
+}
+
+#	ifdef SYS_process_madvise
+#		define JE_SYS_PROCESS_MADVISE_NR SYS_process_madvise
+#	else
+#		define JE_SYS_PROCESS_MADVISE_NR                              \
+			EXPERIMENTAL_SYS_PROCESS_MADVISE_NR
+#	endif
+
+static bool
+pages_purge_process_madvise_impl(
+    void *vec, size_t vec_len, size_t total_bytes) {
+	if (!atomic_load_b(&process_madvise_gate, ATOMIC_RELAXED)) {
+		return true;
+	}
+
+	/*
+	 * TODO: remove this save/restore of errno after supporting errno
+	 * preservation for free() call properly.
+	 */
+	int    saved_errno = get_errno();
+	size_t purged_bytes = (size_t)syscall(JE_SYS_PROCESS_MADVISE_NR,
+	    PIDFD_SELF, (struct iovec *)vec, vec_len, MADV_DONTNEED, 0);
+	if (purged_bytes == (size_t)-1) {
+		if (errno == EPERM || errno == EINVAL || errno == ENOSYS
+		    || errno == EBADF) {
+			/* Process madvise not supported the way we need it. */
+			atomic_store_b(
+			    &process_madvise_gate, false, ATOMIC_RELAXED);
+		}
+		set_errno(saved_errno);
+	}
+
+	return purged_bytes != total_bytes;
+}
+
+#else
+
+static bool
+init_process_madvise(void) {
+	return false;
+}
+
+static bool
+pages_purge_process_madvise_impl(
+    void *vec, size_t vec_len, size_t total_bytes) {
+	not_reached();
+	return true;
+}
+
+#endif
+
+bool
+pages_purge_process_madvise(void *vec, size_t vec_len, size_t total_bytes) {
+	return pages_purge_process_madvise_impl(vec, vec_len, total_bytes);
+}
 
 static size_t
 os_page_detect(void) {
@@ -580,7 +718,7 @@ os_page_detect(void) {
 #else
 	long result = sysconf(_SC_PAGESIZE);
 	if (result == -1) {
-		return LG_PAGE;
+		return PAGE;
 	}
 	return (size_t)result;
 #endif
@@ -589,11 +727,11 @@ os_page_detect(void) {
 #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
 static bool
 os_overcommits_sysctl(void) {
-	int vm_overcommit;
+	int    vm_overcommit;
 	size_t sz;
 
 	sz = sizeof(vm_overcommit);
-#if defined(__FreeBSD__) && defined(VM_OVERCOMMIT)
+#	if defined(__FreeBSD__) && defined(VM_OVERCOMMIT)
 	int mib[2];
 
 	mib[0] = CTL_VM;
@@ -601,69 +739,38 @@ os_overcommits_sysctl(void) {
 	if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) {
 		return false; /* Error. */
 	}
-#else
+#	else
 	if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) {
 		return false; /* Error. */
 	}
-#endif
+#	endif
 
 	return ((vm_overcommit & 0x3) == 0);
 }
 #endif
 
 #ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
-/*
- * Use syscall(2) rather than {open,read,close}(2) when possible to avoid
- * reentry during bootstrapping if another library has interposed system call
- * wrappers.
- */
 static bool
 os_overcommits_proc(void) {
-	int fd;
+	int  fd;
 	char buf[1];
 
-#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
-	#if defined(O_CLOEXEC)
-		fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY |
-			O_CLOEXEC);
-	#else
-		fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY);
-		if (fd != -1) {
-			fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
-		}
-	#endif
-#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
-	#if defined(O_CLOEXEC)
-		fd = (int)syscall(SYS_openat,
-			AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
-	#else
-		fd = (int)syscall(SYS_openat,
-			AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY);
-		if (fd != -1) {
-			fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
-		}
-	#endif
-#else
-	#if defined(O_CLOEXEC)
-		fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
-	#else
-		fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);
-		if (fd != -1) {
-			fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
-		}
-	#endif
-#endif
+#	if defined(O_CLOEXEC)
+	fd = malloc_open(
+	    "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
+#	else
+	fd = malloc_open("/proc/sys/vm/overcommit_memory", O_RDONLY);
+	if (fd != -1) {
+		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
+	}
+#	endif
 
 	if (fd == -1) {
 		return false; /* Error. */
 	}
 
 	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
-#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
-	syscall(SYS_close, fd);
-#else
-	close(fd);
-#endif
+	malloc_close(fd);
 
 	if (nread < 1) {
 		return false; /* Error. */
@@ -678,21 +785,31 @@ os_overcommits_proc(void) {
 }
 #endif
 
+static bool
+pages_should_skip_set_thp_state() {
+	if (opt_thp == thp_mode_do_nothing
+	    || (opt_thp == thp_mode_always
+	        && init_system_thp_mode == system_thp_mode_always)
+	    || (opt_thp == thp_mode_never
+	        && init_system_thp_mode == system_thp_mode_never)) {
+		return true;
+	}
+	return false;
+}
 void
-pages_set_thp_state (void *ptr, size_t size) {
-	if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) {
+pages_set_thp_state(void *ptr, size_t size) {
+	if (pages_should_skip_set_thp_state()) {
 		return;
 	}
-	assert(opt_thp != thp_mode_not_supported &&
-	    init_system_thp_mode != thp_mode_not_supported);
+	assert(opt_thp != thp_mode_not_supported
+	    && init_system_thp_mode != system_thp_mode_not_supported);
 
 	if (opt_thp == thp_mode_always
-	    && init_system_thp_mode != thp_mode_never) {
-		assert(init_system_thp_mode == thp_mode_default);
+	    && init_system_thp_mode == system_thp_mode_madvise) {
 		pages_huge_unaligned(ptr, size);
 	} else if (opt_thp == thp_mode_never) {
-		assert(init_system_thp_mode == thp_mode_default ||
-		    init_system_thp_mode == thp_mode_always);
+		assert(init_system_thp_mode == system_thp_mode_madvise
+		    || init_system_thp_mode == system_thp_mode_always);
 		pages_nohuge_unaligned(ptr, size);
 	}
 }
@@ -710,48 +827,47 @@ init_thp_state(void) {
 	static const char sys_state_madvise[] = "always [madvise] never\n";
 	static const char sys_state_always[] = "[always] madvise never\n";
 	static const char sys_state_never[] = "always madvise [never]\n";
-	char buf[sizeof(sys_state_madvise)];
+	char              buf[sizeof(sys_state_madvise)];
 
-#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
-	int fd = (int)syscall(SYS_open,
+	int fd = malloc_open(
 	    "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
-#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
-	int fd = (int)syscall(SYS_openat,
-		    AT_FDCWD, "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
-#else
-	int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
-#endif
 	if (fd == -1) {
 		goto label_error;
 	}
 
 	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
-#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
-	syscall(SYS_close, fd);
-#else
-	close(fd);
-#endif
-
-        if (nread < 0) {
+	malloc_close(fd);
+	if (nread < 0) {
 		goto label_error;
-        }
+	}
 
 	if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) {
-		init_system_thp_mode = thp_mode_default;
+		init_system_thp_mode = system_thp_mode_madvise;
 	} else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) {
-		init_system_thp_mode = thp_mode_always;
+		init_system_thp_mode = system_thp_mode_always;
 	} else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) {
-		init_system_thp_mode = thp_mode_never;
+		init_system_thp_mode = system_thp_mode_never;
 	} else {
 		goto label_error;
 	}
+	if (opt_hpa_opts.hugify_style == hpa_hugify_style_auto) {
+		if (init_system_thp_mode == system_thp_mode_madvise) {
+			opt_hpa_opts.hugify_style = hpa_hugify_style_lazy;
+		} else {
+			opt_hpa_opts.hugify_style = hpa_hugify_style_none;
+		}
+	}
 	return;
 #elif defined(JEMALLOC_HAVE_MEMCNTL)
-	init_system_thp_mode = thp_mode_default;
+	init_system_thp_mode = system_thp_mode_madvise;
+	if (opt_hpa_opts.hugify_style == hpa_hugify_style_auto) {
+		opt_hpa_opts.hugify_style = hpa_hugify_style_eager;
+	}
 	return;
 #endif
 label_error:
-	opt_thp = init_system_thp_mode = thp_mode_not_supported;
+	opt_thp = thp_mode_not_supported;
+	init_system_thp_mode = system_thp_mode_not_supported;
 }
 
 bool
@@ -767,10 +883,13 @@ pages_boot(void) {
 
 #ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
 	if (!opt_trust_madvise) {
-		madvise_dont_need_zeros_is_faulty = !madvise_MADV_DONTNEED_zeroes_pages();
+		madvise_dont_need_zeros_is_faulty =
+		    !madvise_MADV_DONTNEED_zeroes_pages();
 		if (madvise_dont_need_zeros_is_faulty) {
-			malloc_write("<jemalloc>: MADV_DONTNEED does not work (memset will be used instead)\n");
-			malloc_write("<jemalloc>: (This is the expected behaviour if you are running under QEMU)\n");
+			malloc_write(
+			    "<jemalloc>: MADV_DONTNEED does not work (memset will be used instead)\n");
+			malloc_write(
+			    "<jemalloc>: (This is the expected behaviour if you are running under QEMU)\n");
 		}
 	} else {
 		/* In case opt_trust_madvise is disable,
@@ -787,11 +906,11 @@ pages_boot(void) {
 	os_overcommits = os_overcommits_sysctl();
 #elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY)
 	os_overcommits = os_overcommits_proc();
-#  ifdef MAP_NORESERVE
+#	ifdef MAP_NORESERVE
 	if (os_overcommits) {
 		mmap_flags |= MAP_NORESERVE;
 	}
-#  endif
+#	endif
 #elif defined(__NetBSD__)
 	os_overcommits = true;
 #else
@@ -807,8 +926,9 @@ pages_boot(void) {
 #else
 	/* Detect lazy purge runtime support. */
 	if (pages_can_purge_lazy) {
-		bool committed = false;
-		void *madv_free_page = os_pages_map(NULL, PAGE, PAGE, &committed);
+		bool  committed = false;
+		void *madv_free_page = os_pages_map(
+		    NULL, PAGE, PAGE, &committed);
 		if (madv_free_page == NULL) {
 			return true;
 		}
@@ -819,6 +939,12 @@ pages_boot(void) {
 		os_pages_unmap(madv_free_page, PAGE);
 	}
 #endif
+	if (init_process_madvise()) {
+		if (opt_abort) {
+			abort();
+		}
+		return true;
+	}
 
 	return false;
 }
diff --git a/src/pai.c b/src/pai.c
deleted file mode 100644
index 45c87729..00000000
--- a/src/pai.c
+++ /dev/null
@@ -1,31 +0,0 @@
-#include "jemalloc/internal/jemalloc_preamble.h"
-#include "jemalloc/internal/jemalloc_internal_includes.h"
-
-size_t
-pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs,
-    edata_list_active_t *results, bool *deferred_work_generated) {
-	for (size_t i = 0; i < nallocs; i++) {
-		bool deferred_by_alloc = false;
-		edata_t *edata = pai_alloc(tsdn, self, size, PAGE,
-		    /* zero */ false, /* guarded */ false,
-		    /* frequent_reuse */ false, &deferred_by_alloc);
-		*deferred_work_generated |= deferred_by_alloc;
-		if (edata == NULL) {
-			return i;
-		}
-		edata_list_active_append(results, edata);
-	}
-	return nallocs;
-}
-
-void
-pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self,
-    edata_list_active_t *list, bool *deferred_work_generated) {
-	edata_t *edata;
-	while ((edata = edata_list_active_first(list)) != NULL) {
-		bool deferred_by_dalloc = false;
-		edata_list_active_remove(list, edata);
-		pai_dalloc(tsdn, self, edata, &deferred_by_dalloc);
-		*deferred_work_generated |= deferred_by_dalloc;
-	}
-}
diff --git a/src/peak_event.c b/src/peak_event.c
index 4093fbcc..39f90b70 100644
--- a/src/peak_event.c
+++ b/src/peak_event.c
@@ -3,43 +3,24 @@
 
 #include "jemalloc/internal/peak_event.h"
 
-#include "jemalloc/internal/activity_callback.h"
 #include "jemalloc/internal/peak.h"
-
-/*
- * Update every 64K by default.  We're not exposing this as a configuration
- * option for now; we don't want to bind ourselves too tightly to any particular
- * performance requirements for small values, or guarantee that we'll even be
- * able to provide fine-grained accuracy.
- */
-#define PEAK_EVENT_WAIT (64 * 1024)
+#include "jemalloc/internal/thread_event_registry.h"
 
 /* Update the peak with current tsd state. */
 void
 peak_event_update(tsd_t *tsd) {
 	uint64_t alloc = tsd_thread_allocated_get(tsd);
 	uint64_t dalloc = tsd_thread_deallocated_get(tsd);
-	peak_t *peak = tsd_peakp_get(tsd);
+	peak_t  *peak = tsd_peakp_get(tsd);
 	peak_update(peak, alloc, dalloc);
 }
 
-static void
-peak_event_activity_callback(tsd_t *tsd) {
-	activity_callback_thunk_t *thunk = tsd_activity_callback_thunkp_get(
-	    tsd);
-	uint64_t alloc = tsd_thread_allocated_get(tsd);
-	uint64_t dalloc = tsd_thread_deallocated_get(tsd);
-	if (thunk->callback != NULL) {
-		thunk->callback(thunk->uctx, alloc, dalloc);
-	}
-}
-
 /* Set current state to zero. */
 void
 peak_event_zero(tsd_t *tsd) {
 	uint64_t alloc = tsd_thread_allocated_get(tsd);
 	uint64_t dalloc = tsd_thread_deallocated_get(tsd);
-	peak_t *peak = tsd_peakp_get(tsd);
+	peak_t  *peak = tsd_peakp_get(tsd);
 	peak_set_zero(peak, alloc, dalloc);
 }
 
@@ -49,34 +30,30 @@ peak_event_max(tsd_t *tsd) {
 	return peak_max(peak);
 }
 
-uint64_t
-peak_alloc_new_event_wait(tsd_t *tsd) {
+static uint64_t
+peak_event_new_event_wait(tsd_t *tsd) {
 	return PEAK_EVENT_WAIT;
 }
 
-uint64_t
-peak_alloc_postponed_event_wait(tsd_t *tsd) {
+static uint64_t
+peak_event_postponed_event_wait(tsd_t *tsd) {
 	return TE_MIN_START_WAIT;
 }
 
-void
-peak_alloc_event_handler(tsd_t *tsd, uint64_t elapsed) {
+static void
+peak_event_handler(tsd_t *tsd) {
 	peak_event_update(tsd);
-	peak_event_activity_callback(tsd);
 }
 
-uint64_t
-peak_dalloc_new_event_wait(tsd_t *tsd) {
-	return PEAK_EVENT_WAIT;
+static te_enabled_t
+peak_event_enabled(void) {
+	return config_stats ? te_enabled_yes : te_enabled_no;
 }
 
-uint64_t
-peak_dalloc_postponed_event_wait(tsd_t *tsd) {
-	return TE_MIN_START_WAIT;
-}
-
-void
-peak_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed) {
-	peak_event_update(tsd);
-	peak_event_activity_callback(tsd);
-}
+/* Handles alloc and dalloc */
+te_base_cb_t peak_te_handler = {
+    .enabled = &peak_event_enabled,
+    .new_event_wait = &peak_event_new_event_wait,
+    .postponed_event_wait = &peak_event_postponed_event_wait,
+    .event_handler = &peak_event_handler,
+};
diff --git a/src/prof.c b/src/prof.c
index 7a6d5d56..a833fed5 100644
--- a/src/prof.c
+++ b/src/prof.c
@@ -12,6 +12,7 @@
 #include "jemalloc/internal/prof_sys.h"
 #include "jemalloc/internal/prof_hook.h"
 #include "jemalloc/internal/thread_event.h"
+#include "jemalloc/internal/thread_event_registry.h"
 
 /*
  * This file implements the profiling "APIs" needed by other parts of jemalloc,
@@ -23,19 +24,21 @@
 
 /* Data. */
 
-bool opt_prof = false;
-bool opt_prof_active = true;
-bool opt_prof_thread_active_init = true;
-size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
-ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
-bool opt_prof_gdump = false;
-bool opt_prof_final = false;
-bool opt_prof_leak = false;
-bool opt_prof_leak_error = false;
-bool opt_prof_accum = false;
-char opt_prof_prefix[PROF_DUMP_FILENAME_LEN];
-bool opt_prof_sys_thread_name = false;
-bool opt_prof_unbias = true;
+bool     opt_prof = false;
+bool     opt_prof_active = true;
+bool     opt_prof_thread_active_init = true;
+unsigned opt_prof_bt_max = PROF_BT_MAX_DEFAULT;
+size_t   opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
+ssize_t  opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
+bool     opt_prof_gdump = false;
+bool     opt_prof_final = false;
+bool     opt_prof_leak = false;
+bool     opt_prof_leak_error = false;
+bool     opt_prof_accum = false;
+bool     opt_prof_pid_namespace = false;
+char     opt_prof_prefix[PROF_DUMP_FILENAME_LEN];
+bool     opt_prof_sys_thread_name = false;
+bool     opt_prof_unbias = true;
 
 /* Accessed via prof_sample_event_handler(). */
 static counter_accum_t prof_idump_accumulated;
@@ -44,38 +47,44 @@ static counter_accum_t prof_idump_accumulated;
  * Initialized as opt_prof_active, and accessed via
  * prof_active_[gs]et{_unlocked,}().
  */
-bool prof_active_state;
+bool                  prof_active_state;
 static malloc_mutex_t prof_active_mtx;
 
 /*
  * Initialized as opt_prof_thread_active_init, and accessed via
  * prof_thread_active_init_[gs]et().
  */
-static bool prof_thread_active_init;
+static bool           prof_thread_active_init;
 static malloc_mutex_t prof_thread_active_init_mtx;
 
 /*
  * Initialized as opt_prof_gdump, and accessed via
  * prof_gdump_[gs]et{_unlocked,}().
  */
-bool prof_gdump_val;
+bool                  prof_gdump_val;
 static malloc_mutex_t prof_gdump_mtx;
 
 uint64_t prof_interval = 0;
 
 size_t lg_prof_sample;
 
-static uint64_t next_thr_uid;
+static uint64_t       next_thr_uid;
 static malloc_mutex_t next_thr_uid_mtx;
 
 /* Do not dump any profiles until bootstrapping is complete. */
 bool prof_booted = false;
 
 /* Logically a prof_backtrace_hook_t. */
-atomic_p_t prof_backtrace_hook;
+static atomic_p_t prof_backtrace_hook;
 
 /* Logically a prof_dump_hook_t. */
-atomic_p_t prof_dump_hook;
+static atomic_p_t prof_dump_hook;
+
+/* Logically a prof_sample_hook_t. */
+static atomic_p_t prof_sample_hook;
+
+/* Logically a prof_sample_free_hook_t. */
+static atomic_p_t prof_sample_free_hook;
 
 /******************************************************************************/
 
@@ -84,11 +93,19 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx) {
 	cassert(config_prof);
 
 	if (tsd_reentrancy_level_get(tsd) > 0) {
-		assert((uintptr_t)tctx == (uintptr_t)1U);
+		assert(tctx == PROF_TCTX_SENTINEL);
 		return;
 	}
 
-	if ((uintptr_t)tctx > (uintptr_t)1U) {
+	if (prof_tctx_is_valid(tctx)) {
+		/*
+		 * This `assert` really shouldn't be necessary. It's here
+		 * because there's a bug in the clang static analyzer; it
+		 * somehow does not realize that by `prof_tctx_is_valid(tctx)`
+		 * being true that we've already ensured that `tctx` is not
+		 * `NULL`.
+		 */
+		assert(tctx != NULL);
 		malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
 		tctx->prepared = false;
 		prof_tctx_try_destroy(tsd, tctx);
@@ -96,16 +113,16 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx) {
 }
 
 void
-prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size,
-    size_t usize, prof_tctx_t *tctx) {
+prof_malloc_sample_object(
+    tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx) {
 	cassert(config_prof);
 
 	if (opt_prof_sys_thread_name) {
 		prof_sys_thread_name_fetch(tsd);
 	}
 
-	edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global,
-	    ptr);
+	edata_t *edata = emap_edata_lookup(
+	    tsd_tsdn(tsd), &arena_emap_global, ptr);
 	prof_info_set(tsd, edata, tctx, size);
 
 	szind_t szind = sz_size2index(usize);
@@ -144,17 +161,37 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size,
 	if (opt_prof_stats) {
 		prof_stats_inc(tsd, szind, size);
 	}
+
+	/* Sample hook. */
+	prof_sample_hook_t prof_sample_hook = prof_sample_hook_get();
+	if (prof_sample_hook != NULL) {
+		prof_bt_t *bt = &tctx->gctx->bt;
+		pre_reentrancy(tsd, NULL);
+		prof_sample_hook(ptr, size, bt->vec, bt->len, usize);
+		post_reentrancy(tsd);
+	}
 }
 
 void
-prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info) {
+prof_free_sampled_object(
+    tsd_t *tsd, const void *ptr, size_t usize, prof_info_t *prof_info) {
 	cassert(config_prof);
 
 	assert(prof_info != NULL);
 	prof_tctx_t *tctx = prof_info->alloc_tctx;
-	assert((uintptr_t)tctx > (uintptr_t)1U);
+	assert(prof_tctx_is_valid(tctx));
 
 	szind_t szind = sz_size2index(usize);
+
+	/* Unsample hook. */
+	prof_sample_free_hook_t prof_sample_free_hook =
+	    prof_sample_free_hook_get();
+	if (prof_sample_free_hook != NULL) {
+		pre_reentrancy(tsd, NULL);
+		prof_sample_free_hook(ptr, usize);
+		post_reentrancy(tsd);
+	}
+
 	malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
 
 	assert(tctx->cnts.curobjs > 0);
@@ -242,9 +279,12 @@ prof_sample_new_event_wait(tsd_t *tsd) {
 	 * otherwise bytes_until_sample would be 0 if u is exactly 1.0.
 	 */
 	uint64_t r = prng_lg_range_u64(tsd_prng_statep_get(tsd), 53);
-	double u = (r == 0U) ? 1.0 : (double)r * (1.0/9007199254740992.0L);
-	return (uint64_t)(log(u) /
-	    log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))
+	double   u = (r == 0U)
+	      ? 1.0
+	      : (double)((long double)r * (1.0L / 9007199254740992.0L));
+	return (uint64_t)(log(u)
+	           / log(
+	               1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))
 	    + (uint64_t)1U;
 #else
 	not_reached();
@@ -252,30 +292,51 @@ prof_sample_new_event_wait(tsd_t *tsd) {
 #endif
 }
 
+void
+prof_sample_event_handler(tsd_t *tsd) {
+	cassert(config_prof);
+	if (prof_interval == 0 || !prof_active_get_unlocked()) {
+		return;
+	}
+	uint64_t last_event = thread_allocated_last_event_get(tsd);
+	uint64_t last_sample_event = tsd_prof_sample_last_event_get(tsd);
+	tsd_prof_sample_last_event_set(tsd, last_event);
+	uint64_t elapsed = last_event - last_sample_event;
+	assert(elapsed > 0 && elapsed != TE_INVALID_ELAPSED);
+	if (counter_accum(tsd_tsdn(tsd), &prof_idump_accumulated, elapsed)) {
+		prof_idump(tsd_tsdn(tsd));
+	}
+}
+
 uint64_t
-prof_sample_postponed_event_wait(tsd_t *tsd) {
-	/*
+tsd_prof_sample_event_wait_get(tsd_t *tsd) {
+#ifdef JEMALLOC_PROF
+	return tsd_te_datap_get_unsafe(tsd)->alloc_wait[te_alloc_prof_sample];
+#else
+	not_reached();
+	return TE_MAX_START_WAIT;
+#endif
+}
+
+static te_enabled_t
+prof_sample_enabled(void) {
+	return config_prof && opt_prof ? te_enabled_yes : te_enabled_no;
+}
+
+te_base_cb_t prof_sample_te_handler = {
+    .enabled = &prof_sample_enabled,
+    .new_event_wait = &prof_sample_new_event_wait,
+    /*
 	 * The postponed wait time for prof sample event is computed as if we
 	 * want a new wait time (i.e. as if the event were triggered).  If we
 	 * instead postpone to the immediate next allocation, like how we're
 	 * handling the other events, then we can have sampling bias, if e.g.
 	 * the allocation immediately following a reentrancy always comes from
 	 * the same stack trace.
-	 */
-	return prof_sample_new_event_wait(tsd);
-}
-
-void
-prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed) {
-	cassert(config_prof);
-	assert(elapsed > 0 && elapsed != TE_INVALID_ELAPSED);
-	if (prof_interval == 0 || !prof_active_get_unlocked()) {
-		return;
-	}
-	if (counter_accum(tsd_tsdn(tsd), &prof_idump_accumulated, elapsed)) {
-		prof_idump(tsd_tsdn(tsd));
-	}
-}
+	*/
+    .postponed_event_wait = &prof_sample_new_event_wait,
+    .event_handler = &prof_sample_event_handler,
+};
 
 static void
 prof_fdump(void) {
@@ -302,7 +363,7 @@ prof_idump_accum_init(void) {
 
 void
 prof_idump(tsdn_t *tsdn) {
-	tsd_t *tsd;
+	tsd_t        *tsd;
 	prof_tdata_t *tdata;
 
 	cassert(config_prof);
@@ -341,7 +402,7 @@ prof_mdump(tsd_t *tsd, const char *filename) {
 
 void
 prof_gdump(tsdn_t *tsdn) {
-	tsd_t *tsd;
+	tsd_t        *tsd;
 	prof_tdata_t *tdata;
 
 	cassert(config_prof);
@@ -388,13 +449,16 @@ prof_tdata_t *
 prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) {
 	uint64_t thr_uid = tdata->thr_uid;
 	uint64_t thr_discrim = tdata->thr_discrim + 1;
-	char *thread_name = (tdata->thread_name != NULL) ?
-	    prof_thread_name_alloc(tsd, tdata->thread_name) : NULL;
-	bool active = tdata->active;
+	bool     active = tdata->active;
 
+	/* Keep a local copy of the thread name, before detaching. */
+	prof_thread_name_assert(tdata);
+	char thread_name[PROF_THREAD_NAME_MAX_LEN];
+	strncpy(thread_name, tdata->thread_name, PROF_THREAD_NAME_MAX_LEN);
 	prof_tdata_detach(tsd, tdata);
-	return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name,
-	    active);
+
+	return prof_tdata_init_impl(
+	    tsd, thr_uid, thr_discrim, thread_name, active);
 }
 
 void
@@ -437,15 +501,15 @@ prof_active_set(tsdn_t *tsdn, bool active) {
 
 const char *
 prof_thread_name_get(tsd_t *tsd) {
+	static const char *prof_thread_name_dummy = "";
+
 	assert(tsd_reentrancy_level_get(tsd) == 0);
-
-	prof_tdata_t *tdata;
-
-	tdata = prof_tdata_get(tsd, true);
+	prof_tdata_t *tdata = prof_tdata_get(tsd, true);
 	if (tdata == NULL) {
-		return "";
+		return prof_thread_name_dummy;
 	}
-	return (tdata->thread_name != NULL ? tdata->thread_name : "");
+
+	return tdata->thread_name;
 }
 
 int
@@ -532,9 +596,9 @@ prof_backtrace_hook_set(prof_backtrace_hook_t hook) {
 }
 
 prof_backtrace_hook_t
-prof_backtrace_hook_get() {
-	return (prof_backtrace_hook_t)atomic_load_p(&prof_backtrace_hook,
-	    ATOMIC_ACQUIRE);
+prof_backtrace_hook_get(void) {
+	return (prof_backtrace_hook_t)atomic_load_p(
+	    &prof_backtrace_hook, ATOMIC_ACQUIRE);
 }
 
 void
@@ -543,17 +607,38 @@ prof_dump_hook_set(prof_dump_hook_t hook) {
 }
 
 prof_dump_hook_t
-prof_dump_hook_get() {
-	return (prof_dump_hook_t)atomic_load_p(&prof_dump_hook,
-	    ATOMIC_ACQUIRE);
+prof_dump_hook_get(void) {
+	return (prof_dump_hook_t)atomic_load_p(&prof_dump_hook, ATOMIC_ACQUIRE);
+}
+
+void
+prof_sample_hook_set(prof_sample_hook_t hook) {
+	atomic_store_p(&prof_sample_hook, hook, ATOMIC_RELEASE);
+}
+
+prof_sample_hook_t
+prof_sample_hook_get(void) {
+	return (prof_sample_hook_t)atomic_load_p(
+	    &prof_sample_hook, ATOMIC_ACQUIRE);
+}
+
+void
+prof_sample_free_hook_set(prof_sample_free_hook_t hook) {
+	atomic_store_p(&prof_sample_free_hook, hook, ATOMIC_RELEASE);
+}
+
+prof_sample_free_hook_t
+prof_sample_free_hook_get(void) {
+	return (prof_sample_free_hook_t)atomic_load_p(
+	    &prof_sample_free_hook, ATOMIC_ACQUIRE);
 }
 
 void
 prof_boot0(void) {
 	cassert(config_prof);
 
-	memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
-	    sizeof(PROF_PREFIX_DEFAULT));
+	memcpy(
+	    opt_prof_prefix, PROF_PREFIX_DEFAULT, sizeof(PROF_PREFIX_DEFAULT));
 }
 
 void
@@ -577,8 +662,8 @@ prof_boot1(void) {
 		opt_prof_gdump = false;
 	} else if (opt_prof) {
 		if (opt_lg_prof_interval >= 0) {
-			prof_interval = (((uint64_t)1U) <<
-			    opt_lg_prof_interval);
+			prof_interval = (((uint64_t)1U)
+			    << opt_lg_prof_interval);
 		}
 	}
 }
@@ -592,41 +677,40 @@ prof_boot2(tsd_t *tsd, base_t *base) {
 	 * stats when opt_prof is false.
 	 */
 	if (malloc_mutex_init(&prof_active_mtx, "prof_active",
-	    WITNESS_RANK_PROF_ACTIVE, malloc_mutex_rank_exclusive)) {
+	        WITNESS_RANK_PROF_ACTIVE, malloc_mutex_rank_exclusive)) {
 		return true;
 	}
 	if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump",
-	    WITNESS_RANK_PROF_GDUMP, malloc_mutex_rank_exclusive)) {
+	        WITNESS_RANK_PROF_GDUMP, malloc_mutex_rank_exclusive)) {
 		return true;
 	}
 	if (malloc_mutex_init(&prof_thread_active_init_mtx,
-	    "prof_thread_active_init", WITNESS_RANK_PROF_THREAD_ACTIVE_INIT,
-	    malloc_mutex_rank_exclusive)) {
+	        "prof_thread_active_init", WITNESS_RANK_PROF_THREAD_ACTIVE_INIT,
+	        malloc_mutex_rank_exclusive)) {
 		return true;
 	}
 	if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx",
-	    WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) {
+	        WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) {
 		return true;
 	}
 	if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas",
-	    WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) {
+	        WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) {
 		return true;
 	}
 	if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid",
-	    WITNESS_RANK_PROF_NEXT_THR_UID, malloc_mutex_rank_exclusive)) {
+	        WITNESS_RANK_PROF_NEXT_THR_UID, malloc_mutex_rank_exclusive)) {
 		return true;
 	}
 	if (malloc_mutex_init(&prof_stats_mtx, "prof_stats",
-	    WITNESS_RANK_PROF_STATS, malloc_mutex_rank_exclusive)) {
+	        WITNESS_RANK_PROF_STATS, malloc_mutex_rank_exclusive)) {
 		return true;
 	}
-	if (malloc_mutex_init(&prof_dump_filename_mtx,
-	    "prof_dump_filename", WITNESS_RANK_PROF_DUMP_FILENAME,
-	    malloc_mutex_rank_exclusive)) {
+	if (malloc_mutex_init(&prof_dump_filename_mtx, "prof_dump_filename",
+	        WITNESS_RANK_PROF_DUMP_FILENAME, malloc_mutex_rank_exclusive)) {
 		return true;
 	}
 	if (malloc_mutex_init(&prof_dump_mtx, "prof_dump",
-	    WITNESS_RANK_PROF_DUMP, malloc_mutex_rank_exclusive)) {
+	        WITNESS_RANK_PROF_DUMP, malloc_mutex_rank_exclusive)) {
 		return true;
 	}
 
@@ -646,8 +730,8 @@ prof_boot2(tsd_t *tsd, base_t *base) {
 			return true;
 		}
 
-		if (opt_prof_final && opt_prof_prefix[0] != '\0' &&
-		    atexit(prof_fdump) != 0) {
+		if (opt_prof_final && opt_prof_prefix[0] != '\0'
+		    && atexit(prof_fdump) != 0) {
 			malloc_write("<jemalloc>: Error in atexit()\n");
 			if (opt_abort) {
 				abort();
@@ -671,8 +755,8 @@ prof_boot2(tsd_t *tsd, base_t *base) {
 		}
 		for (unsigned i = 0; i < PROF_NCTX_LOCKS; i++) {
 			if (malloc_mutex_init(&gctx_locks[i], "prof_gctx",
-			    WITNESS_RANK_PROF_GCTX,
-			    malloc_mutex_rank_exclusive)) {
+			        WITNESS_RANK_PROF_GCTX,
+			        malloc_mutex_rank_exclusive)) {
 				return true;
 			}
 		}
@@ -684,8 +768,8 @@ prof_boot2(tsd_t *tsd, base_t *base) {
 		}
 		for (unsigned i = 0; i < PROF_NTDATA_LOCKS; i++) {
 			if (malloc_mutex_init(&tdata_locks[i], "prof_tdata",
-			    WITNESS_RANK_PROF_TDATA,
-			    malloc_mutex_rank_exclusive)) {
+			        WITNESS_RANK_PROF_TDATA,
+			        malloc_mutex_rank_exclusive)) {
 				return true;
 			}
 		}
@@ -736,8 +820,8 @@ prof_postfork_parent(tsdn_t *tsdn) {
 	if (config_prof && opt_prof) {
 		unsigned i;
 
-		malloc_mutex_postfork_parent(tsdn,
-		    &prof_thread_active_init_mtx);
+		malloc_mutex_postfork_parent(
+		    tsdn, &prof_thread_active_init_mtx);
 		malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx);
 		malloc_mutex_postfork_parent(tsdn, &prof_stats_mtx);
 		malloc_mutex_postfork_parent(tsdn, &prof_recent_alloc_mtx);
diff --git a/src/prof_data.c b/src/prof_data.c
index bfa55be1..7aa047ac 100644
--- a/src/prof_data.c
+++ b/src/prof_data.c
@@ -36,7 +36,7 @@ malloc_mutex_t prof_dump_mtx;
  * and destroying mutexes causes complications for systems that allocate when
  * creating/destroying mutexes.
  */
-malloc_mutex_t *gctx_locks;
+malloc_mutex_t   *gctx_locks;
 static atomic_u_t cum_gctxs; /* Atomic counter. */
 
 /*
@@ -69,43 +69,45 @@ static int
 prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) {
 	uint64_t a_thr_uid = a->thr_uid;
 	uint64_t b_thr_uid = b->thr_uid;
-	int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid);
+	int      ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid);
 	if (ret == 0) {
 		uint64_t a_thr_discrim = a->thr_discrim;
 		uint64_t b_thr_discrim = b->thr_discrim;
-		ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim <
-		    b_thr_discrim);
+		ret = (a_thr_discrim > b_thr_discrim)
+		    - (a_thr_discrim < b_thr_discrim);
 		if (ret == 0) {
 			uint64_t a_tctx_uid = a->tctx_uid;
 			uint64_t b_tctx_uid = b->tctx_uid;
-			ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid <
-			    b_tctx_uid);
+			ret = (a_tctx_uid > b_tctx_uid)
+			    - (a_tctx_uid < b_tctx_uid);
 		}
 	}
 	return ret;
 }
 
-rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t,
-    tctx_link, prof_tctx_comp)
+/* NOLINTBEGIN(performance-no-int-to-ptr) */
+rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t, tctx_link,
+    prof_tctx_comp)
+    /* NOLINTEND(performance-no-int-to-ptr) */
 
-static int
-prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) {
+    static int prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) {
 	unsigned a_len = a->bt.len;
 	unsigned b_len = b->bt.len;
 	unsigned comp_len = (a_len < b_len) ? a_len : b_len;
-	int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *));
+	int      ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *));
 	if (ret == 0) {
 		ret = (a_len > b_len) - (a_len < b_len);
 	}
 	return ret;
 }
 
+/* NOLINTBEGIN(performance-no-int-to-ptr) */
 rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link,
     prof_gctx_comp)
+    /* NOLINTEND(performance-no-int-to-ptr) */
 
-static int
-prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) {
-	int ret;
+    static int prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) {
+	int      ret;
 	uint64_t a_uid = a->thr_uid;
 	uint64_t b_uid = b->thr_uid;
 
@@ -119,13 +121,14 @@ prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) {
 	return ret;
 }
 
+/* NOLINTBEGIN(performance-no-int-to-ptr) */
 rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link,
     prof_tdata_comp)
+    /* NOLINTEND(performance-no-int-to-ptr) */
 
-/******************************************************************************/
+    /******************************************************************************/
 
-static malloc_mutex_t *
-prof_gctx_mutex_choose(void) {
+    static malloc_mutex_t *prof_gctx_mutex_choose(void) {
 	unsigned ngctxs = atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED);
 
 	return &gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS];
@@ -139,8 +142,8 @@ prof_tdata_mutex_choose(uint64_t thr_uid) {
 bool
 prof_data_init(tsd_t *tsd) {
 	tdata_tree_new(&tdatas);
-	return ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS,
-	    prof_bt_hash, prof_bt_keycomp);
+	return ckh_new(
+	    tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp);
 }
 
 static void
@@ -189,8 +192,8 @@ prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) {
 	 */
 	size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *));
 	prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size,
-	    sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true),
-	    true);
+	    sz_size2index(size), false, NULL, true,
+	    arena_get(TSDN_NULL, 0, true), true);
 	if (gctx == NULL) {
 		return NULL;
 	}
@@ -209,8 +212,7 @@ prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) {
 }
 
 static void
-prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self,
-    prof_gctx_t *gctx) {
+prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx) {
 	cassert(config_prof);
 
 	/*
@@ -261,12 +263,12 @@ static bool
 prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata,
     void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) {
 	union {
-		prof_gctx_t	*p;
-		void		*v;
+		prof_gctx_t *p;
+		void        *v;
 	} gctx, tgctx;
 	union {
-		prof_bt_t	*p;
-		void		*v;
+		prof_bt_t *p;
+		void      *v;
 	} btkey;
 	bool new_gctx;
 
@@ -310,8 +312,8 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata,
 
 		if (tgctx.v != NULL) {
 			/* Lost race to insert. */
-			idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, NULL, true,
-			    true);
+			idalloctm(
+			    tsd_tsdn(tsd), tgctx.v, NULL, NULL, true, true);
 		}
 	}
 	prof_leave(tsd, tdata);
@@ -325,11 +327,11 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata,
 prof_tctx_t *
 prof_lookup(tsd_t *tsd, prof_bt_t *bt) {
 	union {
-		prof_tctx_t	*p;
-		void		*v;
+		prof_tctx_t *p;
+		void        *v;
 	} ret;
 	prof_tdata_t *tdata;
-	bool not_found;
+	bool          not_found;
 
 	cassert(config_prof);
 
@@ -343,16 +345,16 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) {
 	}
 	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
 	if (not_found) {
-		void *btkey;
+		void        *btkey;
 		prof_gctx_t *gctx;
-		bool new_gctx, error;
+		bool         new_gctx, error;
 
 		/*
 		 * This thread's cache lacks bt.  Look for it in the global
 		 * cache.
 		 */
-		if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx,
-		    &new_gctx)) {
+		if (prof_lookup_global(
+		        tsd, bt, tdata, &btkey, &gctx, &new_gctx)) {
 			return NULL;
 		}
 
@@ -397,8 +399,8 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) {
 
 /* Used in unit tests. */
 static prof_tdata_t *
-prof_tdata_count_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata,
-    void *arg) {
+prof_tdata_count_iter(
+    prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, void *arg) {
 	size_t *tdata_count = (size_t *)arg;
 
 	(*tdata_count)++;
@@ -409,13 +411,13 @@ prof_tdata_count_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata,
 /* Used in unit tests. */
 size_t
 prof_tdata_count(void) {
-	size_t tdata_count = 0;
+	size_t  tdata_count = 0;
 	tsdn_t *tsdn;
 
 	tsdn = tsdn_fetch();
 	malloc_mutex_lock(tsdn, &tdatas_mtx);
-	tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter,
-	    (void *)&tdata_count);
+	tdata_tree_iter(
+	    &tdatas, NULL, prof_tdata_count_iter, (void *)&tdata_count);
 	malloc_mutex_unlock(tsdn, &tdatas_mtx);
 
 	return tdata_count;
@@ -424,8 +426,8 @@ prof_tdata_count(void) {
 /* Used in unit tests. */
 size_t
 prof_bt_count(void) {
-	size_t bt_count;
-	tsd_t *tsd;
+	size_t        bt_count;
+	tsd_t        *tsd;
 	prof_tdata_t *tdata;
 
 	tsd = tsd_fetch();
@@ -441,75 +443,40 @@ prof_bt_count(void) {
 	return bt_count;
 }
 
-char *
-prof_thread_name_alloc(tsd_t *tsd, const char *thread_name) {
-	char *ret;
-	size_t size;
-
-	if (thread_name == NULL) {
-		return NULL;
-	}
-
-	size = strlen(thread_name) + 1;
-	if (size == 1) {
-		return "";
-	}
-
-	ret = iallocztm(tsd_tsdn(tsd), size, sz_size2index(size), false, NULL,
-	    true, arena_get(TSDN_NULL, 0, true), true);
-	if (ret == NULL) {
-		return NULL;
-	}
-	memcpy(ret, thread_name, size);
-	return ret;
+static void
+prof_thread_name_write_tdata(prof_tdata_t *tdata, const char *thread_name) {
+	strncpy(tdata->thread_name, thread_name, PROF_THREAD_NAME_MAX_LEN);
+	tdata->thread_name[PROF_THREAD_NAME_MAX_LEN - 1] = '\0';
 }
 
 int
 prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name) {
 	assert(tsd_reentrancy_level_get(tsd) == 0);
+	assert(thread_name != NULL);
 
-	prof_tdata_t *tdata;
-	unsigned i;
-	char *s;
-
-	tdata = prof_tdata_get(tsd, true);
-	if (tdata == NULL) {
-		return EAGAIN;
-	}
-
-	/* Validate input. */
-	if (thread_name == NULL) {
-		return EFAULT;
-	}
-	for (i = 0; thread_name[i] != '\0'; i++) {
+	for (unsigned i = 0; thread_name[i] != '\0'; i++) {
 		char c = thread_name[i];
 		if (!isgraph(c) && !isblank(c)) {
-			return EFAULT;
+			return EINVAL;
 		}
 	}
 
-	s = prof_thread_name_alloc(tsd, thread_name);
-	if (s == NULL) {
-		return EAGAIN;
+	prof_tdata_t *tdata = prof_tdata_get(tsd, true);
+	if (tdata == NULL) {
+		return ENOMEM;
 	}
 
-	if (tdata->thread_name != NULL) {
-		idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true,
-		    true);
-		tdata->thread_name = NULL;
-	}
-	if (strlen(s) > 0) {
-		tdata->thread_name = s;
-	}
+	prof_thread_name_write_tdata(tdata, thread_name);
+
 	return 0;
 }
 
 JEMALLOC_FORMAT_PRINTF(3, 4)
 static void
-prof_dump_printf(write_cb_t *prof_dump_write, void *cbopaque,
-    const char *format, ...) {
+prof_dump_printf(
+    write_cb_t *prof_dump_write, void *cbopaque, const char *format, ...) {
 	va_list ap;
-	char buf[PROF_PRINTF_BUFSIZE];
+	char    buf[PROF_PRINTF_BUFSIZE];
 
 	va_start(ap, format);
 	malloc_vsnprintf(buf, sizeof(buf), format, ap);
@@ -538,11 +505,18 @@ prof_double_uint64_cast(double d) {
 }
 #endif
 
-void prof_unbias_map_init() {
+void
+prof_unbias_map_init(void) {
 	/* See the comment in prof_sample_new_event_wait */
 #ifdef JEMALLOC_PROF
 	for (szind_t i = 0; i < SC_NSIZES; i++) {
-		double sz = (double)sz_index2size(i);
+		/*
+		 * With large size classes disabled, the unbiased calculation
+		 * here is not as accurate as it was because usize now changes
+		 * in a finer grain while the unbiased_sz is still calculated
+		 * using the old way.
+		 */
+		double sz = (double)sz_index2size_unsafe(i);
 		double rate = (double)(ZU(1) << lg_prof_sample);
 		double div_val = 1.0 - exp(-sz / rate);
 		double unbiased_sz = sz / div_val;
@@ -644,8 +618,8 @@ prof_do_unbias(uint64_t c_out_shifted_i, uint64_t s_out_i, uint64_t *r_c_in,
 }
 
 static void
-prof_dump_print_cnts(write_cb_t *prof_dump_write, void *cbopaque,
-    const prof_cnt_t *cnts) {
+prof_dump_print_cnts(
+    write_cb_t *prof_dump_write, void *cbopaque, const prof_cnt_t *cnts) {
 	uint64_t curobjs;
 	uint64_t curbytes;
 	uint64_t accumobjs;
@@ -662,8 +636,8 @@ prof_dump_print_cnts(write_cb_t *prof_dump_write, void *cbopaque,
 		accumbytes = cnts->accumbytes;
 	}
 	prof_dump_printf(prof_dump_write, cbopaque,
-	    "%"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]",
-	    curobjs, curbytes, accumobjs, accumbytes);
+	    "%" FMTu64 ": %" FMTu64 " [%" FMTu64 ": %" FMTu64 "]", curobjs,
+	    curbytes, accumobjs, accumbytes);
 }
 
 static void
@@ -683,11 +657,11 @@ prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) {
 		memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t));
 
 		tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
-		tdata->cnt_summed.curobjs_shifted_unbiased
-		    += tctx->dump_cnts.curobjs_shifted_unbiased;
+		tdata->cnt_summed.curobjs_shifted_unbiased +=
+		    tctx->dump_cnts.curobjs_shifted_unbiased;
 		tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
-		tdata->cnt_summed.curbytes_unbiased
-		    += tctx->dump_cnts.curbytes_unbiased;
+		tdata->cnt_summed.curbytes_unbiased +=
+		    tctx->dump_cnts.curbytes_unbiased;
 		if (opt_prof_accum) {
 			tdata->cnt_summed.accumobjs +=
 			    tctx->dump_cnts.accumobjs;
@@ -710,17 +684,17 @@ prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) {
 	malloc_mutex_assert_owner(tsdn, gctx->lock);
 
 	gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
-	gctx->cnt_summed.curobjs_shifted_unbiased
-	    += tctx->dump_cnts.curobjs_shifted_unbiased;
+	gctx->cnt_summed.curobjs_shifted_unbiased +=
+	    tctx->dump_cnts.curobjs_shifted_unbiased;
 	gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
 	gctx->cnt_summed.curbytes_unbiased += tctx->dump_cnts.curbytes_unbiased;
 	if (opt_prof_accum) {
 		gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs;
-		gctx->cnt_summed.accumobjs_shifted_unbiased
-		    += tctx->dump_cnts.accumobjs_shifted_unbiased;
+		gctx->cnt_summed.accumobjs_shifted_unbiased +=
+		    tctx->dump_cnts.accumobjs_shifted_unbiased;
 		gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes;
-		gctx->cnt_summed.accumbytes_unbiased
-		    += tctx->dump_cnts.accumbytes_unbiased;
+		gctx->cnt_summed.accumbytes_unbiased +=
+		    tctx->dump_cnts.accumbytes_unbiased;
 	}
 }
 
@@ -738,6 +712,7 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) {
 	case prof_tctx_state_purgatory:
 		prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx);
 		break;
+	case prof_tctx_state_initializing:
 	default:
 		not_reached();
 	}
@@ -747,9 +722,9 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) {
 
 typedef struct prof_dump_iter_arg_s prof_dump_iter_arg_t;
 struct prof_dump_iter_arg_s {
-	tsdn_t *tsdn;
+	tsdn_t     *tsdn;
 	write_cb_t *prof_dump_write;
-	void *cbopaque;
+	void       *cbopaque;
 };
 
 static prof_tctx_t *
@@ -765,9 +740,9 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) {
 	case prof_tctx_state_dumping:
 	case prof_tctx_state_purgatory:
 		prof_dump_printf(arg->prof_dump_write, arg->cbopaque,
-		    "  t%"FMTu64": ", tctx->thr_uid);
-		prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque,
-		    &tctx->dump_cnts);
+		    "  t%" FMTu64 ": ", tctx->thr_uid);
+		prof_dump_print_cnts(
+		    arg->prof_dump_write, arg->cbopaque, &tctx->dump_cnts);
 		arg->prof_dump_write(arg->cbopaque, "\n");
 		break;
 	default:
@@ -778,7 +753,7 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) {
 
 static prof_tctx_t *
 prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) {
-	tsdn_t *tsdn = (tsdn_t *)arg;
+	tsdn_t      *tsdn = (tsdn_t *)arg;
 	prof_tctx_t *ret;
 
 	malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);
@@ -793,6 +768,7 @@ prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) {
 	case prof_tctx_state_purgatory:
 		ret = tctx;
 		goto label_return;
+	case prof_tctx_state_initializing:
 	default:
 		not_reached();
 	}
@@ -832,8 +808,8 @@ prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) {
 	prof_gctx_merge_iter_arg_t *arg = (prof_gctx_merge_iter_arg_t *)opaque;
 
 	malloc_mutex_lock(arg->tsdn, gctx->lock);
-	tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter,
-	    (void *)arg->tsdn);
+	tctx_tree_iter(
+	    &gctx->tctxs, NULL, prof_tctx_merge_iter, (void *)arg->tsdn);
 	if (gctx->cnt_summed.curobjs != 0) {
 		(*arg->leak_ngctx)++;
 	}
@@ -845,7 +821,7 @@ prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) {
 static void
 prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) {
 	prof_tdata_t *tdata = prof_tdata_get(tsd, false);
-	prof_gctx_t *gctx;
+	prof_gctx_t  *gctx;
 
 	/*
 	 * Standard tree iteration won't work here, because as soon as we
@@ -861,15 +837,14 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) {
 
 			next = NULL;
 			do {
-				prof_tctx_t *to_destroy =
-				    tctx_tree_iter(&gctx->tctxs, next,
-				    prof_tctx_finish_iter,
+				prof_tctx_t *to_destroy = tctx_tree_iter(
+				    &gctx->tctxs, next, prof_tctx_finish_iter,
 				    (void *)tsd_tsdn(tsd));
 				if (to_destroy != NULL) {
-					next = tctx_tree_next(&gctx->tctxs,
-					    to_destroy);
-					tctx_tree_remove(&gctx->tctxs,
-					    to_destroy);
+					next = tctx_tree_next(
+					    &gctx->tctxs, to_destroy);
+					tctx_tree_remove(
+					    &gctx->tctxs, to_destroy);
 					idalloctm(tsd_tsdn(tsd), to_destroy,
 					    NULL, NULL, true, true);
 				} else {
@@ -890,41 +865,41 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) {
 
 typedef struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg_t;
 struct prof_tdata_merge_iter_arg_s {
-	tsdn_t *tsdn;
+	tsdn_t     *tsdn;
 	prof_cnt_t *cnt_all;
 };
 
 static prof_tdata_t *
-prof_tdata_merge_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata,
-    void *opaque) {
-	prof_tdata_merge_iter_arg_t *arg =
-	    (prof_tdata_merge_iter_arg_t *)opaque;
+prof_tdata_merge_iter(
+    prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, void *opaque) {
+	prof_tdata_merge_iter_arg_t *arg = (prof_tdata_merge_iter_arg_t *)
+	    opaque;
 
 	malloc_mutex_lock(arg->tsdn, tdata->lock);
 	if (!tdata->expired) {
 		size_t tabind;
 		union {
-			prof_tctx_t	*p;
-			void		*v;
+			prof_tctx_t *p;
+			void        *v;
 		} tctx;
 
 		tdata->dumping = true;
 		memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t));
-		for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL,
-		    &tctx.v);) {
+		for (tabind = 0;
+		     !ckh_iter(&tdata->bt2tctx, &tabind, NULL, &tctx.v);) {
 			prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata);
 		}
 
 		arg->cnt_all->curobjs += tdata->cnt_summed.curobjs;
-		arg->cnt_all->curobjs_shifted_unbiased
-		    += tdata->cnt_summed.curobjs_shifted_unbiased;
+		arg->cnt_all->curobjs_shifted_unbiased +=
+		    tdata->cnt_summed.curobjs_shifted_unbiased;
 		arg->cnt_all->curbytes += tdata->cnt_summed.curbytes;
-		arg->cnt_all->curbytes_unbiased
-		    += tdata->cnt_summed.curbytes_unbiased;
+		arg->cnt_all->curbytes_unbiased +=
+		    tdata->cnt_summed.curbytes_unbiased;
 		if (opt_prof_accum) {
 			arg->cnt_all->accumobjs += tdata->cnt_summed.accumobjs;
-			arg->cnt_all->accumobjs_shifted_unbiased
-			    += tdata->cnt_summed.accumobjs_shifted_unbiased;
+			arg->cnt_all->accumobjs_shifted_unbiased +=
+			    tdata->cnt_summed.accumobjs_shifted_unbiased;
 			arg->cnt_all->accumbytes +=
 			    tdata->cnt_summed.accumbytes;
 			arg->cnt_all->accumbytes_unbiased +=
@@ -939,18 +914,18 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata,
 }
 
 static prof_tdata_t *
-prof_tdata_dump_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata,
-    void *opaque) {
+prof_tdata_dump_iter(
+    prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, void *opaque) {
 	if (!tdata->dumping) {
 		return NULL;
 	}
 
 	prof_dump_iter_arg_t *arg = (prof_dump_iter_arg_t *)opaque;
-	prof_dump_printf(arg->prof_dump_write, arg->cbopaque, "  t%"FMTu64": ",
-	    tdata->thr_uid);
-	prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque,
-	    &tdata->cnt_summed);
-	if (tdata->thread_name != NULL) {
+	prof_dump_printf(arg->prof_dump_write, arg->cbopaque,
+	    "  t%" FMTu64 ": ", tdata->thr_uid);
+	prof_dump_print_cnts(
+	    arg->prof_dump_write, arg->cbopaque, &tdata->cnt_summed);
+	if (!prof_thread_name_empty(tdata)) {
 		arg->prof_dump_write(arg->cbopaque, " ");
 		arg->prof_dump_write(arg->cbopaque, tdata->thread_name);
 	}
@@ -961,7 +936,7 @@ prof_tdata_dump_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata,
 static void
 prof_dump_header(prof_dump_iter_arg_t *arg, const prof_cnt_t *cnt_all) {
 	prof_dump_printf(arg->prof_dump_write, arg->cbopaque,
-	    "heap_v2/%"FMTu64"\n  t*: ", ((uint64_t)1U << lg_prof_sample));
+	    "heap_v2/%" FMTu64 "\n  t*: ", ((uint64_t)1U << lg_prof_sample));
 	prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque, cnt_all);
 	arg->prof_dump_write(arg->cbopaque, "\n");
 
@@ -977,8 +952,8 @@ prof_dump_gctx(prof_dump_iter_arg_t *arg, prof_gctx_t *gctx,
 	malloc_mutex_assert_owner(arg->tsdn, gctx->lock);
 
 	/* Avoid dumping such gctx's that have no useful data. */
-	if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) ||
-	    (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) {
+	if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0)
+	    || (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) {
 		assert(gctx->cnt_summed.curobjs == 0);
 		assert(gctx->cnt_summed.curbytes == 0);
 		/*
@@ -997,12 +972,12 @@ prof_dump_gctx(prof_dump_iter_arg_t *arg, prof_gctx_t *gctx,
 	arg->prof_dump_write(arg->cbopaque, "@");
 	for (unsigned i = 0; i < bt->len; i++) {
 		prof_dump_printf(arg->prof_dump_write, arg->cbopaque,
-		    " %#"FMTxPTR, (uintptr_t)bt->vec[i]);
+		    " %#" FMTxPTR, (uintptr_t)bt->vec[i]);
 	}
 
 	arg->prof_dump_write(arg->cbopaque, "\n  t*: ");
-	prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque,
-	    &gctx->cnt_summed);
+	prof_dump_print_cnts(
+	    arg->prof_dump_write, arg->cbopaque, &gctx->cnt_summed);
 	arg->prof_dump_write(arg->cbopaque, "\n");
 
 	tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, arg);
@@ -1023,18 +998,21 @@ prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx) {
 	 */
 	if (cnt_all->curbytes != 0) {
 		double sample_period = (double)((uint64_t)1 << lg_prof_sample);
-		double ratio = (((double)cnt_all->curbytes) /
-		    (double)cnt_all->curobjs) / sample_period;
-		double scale_factor = 1.0 / (1.0 - exp(-ratio));
-		uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes)
-		    * scale_factor);
-		uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) *
-		    scale_factor);
+		double ratio = (((double)cnt_all->curbytes)
+		                   / (double)cnt_all->curobjs)
+		    / sample_period;
+		double   scale_factor = 1.0 / (1.0 - exp(-ratio));
+		uint64_t curbytes = (uint64_t)round(
+		    ((double)cnt_all->curbytes) * scale_factor);
+		uint64_t curobjs = (uint64_t)round(
+		    ((double)cnt_all->curobjs) * scale_factor);
 
-		malloc_printf("<jemalloc>: Leak approximation summary: ~%"FMTu64
-		    " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n",
-		    curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs !=
-		    1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : "");
+		malloc_printf(
+		    "<jemalloc>: Leak approximation summary: ~%" FMTu64
+		    " byte%s, ~%" FMTu64 " object%s, >= %zu context%s\n",
+		    curbytes, (curbytes != 1) ? "s" : "", curobjs,
+		    (curobjs != 1) ? "s" : "", leak_ngctx,
+		    (leak_ngctx != 1) ? "s" : "");
 		malloc_printf(
 		    "<jemalloc>: Run jeprof on dump output for leak detail\n");
 		if (opt_prof_leak_error) {
@@ -1065,8 +1043,8 @@ prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, prof_cnt_t *cnt_all,
     size_t *leak_ngctx, prof_gctx_tree_t *gctxs) {
 	size_t tabind;
 	union {
-		prof_gctx_t	*p;
-		void		*v;
+		prof_gctx_t *p;
+		void        *v;
 	} gctx;
 
 	prof_enter(tsd, tdata);
@@ -1085,19 +1063,19 @@ prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, prof_cnt_t *cnt_all,
 	 * stats and merge them into the associated gctx's.
 	 */
 	memset(cnt_all, 0, sizeof(prof_cnt_t));
-	prof_tdata_merge_iter_arg_t prof_tdata_merge_iter_arg = {tsd_tsdn(tsd),
-	    cnt_all};
+	prof_tdata_merge_iter_arg_t prof_tdata_merge_iter_arg = {
+	    tsd_tsdn(tsd), cnt_all};
 	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
-	tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter,
-	    &prof_tdata_merge_iter_arg);
+	tdata_tree_iter(
+	    &tdatas, NULL, prof_tdata_merge_iter, &prof_tdata_merge_iter_arg);
 	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
 
 	/* Merge tctx stats into gctx's. */
 	*leak_ngctx = 0;
-	prof_gctx_merge_iter_arg_t prof_gctx_merge_iter_arg = {tsd_tsdn(tsd),
-	    leak_ngctx};
-	gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter,
-	    &prof_gctx_merge_iter_arg);
+	prof_gctx_merge_iter_arg_t prof_gctx_merge_iter_arg = {
+	    tsd_tsdn(tsd), leak_ngctx};
+	gctx_tree_iter(
+	    gctxs, NULL, prof_gctx_merge_iter, &prof_gctx_merge_iter_arg);
 
 	prof_leave(tsd, tdata);
 }
@@ -1106,12 +1084,12 @@ void
 prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque,
     prof_tdata_t *tdata, bool leakcheck) {
 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_dump_mtx);
-	prof_cnt_t cnt_all;
-	size_t leak_ngctx;
+	prof_cnt_t       cnt_all;
+	size_t           leak_ngctx;
 	prof_gctx_tree_t gctxs;
 	prof_dump_prep(tsd, tdata, &cnt_all, &leak_ngctx, &gctxs);
-	prof_dump_iter_arg_t prof_dump_iter_arg = {tsd_tsdn(tsd),
-	    prof_dump_write, cbopaque};
+	prof_dump_iter_arg_t prof_dump_iter_arg = {
+	    tsd_tsdn(tsd), prof_dump_write, cbopaque};
 	prof_dump_header(&prof_dump_iter_arg, &cnt_all);
 	gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter, &prof_dump_iter_arg);
 	prof_gctx_finish(tsd, &gctxs);
@@ -1123,12 +1101,12 @@ prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque,
 /* Used in unit tests. */
 void
 prof_cnt_all(prof_cnt_t *cnt_all) {
-	tsd_t *tsd = tsd_fetch();
+	tsd_t        *tsd = tsd_fetch();
 	prof_tdata_t *tdata = prof_tdata_get(tsd, false);
 	if (tdata == NULL) {
 		memset(cnt_all, 0, sizeof(prof_cnt_t));
 	} else {
-		size_t leak_ngctx;
+		size_t           leak_ngctx;
 		prof_gctx_tree_t gctxs;
 		prof_dump_prep(tsd, tdata, cnt_all, &leak_ngctx, &gctxs);
 		prof_gctx_finish(tsd, &gctxs);
@@ -1167,23 +1145,31 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
 	cassert(config_prof);
 
 	/* Initialize an empty cache for this thread. */
-	tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t),
-	    sz_size2index(sizeof(prof_tdata_t)), false, NULL, true,
+	size_t tdata_sz = ALIGNMENT_CEILING(sizeof(prof_tdata_t), QUANTUM);
+	size_t total_sz = tdata_sz + sizeof(void *) * opt_prof_bt_max;
+	tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), total_sz,
+	    sz_size2index(total_sz), false, NULL, true,
 	    arena_get(TSDN_NULL, 0, true), true);
 	if (tdata == NULL) {
 		return NULL;
 	}
 
+	tdata->vec = (void **)((byte_t *)tdata + tdata_sz);
 	tdata->lock = prof_tdata_mutex_choose(thr_uid);
 	tdata->thr_uid = thr_uid;
 	tdata->thr_discrim = thr_discrim;
-	tdata->thread_name = thread_name;
 	tdata->attached = true;
 	tdata->expired = false;
 	tdata->tctx_uid_next = 0;
+	if (thread_name == NULL) {
+		prof_thread_name_clear(tdata);
+	} else {
+		prof_thread_name_write_tdata(tdata, thread_name);
+	}
+	prof_thread_name_assert(tdata);
 
 	if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash,
-	    prof_bt_keycomp)) {
+	        prof_bt_keycomp)) {
 		idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true);
 		return NULL;
 	}
@@ -1214,27 +1200,22 @@ prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) {
 }
 
 static bool
-prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
-    bool even_if_attached) {
+prof_tdata_should_destroy(
+    tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached) {
 	malloc_mutex_assert_owner(tsdn, tdata->lock);
 
 	return prof_tdata_should_destroy_unlocked(tdata, even_if_attached);
 }
 
 static void
-prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata,
-    bool even_if_attached) {
+prof_tdata_destroy_locked(
+    tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) {
 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx);
 	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tdata->lock);
 
 	tdata_tree_remove(&tdatas, tdata);
-
 	assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached));
 
-	if (tdata->thread_name != NULL) {
-		idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true,
-		    true);
-	}
 	ckh_delete(tsd, &tdata->bt2tctx);
 	idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true);
 }
@@ -1252,8 +1233,8 @@ prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) {
 
 	malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
 	if (tdata->attached) {
-		destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata,
-		    true);
+		destroy_tdata = prof_tdata_should_destroy(
+		    tsd_tsdn(tsd), tdata, true);
 		/*
 		 * Only detach if !destroy_tdata, because detaching would allow
 		 * another thread to win the race to destroy tdata.
@@ -1288,8 +1269,8 @@ prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) {
 }
 
 static prof_tdata_t *
-prof_tdata_reset_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata,
-    void *arg) {
+prof_tdata_reset_iter(
+    prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, void *arg) {
 	tsdn_t *tsdn = (tsdn_t *)arg;
 
 	return (prof_tdata_expire(tsdn, tdata) ? tdata : NULL);
@@ -1309,8 +1290,8 @@ prof_reset(tsd_t *tsd, size_t lg_sample) {
 
 	next = NULL;
 	do {
-		prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next,
-		    prof_tdata_reset_iter, (void *)tsd);
+		prof_tdata_t *to_destroy = tdata_tree_iter(
+		    &tdatas, next, prof_tdata_reset_iter, (void *)tsd);
 		if (to_destroy != NULL) {
 			next = tdata_tree_next(&tdatas, to_destroy);
 			prof_tdata_destroy_locked(tsd, to_destroy, false);
@@ -1373,8 +1354,8 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) {
 		prof_tdata_t *tdata = tctx->tdata;
 		tctx->tdata = NULL;
 		ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL);
-		bool destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd),
-		    tdata, false);
+		bool destroy_tdata = prof_tdata_should_destroy(
+		    tsd_tsdn(tsd), tdata, false);
 		malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
 		if (destroy_tdata) {
 			prof_tdata_destroy(tsd, tdata, false);
@@ -1419,6 +1400,8 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) {
 		destroy_tctx = false;
 		destroy_gctx = false;
 		break;
+	case prof_tctx_state_initializing:
+	case prof_tctx_state_purgatory:
 	default:
 		not_reached();
 		destroy_tctx = false;
diff --git a/src/prof_log.c b/src/prof_log.c
index 0632c3b3..74f1372f 100644
--- a/src/prof_log.c
+++ b/src/prof_log.c
@@ -12,7 +12,7 @@
 #include "jemalloc/internal/prof_log.h"
 #include "jemalloc/internal/prof_sys.h"
 
-bool opt_prof_log = false;
+bool                              opt_prof_log = false;
 typedef enum prof_logging_state_e prof_logging_state_t;
 enum prof_logging_state_e {
 	prof_logging_state_stopped,
@@ -25,15 +25,15 @@ enum prof_logging_state_e {
  * - started: log_start called, log_stop not called yet. Allocations are logged.
  * - dumping: log_stop called but not finished; samples are not logged anymore.
  */
-prof_logging_state_t prof_logging_state = prof_logging_state_stopped;
+static prof_logging_state_t prof_logging_state = prof_logging_state_stopped;
 
 /* Used in unit tests. */
 static bool prof_log_dummy = false;
 
 /* Incremented for every log file that is output. */
 static uint64_t log_seq = 0;
-static char log_filename[
-    /* Minimize memory bloat for non-prof builds. */
+static char     log_filename[
+/* Minimize memory bloat for non-prof builds. */
 #ifdef JEMALLOC_PROF
     PATH_MAX +
 #endif
@@ -51,8 +51,8 @@ typedef struct prof_bt_node_s prof_bt_node_t;
 
 struct prof_bt_node_s {
 	prof_bt_node_t *next;
-	size_t index;
-	prof_bt_t bt;
+	size_t          index;
+	prof_bt_t       bt;
 	/* Variable size backtrace vector pointed to by bt. */
 	void *vec[1];
 };
@@ -61,8 +61,8 @@ typedef struct prof_thr_node_s prof_thr_node_t;
 
 struct prof_thr_node_s {
 	prof_thr_node_t *next;
-	size_t index;
-	uint64_t thr_uid;
+	size_t           index;
+	uint64_t         thr_uid;
 	/* Variable size based on thr_name_sz. */
 	char name[1];
 };
@@ -91,15 +91,15 @@ struct prof_alloc_node_s {
  * These are the backtraces and threads that have already been logged by an
  * allocation.
  */
-static bool log_tables_initialized = false;
+static bool  log_tables_initialized = false;
 static ckh_t log_bt_node_set;
 static ckh_t log_thr_node_set;
 
 /* Store linked lists for logged data. */
-static prof_bt_node_t *log_bt_first = NULL;
-static prof_bt_node_t *log_bt_last = NULL;
-static prof_thr_node_t *log_thr_first = NULL;
-static prof_thr_node_t *log_thr_last = NULL;
+static prof_bt_node_t    *log_bt_first = NULL;
+static prof_bt_node_t    *log_bt_last = NULL;
+static prof_thr_node_t   *log_thr_first = NULL;
+static prof_thr_node_t   *log_thr_last = NULL;
 static prof_alloc_node_t *log_alloc_first = NULL;
 static prof_alloc_node_t *log_alloc_last = NULL;
 
@@ -131,12 +131,12 @@ prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) {
 
 	/* See if this backtrace is already cached in the table. */
 	if (ckh_search(&log_bt_node_set, (void *)(&dummy_node),
-	    (void **)(&node), NULL)) {
-		size_t sz = offsetof(prof_bt_node_t, vec) +
-			        (bt->len * sizeof(void *));
-		prof_bt_node_t *new_node = (prof_bt_node_t *)
-		    iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL,
-		    true, arena_get(TSDN_NULL, 0, true), true);
+	        (void **)(&node), NULL)) {
+		size_t sz = offsetof(prof_bt_node_t, vec)
+		    + (bt->len * sizeof(void *));
+		prof_bt_node_t *new_node = (prof_bt_node_t *)iallocztm(
+		    tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true,
+		    arena_get(TSDN_NULL, 0, true), true);
 		if (log_bt_first == NULL) {
 			log_bt_first = new_node;
 			log_bt_last = new_node;
@@ -174,11 +174,11 @@ prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) {
 
 	/* See if this thread is already cached in the table. */
 	if (ckh_search(&log_thr_node_set, (void *)(&dummy_node),
-	    (void **)(&node), NULL)) {
+	        (void **)(&node), NULL)) {
 		size_t sz = offsetof(prof_thr_node_t, name) + strlen(name) + 1;
-		prof_thr_node_t *new_node = (prof_thr_node_t *)
-		    iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL,
-		    true, arena_get(TSDN_NULL, 0, true), true);
+		prof_thr_node_t *new_node = (prof_thr_node_t *)iallocztm(
+		    tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true,
+		    arena_get(TSDN_NULL, 0, true), true);
 		if (log_thr_first == NULL) {
 			log_thr_first = new_node;
 			log_thr_last = new_node;
@@ -225,9 +225,9 @@ prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) {
 
 	if (!log_tables_initialized) {
 		bool err1 = ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS,
-				prof_bt_node_hash, prof_bt_node_keycomp);
+		    prof_bt_node_hash, prof_bt_node_keycomp);
 		bool err2 = ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS,
-				prof_thr_node_hash, prof_thr_node_keycomp);
+		    prof_thr_node_hash, prof_thr_node_keycomp);
 		if (err1 || err2) {
 			goto label_done;
 		}
@@ -238,13 +238,12 @@ prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) {
 	nstime_t free_time;
 	nstime_prof_init_update(&free_time);
 
-	size_t sz = sizeof(prof_alloc_node_t);
-	prof_alloc_node_t *new_node = (prof_alloc_node_t *)
-	    iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true,
+	size_t             sz = sizeof(prof_alloc_node_t);
+	prof_alloc_node_t *new_node = (prof_alloc_node_t *)iallocztm(
+	    tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true,
 	    arena_get(TSDN_NULL, 0, true), true);
 
-	const char *prod_thr_name = (tctx->tdata->thread_name == NULL)?
-				        "" : tctx->tdata->thread_name;
+	const char *prod_thr_name = tctx->tdata->thread_name;
 	const char *cons_thr_name = prof_thread_name_get(tsd);
 
 	prof_bt_t bt;
@@ -257,10 +256,10 @@ prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) {
 	prof_bt_t *prod_bt = &tctx->gctx->bt;
 
 	new_node->next = NULL;
-	new_node->alloc_thr_ind = prof_log_thr_index(tsd, tctx->tdata->thr_uid,
-				      prod_thr_name);
-	new_node->free_thr_ind = prof_log_thr_index(tsd, cons_tdata->thr_uid,
-				     cons_thr_name);
+	new_node->alloc_thr_ind = prof_log_thr_index(
+	    tsd, tctx->tdata->thr_uid, prod_thr_name);
+	new_node->free_thr_ind = prof_log_thr_index(
+	    tsd, cons_tdata->thr_uid, cons_thr_name);
 	new_node->alloc_bt_ind = prof_log_bt_index(tsd, prod_bt);
 	new_node->free_bt_ind = prof_log_bt_index(tsd, cons_bt);
 	new_node->alloc_time_ns = nstime_ns(&alloc_time);
@@ -289,8 +288,8 @@ static bool
 prof_bt_node_keycomp(const void *k1, const void *k2) {
 	const prof_bt_node_t *bt_node1 = (prof_bt_node_t *)k1;
 	const prof_bt_node_t *bt_node2 = (prof_bt_node_t *)k2;
-	return prof_bt_keycomp((void *)(&bt_node1->bt),
-	    (void *)(&bt_node2->bt));
+	return prof_bt_keycomp(
+	    (void *)(&bt_node1->bt), (void *)(&bt_node2->bt));
 }
 
 static void
@@ -310,7 +309,7 @@ prof_thr_node_keycomp(const void *k1, const void *k2) {
 size_t
 prof_log_bt_count(void) {
 	cassert(config_prof);
-	size_t cnt = 0;
+	size_t          cnt = 0;
 	prof_bt_node_t *node = log_bt_first;
 	while (node != NULL) {
 		cnt++;
@@ -323,7 +322,7 @@ prof_log_bt_count(void) {
 size_t
 prof_log_alloc_count(void) {
 	cassert(config_prof);
-	size_t cnt = 0;
+	size_t             cnt = 0;
 	prof_alloc_node_t *node = log_alloc_first;
 	while (node != NULL) {
 		cnt++;
@@ -336,7 +335,7 @@ prof_log_alloc_count(void) {
 size_t
 prof_log_thr_count(void) {
 	cassert(config_prof);
-	size_t cnt = 0;
+	size_t           cnt = 0;
 	prof_thr_node_t *node = log_thr_first;
 	while (node != NULL) {
 		cnt++;
@@ -375,9 +374,8 @@ prof_log_rep_check(void) {
 	size_t thr_count = prof_log_thr_count();
 	size_t alloc_count = prof_log_alloc_count();
 
-
 	if (prof_logging_state == prof_logging_state_stopped) {
-		if (bt_count != 0 || thr_count != 0 || alloc_count || 0) {
+		if (bt_count != 0 || thr_count != 0 || alloc_count != 0) {
 			return true;
 		}
 	}
@@ -436,7 +434,8 @@ prof_log_start(tsdn_t *tsdn, const char *filename) {
 	if (!prof_log_atexit_called) {
 		prof_log_atexit_called = true;
 		if (atexit(prof_log_stop_final) != 0) {
-			malloc_write("<jemalloc>: Error in atexit() "
+			malloc_write(
+			    "<jemalloc>: Error in atexit() "
 			    "for logging\n");
 			if (opt_abort) {
 				abort();
@@ -470,14 +469,14 @@ label_done:
 }
 
 struct prof_emitter_cb_arg_s {
-	int fd;
+	int     fd;
 	ssize_t ret;
 };
 
 static void
 prof_emitter_write_cb(void *opaque, const char *to_write) {
-	struct prof_emitter_cb_arg_s *arg =
-	    (struct prof_emitter_cb_arg_s *)opaque;
+	struct prof_emitter_cb_arg_s *arg = (struct prof_emitter_cb_arg_s *)
+	    opaque;
 	size_t bytes = strlen(to_write);
 	if (prof_log_dummy) {
 		return;
@@ -502,8 +501,8 @@ prof_log_emit_threads(tsd_t *tsd, emitter_t *emitter) {
 
 		char *thr_name = thr_node->name;
 
-		emitter_json_kv(emitter, "thr_name", emitter_type_string,
-		    &thr_name);
+		emitter_json_kv(
+		    emitter, "thr_name", emitter_type_string, &thr_name);
 
 		emitter_json_object_end(emitter);
 		thr_old_node = thr_node;
@@ -522,7 +521,7 @@ prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) {
 	 * Calculate how many hex digits we need: twice number of bytes, two for
 	 * "0x", and then one more for terminating '\0'.
 	 */
-	char buf[2 * sizeof(intptr_t) + 3];
+	char   buf[2 * sizeof(intptr_t) + 3];
 	size_t buf_sz = sizeof(buf);
 	while (bt_node != NULL) {
 		emitter_json_array_begin(emitter);
@@ -530,8 +529,8 @@ prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) {
 		for (i = 0; i < bt_node->bt.len; i++) {
 			malloc_snprintf(buf, buf_sz, "%p", bt_node->bt.vec[i]);
 			char *trace_str = buf;
-			emitter_json_value(emitter, emitter_type_string,
-			    &trace_str);
+			emitter_json_value(
+			    emitter, emitter_type_string, &trace_str);
 		}
 		emitter_json_array_end(emitter);
 
@@ -562,21 +561,21 @@ prof_log_emit_allocs(tsd_t *tsd, emitter_t *emitter) {
 		emitter_json_kv(emitter, "free_trace", emitter_type_size,
 		    &alloc_node->free_bt_ind);
 
-		emitter_json_kv(emitter, "alloc_timestamp",
-		    emitter_type_uint64, &alloc_node->alloc_time_ns);
+		emitter_json_kv(emitter, "alloc_timestamp", emitter_type_uint64,
+		    &alloc_node->alloc_time_ns);
 
 		emitter_json_kv(emitter, "free_timestamp", emitter_type_uint64,
 		    &alloc_node->free_time_ns);
 
-		emitter_json_kv(emitter, "usize", emitter_type_uint64,
-		    &alloc_node->usize);
+		emitter_json_kv(
+		    emitter, "usize", emitter_type_uint64, &alloc_node->usize);
 
 		emitter_json_object_end(emitter);
 
 		alloc_old_node = alloc_node;
 		alloc_node = alloc_node->next;
-		idalloctm(tsd_tsdn(tsd), alloc_old_node, NULL, NULL, true,
-		    true);
+		idalloctm(
+		    tsd_tsdn(tsd), alloc_old_node, NULL, NULL, true, true);
 	}
 	emitter_json_array_end(emitter);
 }
@@ -592,15 +591,14 @@ prof_log_emit_metadata(emitter_t *emitter) {
 	emitter_json_kv(emitter, "duration", emitter_type_uint64, &ns);
 
 	char *vers = JEMALLOC_VERSION;
-	emitter_json_kv(emitter, "version",
-	    emitter_type_string, &vers);
+	emitter_json_kv(emitter, "version", emitter_type_string, &vers);
 
-	emitter_json_kv(emitter, "lg_sample_rate",
-	    emitter_type_int, &lg_prof_sample);
+	emitter_json_kv(
+	    emitter, "lg_sample_rate", emitter_type_int, &lg_prof_sample);
 
 	const char *res_type = prof_time_res_mode_names[opt_prof_time_res];
-	emitter_json_kv(emitter, "prof_time_resolution", emitter_type_string,
-	    &res_type);
+	emitter_json_kv(
+	    emitter, "prof_time_resolution", emitter_type_string, &res_type);
 
 	int pid = prof_getpid();
 	emitter_json_kv(emitter, "pid", emitter_type_int, &pid);
@@ -633,7 +631,6 @@ prof_log_stop(tsdn_t *tsdn) {
 	prof_logging_state = prof_logging_state_dumping;
 	malloc_mutex_unlock(tsdn, &log_mtx);
 
-
 	emitter_t emitter;
 
 	/* Create a file. */
@@ -646,8 +643,10 @@ prof_log_stop(tsdn_t *tsdn) {
 	}
 
 	if (fd == -1) {
-		malloc_printf("<jemalloc>: creat() for log file \"%s\" "
-			      " failed with %d\n", log_filename, errno);
+		malloc_printf(
+		    "<jemalloc>: creat() for log file \"%s\" "
+		    " failed with %d\n",
+		    log_filename, errno);
 		if (opt_abort) {
 			abort();
 		}
@@ -660,8 +659,8 @@ prof_log_stop(tsdn_t *tsdn) {
 	buf_writer_t buf_writer;
 	buf_writer_init(tsdn, &buf_writer, prof_emitter_write_cb, &arg, NULL,
 	    PROF_LOG_STOP_BUFSIZE);
-	emitter_init(&emitter, emitter_output_json_compact, buf_writer_cb,
-	    &buf_writer);
+	emitter_init(
+	    &emitter, emitter_output_json_compact, buf_writer_cb, &buf_writer);
 
 	emitter_begin(&emitter);
 	prof_log_emit_metadata(&emitter);
@@ -702,8 +701,8 @@ JEMALLOC_COLD
 bool
 prof_log_init(tsd_t *tsd) {
 	cassert(config_prof);
-	if (malloc_mutex_init(&log_mtx, "prof_log",
-	    WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) {
+	if (malloc_mutex_init(&log_mtx, "prof_log", WITNESS_RANK_PROF_LOG,
+	        malloc_mutex_rank_exclusive)) {
 		return true;
 	}
 
diff --git a/src/prof_recent.c b/src/prof_recent.c
index 834a9446..f7108bee 100644
--- a/src/prof_recent.c
+++ b/src/prof_recent.c
@@ -7,22 +7,22 @@
 #include "jemalloc/internal/prof_data.h"
 #include "jemalloc/internal/prof_recent.h"
 
-ssize_t opt_prof_recent_alloc_max = PROF_RECENT_ALLOC_MAX_DEFAULT;
-malloc_mutex_t prof_recent_alloc_mtx; /* Protects the fields below */
+ssize_t            opt_prof_recent_alloc_max = PROF_RECENT_ALLOC_MAX_DEFAULT;
+malloc_mutex_t     prof_recent_alloc_mtx; /* Protects the fields below */
 static atomic_zd_t prof_recent_alloc_max;
-static ssize_t prof_recent_alloc_count = 0;
+static ssize_t     prof_recent_alloc_count = 0;
 prof_recent_list_t prof_recent_alloc_list;
 
 malloc_mutex_t prof_recent_dump_mtx; /* Protects dumping. */
 
 static void
-prof_recent_alloc_max_init() {
-	atomic_store_zd(&prof_recent_alloc_max, opt_prof_recent_alloc_max,
-	    ATOMIC_RELAXED);
+prof_recent_alloc_max_init(void) {
+	atomic_store_zd(
+	    &prof_recent_alloc_max, opt_prof_recent_alloc_max, ATOMIC_RELAXED);
 }
 
 static inline ssize_t
-prof_recent_alloc_max_get_no_lock() {
+prof_recent_alloc_max_get_no_lock(void) {
 	return atomic_load_zd(&prof_recent_alloc_max, ATOMIC_RELAXED);
 }
 
@@ -144,26 +144,26 @@ edata_prof_recent_alloc_get_no_lock_test(const edata_t *edata) {
 static inline prof_recent_t *
 edata_prof_recent_alloc_get(tsd_t *tsd, const edata_t *edata) {
 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
-	prof_recent_t *recent_alloc =
-	    edata_prof_recent_alloc_get_no_lock(edata);
-	assert(recent_alloc == NULL ||
-	    prof_recent_alloc_edata_get(tsd, recent_alloc) == edata);
+	prof_recent_t *recent_alloc = edata_prof_recent_alloc_get_no_lock(
+	    edata);
+	assert(recent_alloc == NULL
+	    || prof_recent_alloc_edata_get(tsd, recent_alloc) == edata);
 	return recent_alloc;
 }
 
 static prof_recent_t *
-edata_prof_recent_alloc_update_internal(tsd_t *tsd, edata_t *edata,
-    prof_recent_t *recent_alloc) {
+edata_prof_recent_alloc_update_internal(
+    tsd_t *tsd, edata_t *edata, prof_recent_t *recent_alloc) {
 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
-	prof_recent_t *old_recent_alloc =
-	    edata_prof_recent_alloc_get(tsd, edata);
+	prof_recent_t *old_recent_alloc = edata_prof_recent_alloc_get(
+	    tsd, edata);
 	edata_prof_recent_alloc_set_dont_call_directly(edata, recent_alloc);
 	return old_recent_alloc;
 }
 
 static void
-edata_prof_recent_alloc_set(tsd_t *tsd, edata_t *edata,
-    prof_recent_t *recent_alloc) {
+edata_prof_recent_alloc_set(
+    tsd_t *tsd, edata_t *edata, prof_recent_t *recent_alloc) {
 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
 	assert(recent_alloc != NULL);
 	prof_recent_t *old_recent_alloc =
@@ -173,8 +173,8 @@ edata_prof_recent_alloc_set(tsd_t *tsd, edata_t *edata,
 }
 
 static void
-edata_prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata,
-    prof_recent_t *recent_alloc) {
+edata_prof_recent_alloc_reset(
+    tsd_t *tsd, edata_t *edata, prof_recent_t *recent_alloc) {
 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
 	assert(recent_alloc != NULL);
 	prof_recent_t *old_recent_alloc =
@@ -265,14 +265,14 @@ prof_recent_alloc_assert_count(tsd_t *tsd) {
 	if (!config_debug) {
 		return;
 	}
-	ssize_t count = 0;
+	ssize_t        count = 0;
 	prof_recent_t *n;
-	ql_foreach(n, &prof_recent_alloc_list, link) {
+	ql_foreach (n, &prof_recent_alloc_list, link) {
 		++count;
 	}
 	assert(count == prof_recent_alloc_count);
-	assert(prof_recent_alloc_max_get(tsd) == -1 ||
-	    count <= prof_recent_alloc_max_get(tsd));
+	assert(prof_recent_alloc_max_get(tsd) == -1
+	    || count <= prof_recent_alloc_max_get(tsd));
 }
 
 void
@@ -319,8 +319,8 @@ prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size, size_t usize) {
 	 * the allocation locks.
 	 */
 	prof_recent_t *reserve = NULL;
-	if (prof_recent_alloc_max_get(tsd) == -1 ||
-	    prof_recent_alloc_count < prof_recent_alloc_max_get(tsd)) {
+	if (prof_recent_alloc_max_get(tsd) == -1
+	    || prof_recent_alloc_count < prof_recent_alloc_max_get(tsd)) {
 		assert(prof_recent_alloc_max_get(tsd) != 0);
 		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
 		reserve = prof_recent_allocate_node(tsd_tsdn(tsd));
@@ -346,8 +346,9 @@ prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size, size_t usize) {
 		ql_rotate(&prof_recent_alloc_list, link);
 	} else {
 		/* Otherwise make use of the new node. */
-		assert(prof_recent_alloc_max_get(tsd) == -1 ||
-		    prof_recent_alloc_count < prof_recent_alloc_max_get(tsd));
+		assert(prof_recent_alloc_max_get(tsd) == -1
+		    || prof_recent_alloc_count
+		        < prof_recent_alloc_max_get(tsd));
 		if (reserve == NULL) {
 			goto label_rollback;
 		}
@@ -403,7 +404,7 @@ label_rollback:
 }
 
 ssize_t
-prof_recent_alloc_max_ctl_read() {
+prof_recent_alloc_max_ctl_read(void) {
 	cassert(config_prof);
 	/* Don't bother to acquire the lock. */
 	return prof_recent_alloc_max_get_no_lock();
@@ -421,7 +422,7 @@ prof_recent_alloc_restore_locked(tsd_t *tsd, prof_recent_list_t *to_delete) {
 	}
 
 	prof_recent_t *node;
-	ql_foreach(node, &prof_recent_alloc_list, link) {
+	ql_foreach (node, &prof_recent_alloc_list, link) {
 		if (prof_recent_alloc_count == max) {
 			break;
 		}
@@ -462,7 +463,7 @@ prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) {
 	assert(max >= -1);
 	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
 	prof_recent_alloc_assert_count(tsd);
-	const ssize_t old_max = prof_recent_alloc_max_update(tsd, max);
+	const ssize_t      old_max = prof_recent_alloc_max_update(tsd, max);
 	prof_recent_list_t to_delete;
 	prof_recent_alloc_restore_locked(tsd, &to_delete);
 	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
@@ -472,7 +473,7 @@ prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) {
 
 static void
 prof_recent_alloc_dump_bt(emitter_t *emitter, prof_tctx_t *tctx) {
-	char bt_buf[2 * sizeof(intptr_t) + 3];
+	char  bt_buf[2 * sizeof(intptr_t) + 3];
 	char *s = bt_buf;
 	assert(tctx != NULL);
 	prof_bt_t *bt = &tctx->gctx->bt;
@@ -495,13 +496,14 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) {
 	    &node->alloc_tctx->thr_uid);
 	prof_tdata_t *alloc_tdata = node->alloc_tctx->tdata;
 	assert(alloc_tdata != NULL);
-	if (alloc_tdata->thread_name != NULL) {
+	if (!prof_thread_name_empty(alloc_tdata)) {
+		const char *thread_name = alloc_tdata->thread_name;
 		emitter_json_kv(emitter, "alloc_thread_name",
-		    emitter_type_string, &alloc_tdata->thread_name);
+		    emitter_type_string, &thread_name);
 	}
 	uint64_t alloc_time_ns = nstime_ns(&node->alloc_time);
-	emitter_json_kv(emitter, "alloc_time", emitter_type_uint64,
-	    &alloc_time_ns);
+	emitter_json_kv(
+	    emitter, "alloc_time", emitter_type_uint64, &alloc_time_ns);
 	emitter_json_array_kv_begin(emitter, "alloc_trace");
 	prof_recent_alloc_dump_bt(emitter, node->alloc_tctx);
 	emitter_json_array_end(emitter);
@@ -511,9 +513,10 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) {
 		    emitter_type_uint64, &node->dalloc_tctx->thr_uid);
 		prof_tdata_t *dalloc_tdata = node->dalloc_tctx->tdata;
 		assert(dalloc_tdata != NULL);
-		if (dalloc_tdata->thread_name != NULL) {
+		if (!prof_thread_name_empty(dalloc_tdata)) {
+			const char *thread_name = dalloc_tdata->thread_name;
 			emitter_json_kv(emitter, "dalloc_thread_name",
-			    emitter_type_string, &dalloc_tdata->thread_name);
+			    emitter_type_string, &thread_name);
 		}
 		assert(!nstime_equals_zero(&node->dalloc_time));
 		uint64_t dalloc_time_ns = nstime_ns(&node->dalloc_time);
@@ -537,8 +540,8 @@ prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) {
 	buf_writer_init(tsd_tsdn(tsd), &buf_writer, write_cb, cbopaque, NULL,
 	    PROF_RECENT_PRINT_BUFSIZE);
 	emitter_t emitter;
-	emitter_init(&emitter, emitter_output_json_compact, buf_writer_cb,
-	    &buf_writer);
+	emitter_init(
+	    &emitter, emitter_output_json_compact, buf_writer_cb, &buf_writer);
 	prof_recent_list_t temp_list;
 
 	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
@@ -552,13 +555,13 @@ prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) {
 
 	emitter_begin(&emitter);
 	uint64_t sample_interval = (uint64_t)1U << lg_prof_sample;
-	emitter_json_kv(&emitter, "sample_interval", emitter_type_uint64,
-	    &sample_interval);
-	emitter_json_kv(&emitter, "recent_alloc_max", emitter_type_ssize,
-	    &dump_max);
+	emitter_json_kv(
+	    &emitter, "sample_interval", emitter_type_uint64, &sample_interval);
+	emitter_json_kv(
+	    &emitter, "recent_alloc_max", emitter_type_ssize, &dump_max);
 	emitter_json_array_kv_begin(&emitter, "recent_alloc");
 	prof_recent_t *node;
-	ql_foreach(node, &temp_list, link) {
+	ql_foreach (node, &temp_list, link) {
 		prof_recent_alloc_dump_node(&emitter, node);
 	}
 	emitter_json_array_end(&emitter);
@@ -580,17 +583,17 @@ prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) {
 #undef PROF_RECENT_PRINT_BUFSIZE
 
 bool
-prof_recent_init() {
+prof_recent_init(void) {
 	cassert(config_prof);
 	prof_recent_alloc_max_init();
 
 	if (malloc_mutex_init(&prof_recent_alloc_mtx, "prof_recent_alloc",
-	    WITNESS_RANK_PROF_RECENT_ALLOC, malloc_mutex_rank_exclusive)) {
+	        WITNESS_RANK_PROF_RECENT_ALLOC, malloc_mutex_rank_exclusive)) {
 		return true;
 	}
 
 	if (malloc_mutex_init(&prof_recent_dump_mtx, "prof_recent_dump",
-	    WITNESS_RANK_PROF_RECENT_DUMP, malloc_mutex_rank_exclusive)) {
+	        WITNESS_RANK_PROF_RECENT_DUMP, malloc_mutex_rank_exclusive)) {
 		return true;
 	}
 
diff --git a/src/prof_stack_range.c b/src/prof_stack_range.c
new file mode 100644
index 00000000..8ebcab8e
--- /dev/null
+++ b/src/prof_stack_range.c
@@ -0,0 +1,175 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/malloc_io.h"
+#include "jemalloc/internal/prof_sys.h"
+
+#if defined(__linux__) && defined(JEMALLOC_HAVE_GETTID)
+
+#	include <errno.h>
+#	include <fcntl.h>
+#	include <stdio.h>
+#	include <stdlib.h> // strtoul
+#	include <string.h>
+#	include <unistd.h>
+
+/*
+ * Converts a string representing a hexadecimal number to an unsigned long long
+ * integer. Functionally equivalent to strtoull() (for base 16) but faster for
+ * that case.
+ *
+ * @param nptr Pointer to the string to be converted.
+ * @param endptr Pointer to a pointer to character, which will be set to the
+ * character in `nptr` where parsing stopped. Can be NULL.
+ * @return The converted unsigned long long integer value.
+ */
+static inline unsigned long long int
+strtoull_hex(const char *nptr, char **endptr) {
+	unsigned long long int val = 0;
+	int                    ii = 0;
+	for (; ii < 16; ++ii) {
+		char c = nptr[ii];
+		if (c >= '0' && c <= '9') {
+			val = (val << 4) + (c - '0');
+		} else if (c >= 'a' && c <= 'f') {
+			val = (val << 4) + (c - 'a' + 10);
+		} else {
+			break;
+		}
+	}
+	if (endptr) {
+		*endptr = (char *)(nptr + ii);
+	}
+	return val;
+}
+
+static int
+prof_mapping_containing_addr(uintptr_t addr, const char *maps_path,
+    uintptr_t *mm_start, uintptr_t *mm_end) {
+	int ret = ENOENT; /* not found */
+	*mm_start = *mm_end = 0;
+
+	/*
+     * Each line of /proc/<pid>/maps is:
+     * <start>-<end> <perms> <offset> <dev> <inode> <pathname>
+     *
+     * The fields we care about are always within the first 34 characters so
+     * as long as `buf` contains the start of a mapping line it can always be
+     * parsed.
+     */
+	static const int kMappingFieldsWidth = 34;
+
+	int     fd = -1;
+	char    buf[4096];
+	ssize_t remaining = 0; /* actual number of bytes read to buf */
+	char   *line = NULL;
+
+	while (1) {
+		if (fd < 0) {
+			/* case 0: initial open of maps file */
+			fd = malloc_open(maps_path, O_RDONLY);
+			if (fd < 0) {
+				return errno;
+			}
+
+			remaining = malloc_read_fd(fd, buf, sizeof(buf));
+			if (remaining < 0) {
+				ret = errno;
+				break;
+			} else if (remaining == 0) {
+				break;
+			}
+			line = buf;
+		} else if (line == NULL) {
+			/* case 1: no newline found in buf */
+			remaining = malloc_read_fd(fd, buf, sizeof(buf));
+			if (remaining < 0) {
+				ret = errno;
+				break;
+			} else if (remaining == 0) {
+				break;
+			}
+			line = memchr(buf, '\n', remaining);
+			if (line != NULL) {
+				line++; /* advance to character after newline */
+				remaining -= (line - buf);
+			}
+		} else if (line != NULL && remaining < kMappingFieldsWidth) {
+			/*
+             * case 2: found newline but insufficient characters remaining in
+             * buf
+             */
+			memcpy(buf, line,
+			    remaining); /* copy remaining characters to start of buf */
+			line = buf;
+
+			ssize_t count = malloc_read_fd(
+			    fd, buf + remaining, sizeof(buf) - remaining);
+			if (count < 0) {
+				ret = errno;
+				break;
+			} else if (count == 0) {
+				break;
+			}
+
+			remaining +=
+			    count; /* actual number of bytes read to buf */
+		} else {
+			/* case 3: found newline and sufficient characters to parse */
+
+			/* parse <start>-<end> */
+			char     *tmp = line;
+			uintptr_t start_addr = (uintptr_t)strtoull_hex(
+			    tmp, &tmp);
+			if (addr >= start_addr) {
+				tmp++; /* advance to character after '-' */
+				uintptr_t end_addr = (uintptr_t)strtoull_hex(
+				    tmp, NULL);
+				if (addr < end_addr) {
+					*mm_start = start_addr;
+					*mm_end = end_addr;
+					ret = 0;
+					break;
+				}
+			}
+
+			/* Advance to character after next newline in the current buf. */
+			char *prev_line = line;
+			line = memchr(line, '\n', remaining);
+			if (line != NULL) {
+				line++; /* advance to character after newline */
+				remaining -= (line - prev_line);
+			}
+		}
+	}
+
+	malloc_close(fd);
+	return ret;
+}
+
+int
+prof_thread_stack_range(uintptr_t fp, uintptr_t *low, uintptr_t *high) {
+	/*
+     * NOTE: Prior to kernel 4.5 an entry for every thread stack was included in
+     * /proc/<pid>/maps as [STACK:<tid>]. Starting with kernel 4.5 only the main
+     * thread stack remains as the [stack] mapping. For other thread stacks the
+     * mapping is still visible in /proc/<pid>/task/<tid>/maps (though not
+     * labeled as [STACK:tid]).
+     * https://lists.ubuntu.com/archives/kernel-team/2016-March/074681.html
+    */
+	char maps_path[64]; // "/proc/<pid>/task/<tid>/maps"
+	malloc_snprintf(maps_path, sizeof(maps_path), "/proc/%d/task/%d/maps",
+	    getpid(), gettid());
+	return prof_mapping_containing_addr(fp, maps_path, low, high);
+}
+
+#else
+
+int
+prof_thread_stack_range(
+    UNUSED uintptr_t addr, uintptr_t *stack_start, uintptr_t *stack_end) {
+	*stack_start = *stack_end = 0;
+	return ENOENT;
+}
+
+#endif // __linux__
diff --git a/src/prof_stats.c b/src/prof_stats.c
index 5d1a506b..db248be7 100644
--- a/src/prof_stats.c
+++ b/src/prof_stats.c
@@ -3,8 +3,8 @@
 
 #include "jemalloc/internal/prof_stats.h"
 
-bool opt_prof_stats = false;
-malloc_mutex_t prof_stats_mtx;
+bool                opt_prof_stats = false;
+malloc_mutex_t      prof_stats_mtx;
 static prof_stats_t prof_stats_live[PROF_SC_NSIZES];
 static prof_stats_t prof_stats_accum[PROF_SC_NSIZES];
 
diff --git a/src/prof_sys.c b/src/prof_sys.c
index b5f1f5b2..be50c0be 100644
--- a/src/prof_sys.c
+++ b/src/prof_sys.c
@@ -1,15 +1,15 @@
-#define JEMALLOC_PROF_SYS_C_
 #include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/jemalloc_internal_includes.h"
 
 #include "jemalloc/internal/buf_writer.h"
 #include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/malloc_io.h"
 #include "jemalloc/internal/prof_data.h"
 #include "jemalloc/internal/prof_sys.h"
 
 #ifdef JEMALLOC_PROF_LIBUNWIND
-#define UNW_LOCAL_ONLY
-#include <libunwind.h>
+#	define UNW_LOCAL_ONLY
+#	include <libunwind.h>
 #endif
 
 #ifdef JEMALLOC_PROF_LIBGCC
@@ -18,17 +18,21 @@
  * use libgcc's unwinding functionality, but after we've included that, we've
  * already hooked _Unwind_Backtrace.  We'll temporarily disable hooking.
  */
-#undef _Unwind_Backtrace
-#include <unwind.h>
-#define _Unwind_Backtrace JEMALLOC_TEST_HOOK(_Unwind_Backtrace, test_hooks_libc_hook)
+#	undef _Unwind_Backtrace
+#	include <unwind.h>
+#	define _Unwind_Backtrace                                              \
+		JEMALLOC_TEST_HOOK(_Unwind_Backtrace, test_hooks_libc_hook)
+#endif
+
+#ifdef JEMALLOC_PROF_FRAME_POINTER
+// execinfo backtrace() as fallback unwinder
+#	include <execinfo.h>
 #endif
 
 /******************************************************************************/
 
 malloc_mutex_t prof_dump_filename_mtx;
 
-bool prof_do_mock = false;
-
 static uint64_t prof_dump_seq;
 static uint64_t prof_dump_iseq;
 static uint64_t prof_dump_mseq;
@@ -55,9 +59,9 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) {
 	cassert(config_prof);
 	assert(*len == 0);
 	assert(vec != NULL);
-	assert(max_len == PROF_BT_MAX);
+	assert(max_len <= PROF_BT_MAX_LIMIT);
 
-	nframes = unw_backtrace(vec, PROF_BT_MAX);
+	nframes = unw_backtrace(vec, max_len);
 	if (nframes <= 0) {
 		return;
 	}
@@ -74,7 +78,7 @@ prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) {
 static _Unwind_Reason_Code
 prof_unwind_callback(struct _Unwind_Context *context, void *arg) {
 	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
-	void *ip;
+	void               *ip;
 
 	cassert(config_prof);
 
@@ -97,32 +101,132 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) {
 
 	cassert(config_prof);
 	assert(vec != NULL);
-	assert(max_len == PROF_BT_MAX);
+	assert(max_len <= PROF_BT_MAX_LIMIT);
 
 	_Unwind_Backtrace(prof_unwind_callback, &data);
 }
-#elif (defined(JEMALLOC_PROF_GCC))
+#elif (defined(JEMALLOC_PROF_FRAME_POINTER))
+JEMALLOC_DIAGNOSTIC_PUSH
+JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS
+
+struct stack_range {
+	uintptr_t start;
+	uintptr_t end;
+};
+
+struct thread_unwind_info {
+	struct stack_range stack_range;
+	bool               fallback;
+};
+static __thread struct thread_unwind_info unwind_info = {
+    .stack_range =
+        {
+            .start = 0,
+            .end = 0,
+        },
+    .fallback = false,
+}; /* thread local */
+
 static void
 prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) {
-#define BT_FRAME(i)							\
-	if ((i) < max_len) {						\
-		void *p;						\
-		if (__builtin_frame_address(i) == 0) {			\
-			return;						\
-		}							\
-		p = __builtin_return_address(i);			\
-		if (p == NULL) {					\
-			return;						\
-		}							\
-		vec[(i)] = p;						\
-		*len = (i) + 1;						\
-	} else {							\
-		return;							\
+	/* fp: 		current stack frame pointer
+	 *
+	 * stack_range:	readable stack memory range for the current thread.
+	 *		Used to validate frame addresses during stack unwinding.
+	 *		For most threads there is a single valid stack range
+	 *		that is fixed at thread creation time.  This may not be
+	 *		the case when folly fibers or boost contexts are used.
+	 *		In those cases fall back to using execinfo backtrace()
+	 *		(DWARF unwind).
+	 */
+
+	/* always safe to get the current stack frame address */
+	uintptr_t fp = (uintptr_t)__builtin_frame_address(0);
+
+	/* new thread - get the stack range */
+	if (!unwind_info.fallback
+	    && unwind_info.stack_range.start == unwind_info.stack_range.end) {
+		if (prof_thread_stack_range(fp, &unwind_info.stack_range.start,
+		        &unwind_info.stack_range.end)
+		    != 0) {
+			unwind_info.fallback = true;
+		} else {
+			assert(fp >= unwind_info.stack_range.start
+			    && fp < unwind_info.stack_range.end);
+		}
 	}
 
+	if (unwind_info.fallback) {
+		goto label_fallback;
+	}
+
+	unsigned ii = 0;
+	while (ii < max_len && fp != 0) {
+		if (fp < unwind_info.stack_range.start
+		    || fp >= unwind_info.stack_range.end) {
+			/*
+			 * Determining the stack range from procfs can be
+			 * relatively expensive especially for programs with
+			 * many threads / shared libraries.  If the stack
+			 * range has changed, it is likely to change again
+			 * in the future (fibers or some other stack
+			 * manipulation).  So fall back to backtrace for this
+			 * thread.
+			 */
+			unwind_info.fallback = true;
+			goto label_fallback;
+		}
+		void *ip = ((void **)fp)[1];
+		if (ip == 0) {
+			break;
+		}
+		vec[ii++] = ip;
+		fp = ((uintptr_t *)fp)[0];
+	}
+	*len = ii;
+	return;
+
+label_fallback:
+	/*
+	 * Using the backtrace from execinfo.h here.  Note that it may get
+	 * redirected to libunwind when a libunwind not built with build-time
+	 * flag --disable-weak-backtrace is linked.
+	 */
+	assert(unwind_info.fallback);
+	int nframes = backtrace(vec, max_len);
+	if (nframes > 0) {
+		*len = nframes;
+	} else {
+		*len = 0;
+	}
+}
+
+JEMALLOC_DIAGNOSTIC_POP
+#elif (defined(JEMALLOC_PROF_GCC))
+JEMALLOC_DIAGNOSTIC_PUSH
+JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS
+static void
+prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) {
+/* The input arg must be a constant for __builtin_return_address. */
+#	define BT_FRAME(i)                                                    \
+		if ((i) < max_len) {                                           \
+			void *p;                                               \
+			if (__builtin_frame_address(i) == 0) {                 \
+				return;                                        \
+			}                                                      \
+			p = __builtin_return_address(i);                       \
+			if (p == NULL) {                                       \
+				return;                                        \
+			}                                                      \
+			vec[(i)] = p;                                          \
+			*len = (i) + 1;                                        \
+		} else {                                                       \
+			return;                                                \
+		}
+
 	cassert(config_prof);
 	assert(vec != NULL);
-	assert(max_len == PROF_BT_MAX);
+	assert(max_len <= PROF_BT_MAX_LIMIT);
 
 	BT_FRAME(0)
 	BT_FRAME(1)
@@ -264,7 +368,149 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) {
 	BT_FRAME(125)
 	BT_FRAME(126)
 	BT_FRAME(127)
-#undef BT_FRAME
+	BT_FRAME(128)
+	BT_FRAME(129)
+
+	BT_FRAME(130)
+	BT_FRAME(131)
+	BT_FRAME(132)
+	BT_FRAME(133)
+	BT_FRAME(134)
+	BT_FRAME(135)
+	BT_FRAME(136)
+	BT_FRAME(137)
+	BT_FRAME(138)
+	BT_FRAME(139)
+
+	BT_FRAME(140)
+	BT_FRAME(141)
+	BT_FRAME(142)
+	BT_FRAME(143)
+	BT_FRAME(144)
+	BT_FRAME(145)
+	BT_FRAME(146)
+	BT_FRAME(147)
+	BT_FRAME(148)
+	BT_FRAME(149)
+
+	BT_FRAME(150)
+	BT_FRAME(151)
+	BT_FRAME(152)
+	BT_FRAME(153)
+	BT_FRAME(154)
+	BT_FRAME(155)
+	BT_FRAME(156)
+	BT_FRAME(157)
+	BT_FRAME(158)
+	BT_FRAME(159)
+
+	BT_FRAME(160)
+	BT_FRAME(161)
+	BT_FRAME(162)
+	BT_FRAME(163)
+	BT_FRAME(164)
+	BT_FRAME(165)
+	BT_FRAME(166)
+	BT_FRAME(167)
+	BT_FRAME(168)
+	BT_FRAME(169)
+
+	BT_FRAME(170)
+	BT_FRAME(171)
+	BT_FRAME(172)
+	BT_FRAME(173)
+	BT_FRAME(174)
+	BT_FRAME(175)
+	BT_FRAME(176)
+	BT_FRAME(177)
+	BT_FRAME(178)
+	BT_FRAME(179)
+
+	BT_FRAME(180)
+	BT_FRAME(181)
+	BT_FRAME(182)
+	BT_FRAME(183)
+	BT_FRAME(184)
+	BT_FRAME(185)
+	BT_FRAME(186)
+	BT_FRAME(187)
+	BT_FRAME(188)
+	BT_FRAME(189)
+
+	BT_FRAME(190)
+	BT_FRAME(191)
+	BT_FRAME(192)
+	BT_FRAME(193)
+	BT_FRAME(194)
+	BT_FRAME(195)
+	BT_FRAME(196)
+	BT_FRAME(197)
+	BT_FRAME(198)
+	BT_FRAME(199)
+
+	BT_FRAME(200)
+	BT_FRAME(201)
+	BT_FRAME(202)
+	BT_FRAME(203)
+	BT_FRAME(204)
+	BT_FRAME(205)
+	BT_FRAME(206)
+	BT_FRAME(207)
+	BT_FRAME(208)
+	BT_FRAME(209)
+
+	BT_FRAME(210)
+	BT_FRAME(211)
+	BT_FRAME(212)
+	BT_FRAME(213)
+	BT_FRAME(214)
+	BT_FRAME(215)
+	BT_FRAME(216)
+	BT_FRAME(217)
+	BT_FRAME(218)
+	BT_FRAME(219)
+
+	BT_FRAME(220)
+	BT_FRAME(221)
+	BT_FRAME(222)
+	BT_FRAME(223)
+	BT_FRAME(224)
+	BT_FRAME(225)
+	BT_FRAME(226)
+	BT_FRAME(227)
+	BT_FRAME(228)
+	BT_FRAME(229)
+
+	BT_FRAME(230)
+	BT_FRAME(231)
+	BT_FRAME(232)
+	BT_FRAME(233)
+	BT_FRAME(234)
+	BT_FRAME(235)
+	BT_FRAME(236)
+	BT_FRAME(237)
+	BT_FRAME(238)
+	BT_FRAME(239)
+
+	BT_FRAME(240)
+	BT_FRAME(241)
+	BT_FRAME(242)
+	BT_FRAME(243)
+	BT_FRAME(244)
+	BT_FRAME(245)
+	BT_FRAME(246)
+	BT_FRAME(247)
+	BT_FRAME(248)
+	BT_FRAME(249)
+
+	BT_FRAME(250)
+	BT_FRAME(251)
+	BT_FRAME(252)
+	BT_FRAME(253)
+	BT_FRAME(254)
+	BT_FRAME(255)
+#	undef BT_FRAME
+	JEMALLOC_DIAGNOSTIC_POP
 }
 #else
 static void
@@ -281,18 +527,20 @@ prof_backtrace(tsd_t *tsd, prof_bt_t *bt) {
 	assert(prof_backtrace_hook != NULL);
 
 	pre_reentrancy(tsd, NULL);
-	prof_backtrace_hook(bt->vec, &bt->len, PROF_BT_MAX);
+	prof_backtrace_hook(bt->vec, &bt->len, opt_prof_bt_max);
 	post_reentrancy(tsd);
 }
 
 void
-prof_hooks_init() {
+prof_hooks_init(void) {
 	prof_backtrace_hook_set(&prof_backtrace_impl);
 	prof_dump_hook_set(NULL);
+	prof_sample_hook_set(NULL);
+	prof_sample_free_hook_set(NULL);
 }
 
 void
-prof_unwind_init() {
+prof_unwind_init(void) {
 #ifdef JEMALLOC_PROF_LIBGCC
 	/*
 	 * Cause the backtracing machinery to allocate its internal
@@ -318,12 +566,18 @@ prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read =
 
 void
 prof_sys_thread_name_fetch(tsd_t *tsd) {
-#define THREAD_NAME_MAX_LEN 16
-	char buf[THREAD_NAME_MAX_LEN];
-	if (!prof_sys_thread_name_read(buf, THREAD_NAME_MAX_LEN)) {
-		prof_thread_name_set_impl(tsd, buf);
+	prof_tdata_t *tdata = prof_tdata_get(tsd, true);
+	if (tdata == NULL) {
+		return;
 	}
-#undef THREAD_NAME_MAX_LEN
+
+	if (prof_sys_thread_name_read(
+	        tdata->thread_name, PROF_THREAD_NAME_MAX_LEN)
+	    != 0) {
+		prof_thread_name_clear(tdata);
+	}
+
+	tdata->thread_name[PROF_THREAD_NAME_MAX_LEN - 1] = '\0';
 }
 
 int
@@ -335,6 +589,41 @@ prof_getpid(void) {
 #endif
 }
 
+static long
+prof_get_pid_namespace(void) {
+	long ret = 0;
+
+#if defined(_WIN32) || defined(__APPLE__)
+	// Not supported, do nothing.
+#else
+	char        buf[PATH_MAX];
+	const char *linkname =
+#	if defined(__FreeBSD__) || defined(__DragonFly__)
+	    "/proc/curproc/ns/pid"
+#	else
+	    "/proc/self/ns/pid"
+#	endif
+	    ;
+	ssize_t linklen =
+#	ifndef JEMALLOC_READLINKAT
+	    readlink(linkname, buf, PATH_MAX)
+#	else
+	    readlinkat(AT_FDCWD, linkname, buf, PATH_MAX)
+#	endif
+	    ;
+
+	// namespace string is expected to be like pid:[4026531836]
+	if (linklen > 0) {
+		// Trim the trailing "]"
+		buf[linklen - 1] = '\0';
+		char *index = strtok(buf, "pid:[");
+		ret = atol(index);
+	}
+#endif
+
+	return ret;
+}
+
 /*
  * This buffer is rather large for stack allocation, so use a single buffer for
  * all profile dumps; protected by prof_dump_mtx.
@@ -362,8 +651,8 @@ struct prof_dump_arg_s {
 };
 
 static void
-prof_dump_check_possible_error(prof_dump_arg_t *arg, bool err_cond,
-    const char *format, ...) {
+prof_dump_check_possible_error(
+    prof_dump_arg_t *arg, bool err_cond, const char *format, ...) {
 	assert(!arg->error);
 	if (!err_cond) {
 		return;
@@ -375,7 +664,7 @@ prof_dump_check_possible_error(prof_dump_arg_t *arg, bool err_cond,
 	}
 
 	va_list ap;
-	char buf[PROF_PRINTF_BUFSIZE];
+	char    buf[PROF_PRINTF_BUFSIZE];
 	va_start(ap, format);
 	malloc_vsnprintf(buf, sizeof(buf), format, ap);
 	va_end(ap);
@@ -407,8 +696,8 @@ prof_dump_flush(void *opaque, const char *s) {
 	cassert(config_prof);
 	prof_dump_arg_t *arg = (prof_dump_arg_t *)opaque;
 	if (!arg->error) {
-		ssize_t err = prof_dump_write_file(arg->prof_dump_fd, s,
-		    strlen(s));
+		ssize_t err = prof_dump_write_file(
+		    arg->prof_dump_fd, s, strlen(s));
 		prof_dump_check_possible_error(arg, err == -1,
 		    "<jemalloc>: failed to write during heap profile flush\n");
 	}
@@ -421,48 +710,118 @@ prof_dump_close(prof_dump_arg_t *arg) {
 	}
 }
 
-#ifndef _WIN32
+#ifdef __APPLE__
+#	include <mach-o/dyld.h>
+
+#	ifdef __LP64__
+typedef struct mach_header_64     mach_header_t;
+typedef struct segment_command_64 segment_command_t;
+#		define MH_MAGIC_VALUE MH_MAGIC_64
+#		define MH_CIGAM_VALUE MH_CIGAM_64
+#		define LC_SEGMENT_VALUE LC_SEGMENT_64
+#	else
+typedef struct mach_header     mach_header_t;
+typedef struct segment_command segment_command_t;
+#		define MH_MAGIC_VALUE MH_MAGIC
+#		define MH_CIGAM_VALUE MH_CIGAM
+#		define LC_SEGMENT_VALUE LC_SEGMENT
+#	endif
+
+static void
+prof_dump_dyld_image_vmaddr(buf_writer_t *buf_writer, uint32_t image_index) {
+	const mach_header_t *header = (const mach_header_t *)
+	    _dyld_get_image_header(image_index);
+	if (header == NULL
+	    || (header->magic != MH_MAGIC_VALUE
+	        && header->magic != MH_CIGAM_VALUE)) {
+		// Invalid header
+		return;
+	}
+
+	intptr_t             slide = _dyld_get_image_vmaddr_slide(image_index);
+	const char          *name = _dyld_get_image_name(image_index);
+	struct load_command *load_cmd = (struct load_command *)((char *)header
+	    + sizeof(mach_header_t));
+	for (uint32_t i = 0; load_cmd && (i < header->ncmds); i++) {
+		if (load_cmd->cmd == LC_SEGMENT_VALUE) {
+			const segment_command_t *segment_cmd =
+			    (const segment_command_t *)load_cmd;
+			if (!strcmp(segment_cmd->segname, "__TEXT")) {
+				char buffer[PATH_MAX + 1];
+				malloc_snprintf(buffer, sizeof(buffer),
+				    "%016llx-%016llx: %s\n",
+				    segment_cmd->vmaddr + slide,
+				    segment_cmd->vmaddr + slide
+				        + segment_cmd->vmsize,
+				    name);
+				buf_writer_cb(buf_writer, buffer);
+				return;
+			}
+		}
+		load_cmd = (struct load_command *)((char *)load_cmd
+		    + load_cmd->cmdsize);
+	}
+}
+
+static void
+prof_dump_dyld_maps(buf_writer_t *buf_writer) {
+	uint32_t image_count = _dyld_image_count();
+	for (uint32_t i = 0; i < image_count; i++) {
+		prof_dump_dyld_image_vmaddr(buf_writer, i);
+	}
+}
+
+prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps = NULL;
+
+static void
+prof_dump_maps(buf_writer_t *buf_writer) {
+	buf_writer_cb(buf_writer, "\nMAPPED_LIBRARIES:\n");
+	/* No proc map file to read on MacOS, dump dyld maps for backtrace. */
+	prof_dump_dyld_maps(buf_writer);
+}
+#else /* !__APPLE__ */
+#	ifndef _WIN32
 JEMALLOC_FORMAT_PRINTF(1, 2)
 static int
 prof_open_maps_internal(const char *format, ...) {
-	int mfd;
+	int     mfd;
 	va_list ap;
-	char filename[PATH_MAX + 1];
+	char    filename[PATH_MAX + 1];
 
 	va_start(ap, format);
 	malloc_vsnprintf(filename, sizeof(filename), format, ap);
 	va_end(ap);
 
-#if defined(O_CLOEXEC)
+#		if defined(O_CLOEXEC)
 	mfd = open(filename, O_RDONLY | O_CLOEXEC);
-#else
+#		else
 	mfd = open(filename, O_RDONLY);
 	if (mfd != -1) {
 		fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC);
 	}
-#endif
+#		endif
 
 	return mfd;
 }
-#endif
+#	endif
 
 static int
-prof_dump_open_maps_impl() {
+prof_dump_open_maps_impl(void) {
 	int mfd;
 
 	cassert(config_prof);
-#if defined(__FreeBSD__) || defined(__DragonFly__)
+#	if defined(__FreeBSD__) || defined(__DragonFly__)
 	mfd = prof_open_maps_internal("/proc/curproc/map");
-#elif defined(_WIN32)
+#	elif defined(_WIN32)
 	mfd = -1; // Not implemented
-#else
+#	else
 	int pid = prof_getpid();
 
 	mfd = prof_open_maps_internal("/proc/%d/task/%d/maps", pid, pid);
 	if (mfd == -1) {
 		mfd = prof_open_maps_internal("/proc/%d/maps", pid);
 	}
-#endif
+#	endif
 	return mfd;
 }
 prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps =
@@ -486,14 +845,15 @@ prof_dump_maps(buf_writer_t *buf_writer) {
 	buf_writer_pipe(buf_writer, prof_dump_read_maps_cb, &mfd);
 	close(mfd);
 }
+#endif /* __APPLE__ */
 
 static bool
-prof_dump(tsd_t *tsd, bool propagate_err, const char *filename,
-    bool leakcheck) {
+prof_dump(
+    tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) {
 	cassert(config_prof);
 	assert(tsd_reentrancy_level_get(tsd) == 0);
 
-	prof_tdata_t * tdata = prof_tdata_get(tsd, true);
+	prof_tdata_t *tdata = prof_tdata_get(tsd, true);
 	if (tdata == NULL) {
 		return true;
 	}
@@ -540,7 +900,7 @@ prof_strncpy(char *UNUSED dest, const char *UNUSED src, size_t UNUSED size) {
 }
 
 static const char *
-prof_prefix_get(tsdn_t* tsdn) {
+prof_prefix_get(tsdn_t *tsdn) {
 	malloc_mutex_assert_owner(tsdn, &prof_dump_filename_mtx);
 
 	return prof_prefix == NULL ? opt_prof_prefix : prof_prefix;
@@ -564,15 +924,31 @@ prof_dump_filename(tsd_t *tsd, char *filename, char v, uint64_t vseq) {
 	const char *prefix = prof_prefix_get(tsd_tsdn(tsd));
 
 	if (vseq != VSEQ_INVALID) {
-	        /* "<prefix>.<pid>.<seq>.v<vseq>.heap" */
-		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
-		    "%s.%d.%"FMTu64".%c%"FMTu64".heap", prefix, prof_getpid(),
-		    prof_dump_seq, v, vseq);
+		if (opt_prof_pid_namespace) {
+			/* "<prefix>.<pid_namespace>.<pid>.<seq>.v<vseq>.heap" */
+			malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
+			    "%s.%ld.%d.%" FMTu64 ".%c%" FMTu64 ".heap", prefix,
+			    prof_get_pid_namespace(), prof_getpid(),
+			    prof_dump_seq, v, vseq);
+		} else {
+			/* "<prefix>.<pid>.<seq>.v<vseq>.heap" */
+			malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
+			    "%s.%d.%" FMTu64 ".%c%" FMTu64 ".heap", prefix,
+			    prof_getpid(), prof_dump_seq, v, vseq);
+		}
 	} else {
-	        /* "<prefix>.<pid>.<seq>.<v>.heap" */
-		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
-		    "%s.%d.%"FMTu64".%c.heap", prefix, prof_getpid(),
-		    prof_dump_seq, v);
+		if (opt_prof_pid_namespace) {
+			/* "<prefix>.<pid_namespace>.<pid>.<seq>.<v>.heap" */
+			malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
+			    "%s.%ld.%d.%" FMTu64 ".%c.heap", prefix,
+			    prof_get_pid_namespace(), prof_getpid(),
+			    prof_dump_seq, v);
+		} else {
+			/* "<prefix>.<pid>.<seq>.<v>.heap" */
+			malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
+			    "%s.%d.%" FMTu64 ".%c.heap", prefix, prof_getpid(),
+			    prof_dump_seq, v);
+		}
 	}
 	prof_dump_seq++;
 }
@@ -580,8 +956,15 @@ prof_dump_filename(tsd_t *tsd, char *filename, char v, uint64_t vseq) {
 void
 prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind) {
 	malloc_mutex_lock(tsdn, &prof_dump_filename_mtx);
-	malloc_snprintf(filename, PROF_DUMP_FILENAME_LEN,
-	    "%s.%d.%"FMTu64".json", prof_prefix_get(tsdn), prof_getpid(), ind);
+	if (opt_prof_pid_namespace) {
+		malloc_snprintf(filename, PROF_DUMP_FILENAME_LEN,
+		    "%s.%ld.%d.%" FMTu64 ".json", prof_prefix_get(tsdn),
+		    prof_get_pid_namespace(), prof_getpid(), ind);
+	} else {
+		malloc_snprintf(filename, PROF_DUMP_FILENAME_LEN,
+		    "%s.%d.%" FMTu64 ".json", prof_prefix_get(tsdn),
+		    prof_getpid(), ind);
+	}
 	malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx);
 }
 
@@ -600,12 +983,15 @@ bool
 prof_prefix_set(tsdn_t *tsdn, const char *prefix) {
 	cassert(config_prof);
 	ctl_mtx_assert_held(tsdn);
+	if (prefix == NULL) {
+		return true;
+	}
 	malloc_mutex_lock(tsdn, &prof_dump_filename_mtx);
 	if (prof_prefix == NULL) {
 		malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx);
 		/* Everything is still guarded by ctl_mtx. */
-		char *buffer = base_alloc(tsdn, prof_base,
-		    PROF_DUMP_FILENAME_LEN, QUANTUM);
+		char *buffer = base_alloc(
+		    tsdn, prof_base, PROF_DUMP_FILENAME_LEN, QUANTUM);
 		if (buffer == NULL) {
 			return true;
 		}
@@ -642,7 +1028,8 @@ prof_mdump_impl(tsd_t *tsd, const char *filename) {
 		/* No filename specified, so automatically generate one. */
 		malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx);
 		if (prof_prefix_get(tsd_tsdn(tsd))[0] == '\0') {
-			malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx);
+			malloc_mutex_unlock(
+			    tsd_tsdn(tsd), &prof_dump_filename_mtx);
 			return true;
 		}
 		prof_dump_filename(tsd, filename_buf, 'm', prof_dump_mseq);
diff --git a/src/psset.c b/src/psset.c
index 9a8f054f..4e904feb 100644
--- a/src/psset.c
+++ b/src/psset.c
@@ -11,7 +11,6 @@ psset_init(psset_t *psset) {
 		hpdata_age_heap_new(&psset->pageslabs[i]);
 	}
 	fb_init(psset->pageslab_bitmap, PSSET_NPSIZES);
-	memset(&psset->merged_stats, 0, sizeof(psset->merged_stats));
 	memset(&psset->stats, 0, sizeof(psset->stats));
 	hpdata_empty_list_init(&psset->empty);
 	for (int i = 0; i < PSSET_NPURGE_LISTS; i++) {
@@ -30,15 +29,19 @@ psset_bin_stats_accum(psset_bin_stats_t *dst, psset_bin_stats_t *src) {
 
 void
 psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) {
-	psset_bin_stats_accum(&dst->full_slabs[0], &src->full_slabs[0]);
-	psset_bin_stats_accum(&dst->full_slabs[1], &src->full_slabs[1]);
-	psset_bin_stats_accum(&dst->empty_slabs[0], &src->empty_slabs[0]);
-	psset_bin_stats_accum(&dst->empty_slabs[1], &src->empty_slabs[1]);
+	psset_bin_stats_accum(&dst->merged, &src->merged);
+	for (int huge = 0; huge < PSSET_NHUGE; huge++) {
+		psset_bin_stats_accum(&dst->slabs[huge], &src->slabs[huge]);
+		psset_bin_stats_accum(
+		    &dst->full_slabs[huge], &src->full_slabs[huge]);
+		psset_bin_stats_accum(
+		    &dst->empty_slabs[huge], &src->empty_slabs[huge]);
+	}
 	for (pszind_t i = 0; i < PSSET_NPSIZES; i++) {
-		psset_bin_stats_accum(&dst->nonfull_slabs[i][0],
-		    &src->nonfull_slabs[i][0]);
-		psset_bin_stats_accum(&dst->nonfull_slabs[i][1],
-		    &src->nonfull_slabs[i][1]);
+		psset_bin_stats_accum(
+		    &dst->nonfull_slabs[i][0], &src->nonfull_slabs[i][0]);
+		psset_bin_stats_accum(
+		    &dst->nonfull_slabs[i][1], &src->nonfull_slabs[i][1]);
 	}
 }
 
@@ -48,52 +51,92 @@ psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) {
  * bin) when we call psset_update_end.
  */
 JEMALLOC_ALWAYS_INLINE void
-psset_bin_stats_insert_remove(psset_t *psset, psset_bin_stats_t *binstats,
-    hpdata_t *ps, bool insert) {
+psset_slab_stats_insert_remove(psset_stats_t *stats,
+    psset_bin_stats_t *binstats, hpdata_t *ps, bool insert) {
 	size_t mul = insert ? (size_t)1 : (size_t)-1;
+	size_t nactive = hpdata_nactive_get(ps);
+	size_t ndirty = hpdata_ndirty_get(ps);
+
+	stats->merged.npageslabs += mul * 1;
+	stats->merged.nactive += mul * nactive;
+	stats->merged.ndirty += mul * ndirty;
+
+	/*
+	 * Stats above are necessary for purging logic to work, everything
+	 * below is to improve observability, thense is optional, so we don't
+	 * update it, when stats disabled.
+	 */
+	if (!config_stats) {
+		return;
+	}
+
 	size_t huge_idx = (size_t)hpdata_huge_get(ps);
 
-	binstats[huge_idx].npageslabs += mul * 1;
-	binstats[huge_idx].nactive += mul * hpdata_nactive_get(ps);
-	binstats[huge_idx].ndirty += mul * hpdata_ndirty_get(ps);
+	stats->slabs[huge_idx].npageslabs += mul * 1;
+	stats->slabs[huge_idx].nactive += mul * nactive;
+	stats->slabs[huge_idx].ndirty += mul * ndirty;
 
-	psset->merged_stats.npageslabs += mul * 1;
-	psset->merged_stats.nactive += mul * hpdata_nactive_get(ps);
-	psset->merged_stats.ndirty += mul * hpdata_ndirty_get(ps);
+	binstats[huge_idx].npageslabs += mul * 1;
+	binstats[huge_idx].nactive += mul * nactive;
+	binstats[huge_idx].ndirty += mul * ndirty;
 
 	if (config_debug) {
-		psset_bin_stats_t check_stats = {0};
-		for (size_t huge = 0; huge <= 1; huge++) {
-			psset_bin_stats_accum(&check_stats,
-			    &psset->stats.full_slabs[huge]);
-			psset_bin_stats_accum(&check_stats,
-			    &psset->stats.empty_slabs[huge]);
+		psset_bin_stats_t check_stats[PSSET_NHUGE] = {{0}};
+		for (int huge = 0; huge < PSSET_NHUGE; huge++) {
+			psset_bin_stats_accum(
+			    &check_stats[huge], &stats->full_slabs[huge]);
+			psset_bin_stats_accum(
+			    &check_stats[huge], &stats->empty_slabs[huge]);
 			for (pszind_t pind = 0; pind < PSSET_NPSIZES; pind++) {
-				psset_bin_stats_accum(&check_stats,
-				    &psset->stats.nonfull_slabs[pind][huge]);
+				psset_bin_stats_accum(&check_stats[huge],
+				    &stats->nonfull_slabs[pind][huge]);
 			}
 		}
-		assert(psset->merged_stats.npageslabs
-		    == check_stats.npageslabs);
-		assert(psset->merged_stats.nactive == check_stats.nactive);
-		assert(psset->merged_stats.ndirty == check_stats.ndirty);
+
+		assert(stats->merged.npageslabs
+		    == check_stats[0].npageslabs + check_stats[1].npageslabs);
+		assert(stats->merged.nactive
+		    == check_stats[0].nactive + check_stats[1].nactive);
+		assert(stats->merged.ndirty
+		    == check_stats[0].ndirty + check_stats[1].ndirty);
+
+		for (int huge = 0; huge < PSSET_NHUGE; huge++) {
+			assert(stats->slabs[huge].npageslabs
+			    == check_stats[huge].npageslabs);
+			assert(stats->slabs[huge].nactive
+			    == check_stats[huge].nactive);
+			assert(stats->slabs[huge].ndirty
+			    == check_stats[huge].ndirty);
+		}
 	}
 }
 
 static void
-psset_bin_stats_insert(psset_t *psset, psset_bin_stats_t *binstats,
-    hpdata_t *ps) {
-	psset_bin_stats_insert_remove(psset, binstats, ps, true);
+psset_slab_stats_insert(
+    psset_stats_t *stats, psset_bin_stats_t *binstats, hpdata_t *ps) {
+	psset_slab_stats_insert_remove(stats, binstats, ps, true);
 }
 
 static void
-psset_bin_stats_remove(psset_t *psset, psset_bin_stats_t *binstats,
-    hpdata_t *ps) {
-	psset_bin_stats_insert_remove(psset, binstats, ps, false);
+psset_slab_stats_remove(
+    psset_stats_t *stats, psset_bin_stats_t *binstats, hpdata_t *ps) {
+	psset_slab_stats_insert_remove(stats, binstats, ps, false);
+}
+
+static pszind_t
+psset_hpdata_heap_index(const hpdata_t *ps) {
+	assert(!hpdata_full(ps));
+	assert(!hpdata_empty(ps));
+	size_t   longest_free_range = hpdata_longest_free_range_get(ps);
+	pszind_t pind = sz_psz2ind(
+	    sz_psz_quantize_floor(longest_free_range << LG_PAGE));
+	assert(pind < PSSET_NPSIZES);
+	return pind;
 }
 
 static void
-psset_hpdata_heap_remove(psset_t *psset, pszind_t pind, hpdata_t *ps) {
+psset_hpdata_heap_remove(psset_t *psset, hpdata_t *ps) {
+	pszind_t pind = psset_hpdata_heap_index(ps);
 	hpdata_age_heap_remove(&psset->pageslabs[pind], ps);
 	if (hpdata_age_heap_empty(&psset->pageslabs[pind])) {
 		fb_unset(psset->pageslab_bitmap, PSSET_NPSIZES, (size_t)pind);
@@ -101,7 +144,8 @@ psset_hpdata_heap_remove(psset_t *psset, pszind_t pind, hpdata_t *ps) {
 }
 
 static void
-psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) {
+psset_hpdata_heap_insert(psset_t *psset, hpdata_t *ps) {
+	pszind_t pind = psset_hpdata_heap_index(ps);
 	if (hpdata_age_heap_empty(&psset->pageslabs[pind])) {
 		fb_set(psset->pageslab_bitmap, PSSET_NPSIZES, (size_t)pind);
 	}
@@ -109,38 +153,30 @@ psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) {
 }
 
 static void
-psset_stats_insert(psset_t* psset, hpdata_t *ps) {
+psset_stats_insert(psset_t *psset, hpdata_t *ps) {
+	psset_stats_t *stats = &psset->stats;
 	if (hpdata_empty(ps)) {
-		psset_bin_stats_insert(psset, psset->stats.empty_slabs, ps);
+		psset_slab_stats_insert(stats, psset->stats.empty_slabs, ps);
 	} else if (hpdata_full(ps)) {
-		psset_bin_stats_insert(psset, psset->stats.full_slabs, ps);
+		psset_slab_stats_insert(stats, psset->stats.full_slabs, ps);
 	} else {
-		size_t longest_free_range = hpdata_longest_free_range_get(ps);
-
-		pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(
-		    longest_free_range << LG_PAGE));
-		assert(pind < PSSET_NPSIZES);
-
-		psset_bin_stats_insert(psset, psset->stats.nonfull_slabs[pind],
-		    ps);
+		pszind_t pind = psset_hpdata_heap_index(ps);
+		psset_slab_stats_insert(
+		    stats, psset->stats.nonfull_slabs[pind], ps);
 	}
 }
 
 static void
 psset_stats_remove(psset_t *psset, hpdata_t *ps) {
+	psset_stats_t *stats = &psset->stats;
 	if (hpdata_empty(ps)) {
-		psset_bin_stats_remove(psset, psset->stats.empty_slabs, ps);
+		psset_slab_stats_remove(stats, psset->stats.empty_slabs, ps);
 	} else if (hpdata_full(ps)) {
-		psset_bin_stats_remove(psset, psset->stats.full_slabs, ps);
+		psset_slab_stats_remove(stats, psset->stats.full_slabs, ps);
 	} else {
-		size_t longest_free_range = hpdata_longest_free_range_get(ps);
-
-		pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(
-		    longest_free_range << LG_PAGE));
-		assert(pind < PSSET_NPSIZES);
-
-		psset_bin_stats_remove(psset, psset->stats.nonfull_slabs[pind],
-		    ps);
+		pszind_t pind = psset_hpdata_heap_index(ps);
+		psset_slab_stats_remove(
+		    stats, psset->stats.nonfull_slabs[pind], ps);
 	}
 }
 
@@ -165,13 +201,7 @@ psset_alloc_container_insert(psset_t *psset, hpdata_t *ps) {
 		 * going to return them from a psset_pick_alloc call.
 		 */
 	} else {
-		size_t longest_free_range = hpdata_longest_free_range_get(ps);
-
-		pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(
-		    longest_free_range << LG_PAGE));
-		assert(pind < PSSET_NPSIZES);
-
-		psset_hpdata_heap_insert(psset, pind, ps);
+		psset_hpdata_heap_insert(psset, ps);
 	}
 }
 
@@ -186,13 +216,7 @@ psset_alloc_container_remove(psset_t *psset, hpdata_t *ps) {
 	} else if (hpdata_full(ps)) {
 		/* Same as above -- do nothing in this case. */
 	} else {
-		size_t longest_free_range = hpdata_longest_free_range_get(ps);
-
-		pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(
-		    longest_free_range << LG_PAGE));
-		assert(pind < PSSET_NPSIZES);
-
-		psset_hpdata_heap_remove(psset, pind, ps);
+		psset_hpdata_heap_remove(psset, ps);
 	}
 }
 
@@ -240,7 +264,7 @@ psset_maybe_remove_purge_list(psset_t *psset, hpdata_t *ps) {
 	 * purge LRU within a given dirtiness bucket.
 	 */
 	if (hpdata_purge_allowed_get(ps)) {
-		size_t ind = psset_purge_list_ind(ps);
+		size_t               ind = psset_purge_list_ind(ps);
 		hpdata_purge_list_t *purge_list = &psset->to_purge[ind];
 		hpdata_purge_list_remove(purge_list, ps);
 		if (hpdata_purge_list_empty(purge_list)) {
@@ -252,14 +276,13 @@ psset_maybe_remove_purge_list(psset_t *psset, hpdata_t *ps) {
 static void
 psset_maybe_insert_purge_list(psset_t *psset, hpdata_t *ps) {
 	if (hpdata_purge_allowed_get(ps)) {
-		size_t ind = psset_purge_list_ind(ps);
+		size_t               ind = psset_purge_list_ind(ps);
 		hpdata_purge_list_t *purge_list = &psset->to_purge[ind];
 		if (hpdata_purge_list_empty(purge_list)) {
 			fb_set(psset->purge_bitmap, PSSET_NPURGE_LISTS, ind);
 		}
 		hpdata_purge_list_append(purge_list, ps);
 	}
-
 }
 
 void
@@ -313,18 +336,50 @@ psset_update_end(psset_t *psset, hpdata_t *ps) {
 	hpdata_assert_consistent(ps);
 }
 
+static hpdata_t *
+psset_enumerate_search(psset_t *psset, pszind_t pind, size_t size) {
+	if (hpdata_age_heap_empty(&psset->pageslabs[pind])) {
+		return NULL;
+	}
+
+	hpdata_t                          *ps = NULL;
+	hpdata_age_heap_enumerate_helper_t helper;
+	hpdata_age_heap_enumerate_prepare(&psset->pageslabs[pind], &helper,
+	    PSSET_ENUMERATE_MAX_NUM, sizeof(helper.bfs_queue) / sizeof(void *));
+
+	while ((ps = hpdata_age_heap_enumerate_next(
+	            &psset->pageslabs[pind], &helper))) {
+		if (hpdata_longest_free_range_get(ps) >= size) {
+			return ps;
+		}
+	}
+
+	return NULL;
+}
+
 hpdata_t *
 psset_pick_alloc(psset_t *psset, size_t size) {
 	assert((size & PAGE_MASK) == 0);
 	assert(size <= HUGEPAGE);
 
-	pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size));
-	pszind_t pind = (pszind_t)fb_ffs(psset->pageslab_bitmap, PSSET_NPSIZES,
-	    (size_t)min_pind);
+	pszind_t  min_pind = sz_psz2ind(sz_psz_quantize_ceil(size));
+	hpdata_t *ps = NULL;
+
+	/* See comments in eset_first_fit for why we enumerate search below. */
+	pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(size));
+	if (sz_large_size_classes_disabled() && pind_prev < min_pind) {
+		ps = psset_enumerate_search(psset, pind_prev, size);
+		if (ps != NULL) {
+			return ps;
+		}
+	}
+
+	pszind_t pind = (pszind_t)fb_ffs(
+	    psset->pageslab_bitmap, PSSET_NPSIZES, (size_t)min_pind);
 	if (pind == PSSET_NPSIZES) {
 		return hpdata_empty_list_first(&psset->empty);
 	}
-	hpdata_t *ps = hpdata_age_heap_first(&psset->pageslabs[pind]);
+	ps = hpdata_age_heap_first(&psset->pageslabs[pind]);
 	if (ps == NULL) {
 		return NULL;
 	}
@@ -335,17 +390,40 @@ psset_pick_alloc(psset_t *psset, size_t size) {
 }
 
 hpdata_t *
-psset_pick_purge(psset_t *psset) {
-	ssize_t ind_ssz = fb_fls(psset->purge_bitmap, PSSET_NPURGE_LISTS,
-	    PSSET_NPURGE_LISTS - 1);
-	if (ind_ssz < 0) {
-		return NULL;
+psset_pick_purge(psset_t *psset, const nstime_t *now) {
+	size_t max_bit = PSSET_NPURGE_LISTS - 1;
+	while (1) {
+		ssize_t ind_ssz = fb_fls(
+		    psset->purge_bitmap, PSSET_NPURGE_LISTS, max_bit);
+		if (ind_ssz < 0) {
+			break;
+		}
+		pszind_t ind = (pszind_t)ind_ssz;
+		assert(ind < PSSET_NPURGE_LISTS);
+		hpdata_t *ps = hpdata_purge_list_first(&psset->to_purge[ind]);
+		assert(ps != NULL);
+		if (now == NULL) {
+			return ps;
+		}
+		/*
+		 * We only check the first page (it had least recent hpa_alloc
+		 * or hpa_dalloc). It is possible that some page in the list
+		 * would meet the time, but we only guarantee the min delay. If
+		 * we want to get the one that changed the state to purgable
+		 * the earliest, we would change the list into a heap ordered by
+		 * time.  We will use benchmark to make a decision.
+		 */
+		const nstime_t *tm_allowed = hpdata_time_purge_allowed_get(ps);
+		if (nstime_compare(tm_allowed, now) <= 0) {
+			return ps;
+		}
+		if (ind == 0) {
+			break;
+		}
+		max_bit = ind - 1;
 	}
-	pszind_t ind = (pszind_t)ind_ssz;
-	assert(ind < PSSET_NPURGE_LISTS);
-	hpdata_t *ps = hpdata_purge_list_first(&psset->to_purge[ind]);
-	assert(ps != NULL);
-	return ps;
+	/* No page is ready yet */
+	return NULL;
 }
 
 hpdata_t *
diff --git a/src/rtree.c b/src/rtree.c
index 6496b5af..ac27f829 100644
--- a/src/rtree.c
+++ b/src/rtree.c
@@ -20,7 +20,7 @@ rtree_new(rtree_t *rtree, base_t *base, bool zeroed) {
 	rtree->base = base;
 
 	if (malloc_mutex_init(&rtree->init_lock, "rtree", WITNESS_RANK_RTREE,
-	    malloc_mutex_rank_exclusive)) {
+	        malloc_mutex_rank_exclusive)) {
 		return true;
 	}
 
@@ -29,19 +29,19 @@ rtree_new(rtree_t *rtree, base_t *base, bool zeroed) {
 
 static rtree_node_elm_t *
 rtree_node_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) {
-	return (rtree_node_elm_t *)base_alloc(tsdn, rtree->base,
-	    nelms * sizeof(rtree_node_elm_t), CACHELINE);
+	return (rtree_node_elm_t *)base_alloc_rtree(
+	    tsdn, rtree->base, nelms * sizeof(rtree_node_elm_t));
 }
 
 static rtree_leaf_elm_t *
 rtree_leaf_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) {
-	return (rtree_leaf_elm_t *)base_alloc(tsdn, rtree->base,
-	    nelms * sizeof(rtree_leaf_elm_t), CACHELINE);
+	return (rtree_leaf_elm_t *)base_alloc_rtree(
+	    tsdn, rtree->base, nelms * sizeof(rtree_leaf_elm_t));
 }
 
 static rtree_node_elm_t *
-rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level,
-    atomic_p_t *elmp) {
+rtree_node_init(
+    tsdn_t *tsdn, rtree_t *rtree, unsigned level, atomic_p_t *elmp) {
 	malloc_mutex_lock(tsdn, &rtree->init_lock);
 	/*
 	 * If *elmp is non-null, then it was initialized with the init lock
@@ -49,8 +49,8 @@ rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level,
 	 */
 	rtree_node_elm_t *node = atomic_load_p(elmp, ATOMIC_RELAXED);
 	if (node == NULL) {
-		node = rtree_node_alloc(tsdn, rtree, ZU(1) <<
-		    rtree_levels[level].bits);
+		node = rtree_node_alloc(
+		    tsdn, rtree, ZU(1) << rtree_levels[level].bits);
 		if (node == NULL) {
 			malloc_mutex_unlock(tsdn, &rtree->init_lock);
 			return NULL;
@@ -75,8 +75,8 @@ rtree_leaf_init(tsdn_t *tsdn, rtree_t *rtree, atomic_p_t *elmp) {
 	 */
 	rtree_leaf_elm_t *leaf = atomic_load_p(elmp, ATOMIC_RELAXED);
 	if (leaf == NULL) {
-		leaf = rtree_leaf_alloc(tsdn, rtree, ZU(1) <<
-		    rtree_levels[RTREE_HEIGHT-1].bits);
+		leaf = rtree_leaf_alloc(
+		    tsdn, rtree, ZU(1) << rtree_levels[RTREE_HEIGHT - 1].bits);
 		if (leaf == NULL) {
 			malloc_mutex_unlock(tsdn, &rtree->init_lock);
 			return NULL;
@@ -107,11 +107,11 @@ rtree_child_node_tryread(rtree_node_elm_t *elm, bool dependent) {
 	rtree_node_elm_t *node;
 
 	if (dependent) {
-		node = (rtree_node_elm_t *)atomic_load_p(&elm->child,
-		    ATOMIC_RELAXED);
+		node = (rtree_node_elm_t *)atomic_load_p(
+		    &elm->child, ATOMIC_RELAXED);
 	} else {
-		node = (rtree_node_elm_t *)atomic_load_p(&elm->child,
-		    ATOMIC_ACQUIRE);
+		node = (rtree_node_elm_t *)atomic_load_p(
+		    &elm->child, ATOMIC_ACQUIRE);
 	}
 
 	assert(!dependent || node != NULL);
@@ -136,11 +136,11 @@ rtree_child_leaf_tryread(rtree_node_elm_t *elm, bool dependent) {
 	rtree_leaf_elm_t *leaf;
 
 	if (dependent) {
-		leaf = (rtree_leaf_elm_t *)atomic_load_p(&elm->child,
-		    ATOMIC_RELAXED);
+		leaf = (rtree_leaf_elm_t *)atomic_load_p(
+		    &elm->child, ATOMIC_RELAXED);
 	} else {
-		leaf = (rtree_leaf_elm_t *)atomic_load_p(&elm->child,
-		    ATOMIC_ACQUIRE);
+		leaf = (rtree_leaf_elm_t *)atomic_load_p(
+		    &elm->child, ATOMIC_ACQUIRE);
 	}
 
 	assert(!dependent || leaf != NULL);
@@ -181,53 +181,54 @@ rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
 		}
 	}
 
-#define RTREE_GET_CHILD(level) {					\
-		assert(level < RTREE_HEIGHT-1);				\
-		if (level != 0 && !dependent &&				\
-		    unlikely(!rtree_node_valid(node))) {		\
-			return NULL;					\
-		}							\
-		uintptr_t subkey = rtree_subkey(key, level);		\
-		if (level + 2 < RTREE_HEIGHT) {				\
-			node = init_missing ?				\
-			    rtree_child_node_read(tsdn, rtree,		\
-			    &node[subkey], level, dependent) :		\
-			    rtree_child_node_tryread(&node[subkey],	\
-			    dependent);					\
-		} else {						\
-			leaf = init_missing ?				\
-			    rtree_child_leaf_read(tsdn, rtree,		\
-			    &node[subkey], level, dependent) :		\
-			    rtree_child_leaf_tryread(&node[subkey],	\
-			    dependent);					\
-		}							\
+#define RTREE_GET_CHILD(level)                                                 \
+	{                                                                      \
+		assert(level < RTREE_HEIGHT - 1);                              \
+		if (level != 0 && !dependent                                   \
+		    && unlikely(!rtree_node_valid(node))) {                    \
+			return NULL;                                           \
+		}                                                              \
+		uintptr_t subkey = rtree_subkey(key, level);                   \
+		if (level + 2 < RTREE_HEIGHT) {                                \
+			node = init_missing                                    \
+			    ? rtree_child_node_read(tsdn, rtree,               \
+			          &node[subkey], level, dependent)             \
+			    : rtree_child_node_tryread(                        \
+			          &node[subkey], dependent);                   \
+		} else {                                                       \
+			leaf = init_missing                                    \
+			    ? rtree_child_leaf_read(tsdn, rtree,               \
+			          &node[subkey], level, dependent)             \
+			    : rtree_child_leaf_tryread(                        \
+			          &node[subkey], dependent);                   \
+		}                                                              \
 	}
 	/*
 	 * Cache replacement upon hard lookup (i.e. L1 & L2 rtree cache miss):
 	 * (1) evict last entry in L2 cache; (2) move the collision slot from L1
 	 * cache down to L2; and 3) fill L1.
 	 */
-#define RTREE_GET_LEAF(level) {						\
-		assert(level == RTREE_HEIGHT-1);			\
-		if (!dependent && unlikely(!rtree_leaf_valid(leaf))) {	\
-			return NULL;					\
-		}							\
-		if (RTREE_CTX_NCACHE_L2 > 1) {				\
-			memmove(&rtree_ctx->l2_cache[1],		\
-			    &rtree_ctx->l2_cache[0],			\
-			    sizeof(rtree_ctx_cache_elm_t) *		\
-			    (RTREE_CTX_NCACHE_L2 - 1));			\
-		}							\
-		size_t slot = rtree_cache_direct_map(key);		\
-		rtree_ctx->l2_cache[0].leafkey =			\
-		    rtree_ctx->cache[slot].leafkey;			\
-		rtree_ctx->l2_cache[0].leaf =				\
-		    rtree_ctx->cache[slot].leaf;			\
-		uintptr_t leafkey = rtree_leafkey(key);			\
-		rtree_ctx->cache[slot].leafkey = leafkey;		\
-		rtree_ctx->cache[slot].leaf = leaf;			\
-		uintptr_t subkey = rtree_subkey(key, level);		\
-		return &leaf[subkey];					\
+#define RTREE_GET_LEAF(level)                                                  \
+	{                                                                      \
+		assert(level == RTREE_HEIGHT - 1);                             \
+		if (!dependent && unlikely(!rtree_leaf_valid(leaf))) {         \
+			return NULL;                                           \
+		}                                                              \
+		if (RTREE_CTX_NCACHE_L2 > 1) {                                 \
+			memmove(&rtree_ctx->l2_cache[1],                       \
+			    &rtree_ctx->l2_cache[0],                           \
+			    sizeof(rtree_ctx_cache_elm_t)                      \
+			        * (RTREE_CTX_NCACHE_L2 - 1));                  \
+		}                                                              \
+		size_t slot = rtree_cache_direct_map(key);                     \
+		rtree_ctx->l2_cache[0].leafkey =                               \
+		    rtree_ctx->cache[slot].leafkey;                            \
+		rtree_ctx->l2_cache[0].leaf = rtree_ctx->cache[slot].leaf;     \
+		uintptr_t leafkey = rtree_leafkey(key);                        \
+		rtree_ctx->cache[slot].leafkey = leafkey;                      \
+		rtree_ctx->cache[slot].leaf = leaf;                            \
+		uintptr_t subkey = rtree_subkey(key, level);                   \
+		return &leaf[subkey];                                          \
 	}
 	if (RTREE_HEIGHT > 1) {
 		RTREE_GET_CHILD(0)
@@ -236,11 +237,11 @@ rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
 		RTREE_GET_CHILD(1)
 	}
 	if (RTREE_HEIGHT > 3) {
-		for (unsigned i = 2; i < RTREE_HEIGHT-1; i++) {
+		for (unsigned i = 2; i < RTREE_HEIGHT - 1; i++) {
 			RTREE_GET_CHILD(i)
 		}
 	}
-	RTREE_GET_LEAF(RTREE_HEIGHT-1)
+	RTREE_GET_LEAF(RTREE_HEIGHT - 1)
 #undef RTREE_GET_CHILD
 #undef RTREE_GET_LEAF
 	not_reached();
diff --git a/src/safety_check.c b/src/safety_check.c
index 209fdda9..d052718d 100644
--- a/src/safety_check.c
+++ b/src/safety_check.c
@@ -3,30 +3,34 @@
 
 static safety_check_abort_hook_t safety_check_abort;
 
-void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr,
+void
+safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr,
     size_t true_size, size_t input_size) {
-	char *src = current_dealloc ? "the current pointer being freed" :
-	    "in thread cache, possibly from previous deallocations";
+	char *src = current_dealloc
+	    ? "the current pointer being freed"
+	    : "in thread cache, possibly from previous deallocations";
+	char *suggest_debug_build = config_debug ? "" : " --enable-debug or";
 
-	safety_check_fail("<jemalloc>: size mismatch detected (true size %zu "
+	safety_check_fail(
+	    "<jemalloc>: size mismatch detected (true size %zu "
 	    "vs input size %zu), likely caused by application sized "
-	    "deallocation bugs (source address: %p, %s). Suggest building with "
-	    "--enable-debug or address sanitizer for debugging. Abort.\n",
-	    true_size, input_size, ptr, src);
+	    "deallocation bugs (source address: %p, %s). Suggest building with"
+	    "%s address sanitizer for debugging. Abort.\n",
+	    true_size, input_size, ptr, src, suggest_debug_build);
 }
 
-void safety_check_set_abort(safety_check_abort_hook_t abort_fn) {
+void
+safety_check_set_abort(safety_check_abort_hook_t abort_fn) {
 	safety_check_abort = abort_fn;
 }
 
-void safety_check_fail(const char *format, ...) {
-	char buf[MALLOC_PRINTF_BUFSIZE];
-
-	va_list ap;
-	va_start(ap, format);
-	malloc_vsnprintf(buf, MALLOC_PRINTF_BUFSIZE, format, ap);
-	va_end(ap);
-
+/*
+ * In addition to malloc_write, also embed hint msg in the abort function name
+ * because there are cases only logging crash stack traces.
+ */
+static void
+safety_check_detected_heap_corruption___run_address_sanitizer_build_to_debug(
+    const char *buf) {
 	if (safety_check_abort == NULL) {
 		malloc_write(buf);
 		abort();
@@ -34,3 +38,16 @@ void safety_check_fail(const char *format, ...) {
 		safety_check_abort(buf);
 	}
 }
+
+void
+safety_check_fail(const char *format, ...) {
+	char buf[MALLOC_PRINTF_BUFSIZE];
+
+	va_list ap;
+	va_start(ap, format);
+	malloc_vsnprintf(buf, MALLOC_PRINTF_BUFSIZE, format, ap);
+	va_end(ap);
+
+	safety_check_detected_heap_corruption___run_address_sanitizer_build_to_debug(
+	    buf);
+}
diff --git a/src/san.c b/src/san.c
index 6e512911..5448c67f 100644
--- a/src/san.c
+++ b/src/san.c
@@ -20,43 +20,43 @@ ssize_t opt_lg_san_uaf_align = SAN_LG_UAF_ALIGN_DEFAULT;
 uintptr_t san_cache_bin_nonfast_mask = SAN_CACHE_BIN_NONFAST_MASK_DEFAULT;
 
 static inline void
-san_find_guarded_addr(edata_t *edata, uintptr_t *guard1, uintptr_t *guard2,
-    uintptr_t *addr, size_t size, bool left, bool right) {
+san_find_guarded_addr(edata_t *edata, void **guard1, void **guard2, void **addr,
+    size_t size, bool left, bool right) {
 	assert(!edata_guarded_get(edata));
 	assert(size % PAGE == 0);
-	*addr = (uintptr_t)edata_base_get(edata);
+	*addr = edata_base_get(edata);
 	if (left) {
 		*guard1 = *addr;
-		*addr += SAN_PAGE_GUARD;
+		*addr = ((byte_t *)*addr) + SAN_PAGE_GUARD;
 	} else {
-		*guard1 = 0;
+		*guard1 = NULL;
 	}
 
 	if (right) {
-		*guard2 = *addr + size;
+		*guard2 = ((byte_t *)*addr) + size;
 	} else {
-		*guard2 = 0;
+		*guard2 = NULL;
 	}
 }
 
 static inline void
-san_find_unguarded_addr(edata_t *edata, uintptr_t *guard1, uintptr_t *guard2,
-    uintptr_t *addr, size_t size, bool left, bool right) {
+san_find_unguarded_addr(edata_t *edata, void **guard1, void **guard2,
+    void **addr, size_t size, bool left, bool right) {
 	assert(edata_guarded_get(edata));
 	assert(size % PAGE == 0);
-	*addr = (uintptr_t)edata_base_get(edata);
+	*addr = edata_base_get(edata);
 	if (right) {
-		*guard2 = *addr + size;
+		*guard2 = ((byte_t *)*addr) + size;
 	} else {
-		*guard2 = 0;
+		*guard2 = NULL;
 	}
 
 	if (left) {
-		*guard1 = *addr - SAN_PAGE_GUARD;
-		assert(*guard1 != 0);
+		*guard1 = ((byte_t *)*addr) - SAN_PAGE_GUARD;
+		assert(*guard1 != NULL);
 		*addr = *guard1;
 	} else {
-		*guard1 = 0;
+		*guard1 = NULL;
 	}
 }
 
@@ -73,16 +73,16 @@ san_guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap,
 	    ? san_two_side_unguarded_sz(size_with_guards)
 	    : san_one_side_unguarded_sz(size_with_guards);
 
-	uintptr_t guard1, guard2, addr;
-	san_find_guarded_addr(edata, &guard1, &guard2, &addr, usize, left,
-	    right);
+	void *guard1, *guard2, *addr;
+	san_find_guarded_addr(
+	    edata, &guard1, &guard2, &addr, usize, left, right);
 
 	assert(edata_state_get(edata) == extent_state_active);
-	ehooks_guard(tsdn, ehooks, (void *)guard1, (void *)guard2);
+	ehooks_guard(tsdn, ehooks, guard1, guard2);
 
 	/* Update the guarded addr and usable size of the edata. */
 	edata_size_set(edata, usize);
-	edata_addr_set(edata, (void *)addr);
+	edata_addr_set(edata, addr);
 	edata_guarded_set(edata, true);
 
 	if (remap) {
@@ -108,9 +108,9 @@ san_unguard_pages_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
 	    ? san_two_side_guarded_sz(size)
 	    : san_one_side_guarded_sz(size);
 
-	uintptr_t guard1, guard2, addr;
-	san_find_unguarded_addr(edata, &guard1, &guard2, &addr, size, left,
-	    right);
+	void *guard1, *guard2, *addr;
+	san_find_unguarded_addr(
+	    edata, &guard1, &guard2, &addr, size, left, right);
 
 	ehooks_unguard(tsdn, ehooks, (void *)guard1, (void *)guard2);
 
@@ -130,15 +130,15 @@ san_unguard_pages_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
 }
 
 void
-san_unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
-    emap_t *emap, bool left, bool right) {
+san_unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap,
+    bool left, bool right) {
 	san_unguard_pages_impl(tsdn, ehooks, edata, emap, left, right,
 	    /* remap */ true);
 }
 
 void
-san_unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
-    emap_t *emap) {
+san_unguard_pages_pre_destroy(
+    tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) {
 	emap_assert_not_mapped(tsdn, emap, edata);
 	/*
 	 * We don't want to touch the emap of about to be destroyed extents, as
@@ -146,7 +146,7 @@ san_unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
 	 * we unguard the extents to the right, because retained extents only
 	 * own their right guard page per san_bump_alloc's logic.
 	 */
-	 san_unguard_pages_impl(tsdn, ehooks, edata, emap, /* left */ false,
+	san_unguard_pages_impl(tsdn, ehooks, edata, emap, /* left */ false,
 	    /* right */ true, /* remap */ false);
 }
 
@@ -163,9 +163,9 @@ san_stashed_corrupted(void *ptr, size_t size) {
 
 	void *first, *mid, *last;
 	san_junk_ptr_locations(ptr, size, &first, &mid, &last);
-	if (*(uintptr_t *)first != uaf_detect_junk ||
-	    *(uintptr_t *)mid != uaf_detect_junk ||
-	    *(uintptr_t *)last != uaf_detect_junk) {
+	if (*(uintptr_t *)first != uaf_detect_junk
+	    || *(uintptr_t *)mid != uaf_detect_junk
+	    || *(uintptr_t *)last != uaf_detect_junk) {
 		return true;
 	}
 
@@ -183,7 +183,8 @@ san_check_stashed_ptrs(void **ptrs, size_t nstashed, size_t usize) {
 		assert(stashed != NULL);
 		assert(cache_bin_nonfast_aligned(stashed));
 		if (unlikely(san_stashed_corrupted(stashed, usize))) {
-			safety_check_fail("<jemalloc>: Write-after-free "
+			safety_check_fail(
+			    "<jemalloc>: Write-after-free "
 			    "detected on deallocated pointer %p (size %zu).\n",
 			    stashed, usize);
 		}
diff --git a/src/san_bump.c b/src/san_bump.c
index 88897455..11031290 100644
--- a/src/san_bump.c
+++ b/src/san_bump.c
@@ -7,31 +7,31 @@
 #include "jemalloc/internal/ehooks.h"
 #include "jemalloc/internal/edata_cache.h"
 
-static bool
-san_bump_grow_locked(tsdn_t *tsdn, san_bump_alloc_t *sba, pac_t *pac,
-    ehooks_t *ehooks, size_t size);
+static bool san_bump_grow_locked(tsdn_t *tsdn, san_bump_alloc_t *sba,
+    pac_t *pac, ehooks_t *ehooks, size_t size);
 
 edata_t *
-san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t* sba, pac_t *pac,
+san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t *sba, pac_t *pac,
     ehooks_t *ehooks, size_t size, bool zero) {
 	assert(san_bump_enabled());
 
-	edata_t* to_destroy;
-	size_t guarded_size = san_one_side_guarded_sz(size);
+	edata_t *to_destroy;
+	size_t   guarded_size = san_one_side_guarded_sz(size);
 
 	malloc_mutex_lock(tsdn, &sba->mtx);
 
-	if (sba->curr_reg == NULL ||
-	    edata_size_get(sba->curr_reg) < guarded_size) {
+	if (sba->curr_reg == NULL
+	    || edata_size_get(sba->curr_reg) < guarded_size) {
 		/*
 		 * If the current region can't accommodate the allocation,
 		 * try replacing it with a larger one and destroy current if the
 		 * replacement succeeds.
 		 */
 		to_destroy = sba->curr_reg;
-		bool err = san_bump_grow_locked(tsdn, sba, pac, ehooks,
-		    guarded_size);
+		bool err = san_bump_grow_locked(
+		    tsdn, sba, pac, ehooks, guarded_size);
 		if (err) {
+			sba->curr_reg = to_destroy;
 			goto label_err;
 		}
 	} else {
@@ -40,9 +40,9 @@ san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t* sba, pac_t *pac,
 	assert(guarded_size <= edata_size_get(sba->curr_reg));
 	size_t trail_size = edata_size_get(sba->curr_reg) - guarded_size;
 
-	edata_t* edata;
+	edata_t *edata;
 	if (trail_size != 0) {
-		edata_t* curr_reg_trail = extent_split_wrapper(tsdn, pac,
+		edata_t *curr_reg_trail = extent_split_wrapper(tsdn, pac,
 		    ehooks, sba->curr_reg, guarded_size, trail_size,
 		    /* holding_core_locks */ true);
 		if (curr_reg_trail == NULL) {
@@ -69,9 +69,8 @@ san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t* sba, pac_t *pac,
 	    /* right */ true, /* remap */ true);
 
 	if (extent_commit_zero(tsdn, ehooks, edata, /* commit */ true, zero,
-	    /* growing_retained */ false)) {
-		extent_record(tsdn, pac, ehooks, &pac->ecache_retained,
-		    edata);
+	        /* growing_retained */ false)) {
+		extent_record(tsdn, pac, ehooks, &pac->ecache_retained, edata);
 		return NULL;
 	}
 
@@ -90,9 +89,10 @@ san_bump_grow_locked(tsdn_t *tsdn, san_bump_alloc_t *sba, pac_t *pac,
     ehooks_t *ehooks, size_t size) {
 	malloc_mutex_assert_owner(tsdn, &sba->mtx);
 
-	bool committed = false, zeroed = false;
-	size_t alloc_size = size > SBA_RETAINED_ALLOC_SIZE ? size :
-	    SBA_RETAINED_ALLOC_SIZE;
+	bool   committed = false, zeroed = false;
+	size_t alloc_size = size > SBA_RETAINED_ALLOC_SIZE
+	    ? size
+	    : SBA_RETAINED_ALLOC_SIZE;
 	assert((alloc_size & PAGE_MASK) == 0);
 	sba->curr_reg = extent_alloc_wrapper(tsdn, pac, ehooks, NULL,
 	    alloc_size, PAGE, zeroed, &committed,
diff --git a/src/sc.c b/src/sc.c
index e4a94d89..014ab95d 100644
--- a/src/sc.c
+++ b/src/sc.c
@@ -27,7 +27,7 @@ slab_size(int lg_page, int lg_base, int lg_delta, int ndelta) {
 	size_t try_slab_size = page;
 	size_t try_nregs = try_slab_size / reg_size;
 	size_t perfect_slab_size = 0;
-	bool perfect = false;
+	bool   perfect = false;
 	/*
 	 * This loop continues until we find the least common multiple of the
 	 * page size and size class size.  Size classes are all of the form
@@ -106,7 +106,7 @@ size_classes(
 	/* Outputs that we update as we go. */
 	size_t lookup_maxclass = 0;
 	size_t small_maxclass = 0;
-	int lg_large_minclass = 0;
+	int    lg_large_minclass = 0;
 	size_t large_maxclass = 0;
 
 	/* Tiny size classes. */
@@ -209,7 +209,7 @@ size_classes(
 		lg_delta++;
 	}
 	/* Additional outputs. */
-	int nsizes = index;
+	int      nsizes = index;
 	unsigned lg_ceil_nsizes = lg_ceil(nsizes);
 
 	/* Fill in the output data. */
@@ -292,8 +292,8 @@ sc_data_update_slab_size(sc_data_t *data, size_t begin, size_t end, int pgs) {
 		if (!sc->bin) {
 			break;
 		}
-		size_t reg_size = reg_size_compute(sc->lg_base, sc->lg_delta,
-		    sc->ndelta);
+		size_t reg_size = reg_size_compute(
+		    sc->lg_base, sc->lg_delta, sc->ndelta);
 		if (begin <= reg_size && reg_size <= end) {
 			sc_data_update_sc_slab_size(sc, reg_size, pgs);
 		}
diff --git a/src/sec.c b/src/sec.c
index df675590..a3254537 100644
--- a/src/sec.c
+++ b/src/sec.c
@@ -2,90 +2,58 @@
 #include "jemalloc/internal/jemalloc_internal_includes.h"
 
 #include "jemalloc/internal/sec.h"
+#include "jemalloc/internal/jemalloc_probe.h"
 
-static edata_t *sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size,
-    size_t alignment, bool zero, bool guarded, bool frequent_reuse,
-    bool *deferred_work_generated);
-static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated);
-static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    size_t old_size, size_t new_size, bool *deferred_work_generated);
-static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    bool *deferred_work_generated);
-
-static void
+static bool
 sec_bin_init(sec_bin_t *bin) {
-	bin->being_batch_filled = false;
 	bin->bytes_cur = 0;
+	sec_bin_stats_init(&bin->stats);
 	edata_list_active_init(&bin->freelist);
-}
-
-bool
-sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback,
-    const sec_opts_t *opts) {
-	assert(opts->max_alloc >= PAGE);
-
-	size_t max_alloc = PAGE_FLOOR(opts->max_alloc);
-	pszind_t npsizes = sz_psz2ind(max_alloc) + 1;
-
-	size_t sz_shards = opts->nshards * sizeof(sec_shard_t);
-	size_t sz_bins = opts->nshards * (size_t)npsizes * sizeof(sec_bin_t);
-	size_t sz_alloc = sz_shards + sz_bins;
-	void *dynalloc = base_alloc(tsdn, base, sz_alloc, CACHELINE);
-	if (dynalloc == NULL) {
+	bool err = malloc_mutex_init(&bin->mtx, "sec_bin", WITNESS_RANK_SEC_BIN,
+	    malloc_mutex_rank_exclusive);
+	if (err) {
 		return true;
 	}
-	sec_shard_t *shard_cur = (sec_shard_t *)dynalloc;
-	sec->shards = shard_cur;
-	sec_bin_t *bin_cur = (sec_bin_t *)&shard_cur[opts->nshards];
-	/* Just for asserts, below. */
-	sec_bin_t *bin_start = bin_cur;
-
-	for (size_t i = 0; i < opts->nshards; i++) {
-		sec_shard_t *shard = shard_cur;
-		shard_cur++;
-		bool err = malloc_mutex_init(&shard->mtx, "sec_shard",
-		    WITNESS_RANK_SEC_SHARD, malloc_mutex_rank_exclusive);
-		if (err) {
-			return true;
-		}
-		shard->enabled = true;
-		shard->bins = bin_cur;
-		for (pszind_t j = 0; j < npsizes; j++) {
-			sec_bin_init(&shard->bins[j]);
-			bin_cur++;
-		}
-		shard->bytes_cur = 0;
-		shard->to_flush_next = 0;
-	}
-	/*
-	 * Should have exactly matched the bin_start to the first unused byte
-	 * after the shards.
-	 */
-	assert((void *)shard_cur == (void *)bin_start);
-	/* And the last bin to use up the last bytes of the allocation. */
-	assert((char *)bin_cur == ((char *)dynalloc + sz_alloc));
-	sec->fallback = fallback;
-
-
-	sec->opts = *opts;
-	sec->npsizes = npsizes;
-
-	/*
-	 * Initialize these last so that an improper use of an SEC whose
-	 * initialization failed will segfault in an easy-to-spot way.
-	 */
-	sec->pai.alloc = &sec_alloc;
-	sec->pai.alloc_batch = &pai_alloc_batch_default;
-	sec->pai.expand = &sec_expand;
-	sec->pai.shrink = &sec_shrink;
-	sec->pai.dalloc = &sec_dalloc;
-	sec->pai.dalloc_batch = &pai_dalloc_batch_default;
 
 	return false;
 }
 
-static sec_shard_t *
+bool
+sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, const sec_opts_t *opts) {
+	sec->opts = *opts;
+	if (opts->nshards == 0) {
+		return false;
+	}
+	assert(opts->max_alloc >= PAGE);
+
+	/*
+	 * Same as tcache, sec do not cache allocs/dallocs larger than
+	 * USIZE_GROW_SLOW_THRESHOLD because the usize above this increases
+	 * by PAGE and the number of usizes is too large.
+	 */
+	assert(opts->max_alloc <= USIZE_GROW_SLOW_THRESHOLD);
+
+	size_t   max_alloc = PAGE_FLOOR(opts->max_alloc);
+	pszind_t npsizes = sz_psz2ind(max_alloc) + 1;
+
+	size_t ntotal_bins = opts->nshards * (size_t)npsizes;
+	size_t sz_bins = sizeof(sec_bin_t) * ntotal_bins;
+	void  *dynalloc = base_alloc(tsdn, base, sz_bins, CACHELINE);
+	if (dynalloc == NULL) {
+		return true;
+	}
+	sec->bins = (sec_bin_t *)dynalloc;
+	for (pszind_t j = 0; j < ntotal_bins; j++) {
+		if (sec_bin_init(&sec->bins[j])) {
+			return true;
+		}
+	}
+	sec->npsizes = npsizes;
+
+	return false;
+}
+
+static uint8_t
 sec_shard_pick(tsdn_t *tsdn, sec_t *sec) {
 	/*
 	 * Eventually, we should implement affinity, tracking source shard using
@@ -93,9 +61,9 @@ sec_shard_pick(tsdn_t *tsdn, sec_t *sec) {
 	 * distribute across all shards.
 	 */
 	if (tsdn_null(tsdn)) {
-		return &sec->shards[0];
+		return 0;
 	}
-	tsd_t *tsd = tsdn_tsd(tsdn);
+	tsd_t   *tsd = tsdn_tsd(tsdn);
 	uint8_t *idxp = tsd_sec_shardp_get(tsd);
 	if (*idxp == (uint8_t)-1) {
 		/*
@@ -104,319 +72,314 @@ sec_shard_pick(tsdn_t *tsdn, sec_t *sec) {
 		 * number to store 32 bits, since we'll deliberately overflow
 		 * when we multiply by the number of shards.
 		 */
-		uint64_t rand32 = prng_lg_range_u64(tsd_prng_statep_get(tsd), 32);
-		uint32_t idx =
-		    (uint32_t)((rand32 * (uint64_t)sec->opts.nshards) >> 32);
+		uint64_t rand32 = prng_lg_range_u64(
+		    tsd_prng_statep_get(tsd), 32);
+		uint32_t idx = (uint32_t)((rand32 * (uint64_t)sec->opts.nshards)
+		    >> 32);
 		assert(idx < (uint32_t)sec->opts.nshards);
 		*idxp = (uint8_t)idx;
 	}
-	return &sec->shards[*idxp];
+	return *idxp;
 }
 
-/*
- * Perhaps surprisingly, this can be called on the alloc pathways; if we hit an
- * empty cache, we'll try to fill it, which can push the shard over it's limit.
- */
-static void
-sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) {
-	malloc_mutex_assert_owner(tsdn, &shard->mtx);
-	edata_list_active_t to_flush;
-	edata_list_active_init(&to_flush);
-	while (shard->bytes_cur > sec->opts.bytes_after_flush) {
-		/* Pick a victim. */
-		sec_bin_t *bin = &shard->bins[shard->to_flush_next];
-
-		/* Update our victim-picking state. */
-		shard->to_flush_next++;
-		if (shard->to_flush_next == sec->npsizes) {
-			shard->to_flush_next = 0;
-		}
-
-		assert(shard->bytes_cur >= bin->bytes_cur);
-		if (bin->bytes_cur != 0) {
-			shard->bytes_cur -= bin->bytes_cur;
-			bin->bytes_cur = 0;
-			edata_list_active_concat(&to_flush, &bin->freelist);
-		}
-		/*
-		 * Either bin->bytes_cur was 0, in which case we didn't touch
-		 * the bin list but it should be empty anyways (or else we
-		 * missed a bytes_cur update on a list modification), or it
-		 * *was* 0 and we emptied it ourselves.  Either way, it should
-		 * be empty now.
-		 */
-		assert(edata_list_active_empty(&bin->freelist));
-	}
-
-	malloc_mutex_unlock(tsdn, &shard->mtx);
-	bool deferred_work_generated = false;
-	pai_dalloc_batch(tsdn, sec->fallback, &to_flush,
-	    &deferred_work_generated);
+static sec_bin_t *
+sec_bin_pick(sec_t *sec, uint8_t shard, pszind_t pszind) {
+	assert(shard < sec->opts.nshards);
+	size_t ind = (size_t)shard * sec->npsizes + pszind;
+	assert(ind < sec->npsizes * sec->opts.nshards);
+	return &sec->bins[ind];
 }
 
 static edata_t *
-sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
-    sec_bin_t *bin) {
-	malloc_mutex_assert_owner(tsdn, &shard->mtx);
-	if (!shard->enabled) {
-		return NULL;
-	}
+sec_bin_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_bin_t *bin, size_t size) {
+	malloc_mutex_assert_owner(tsdn, &bin->mtx);
+
 	edata_t *edata = edata_list_active_first(&bin->freelist);
 	if (edata != NULL) {
+		assert(!edata_list_active_empty(&bin->freelist));
 		edata_list_active_remove(&bin->freelist, edata);
-		assert(edata_size_get(edata) <= bin->bytes_cur);
-		bin->bytes_cur -= edata_size_get(edata);
-		assert(edata_size_get(edata) <= shard->bytes_cur);
-		shard->bytes_cur -= edata_size_get(edata);
+		size_t sz = edata_size_get(edata);
+		assert(sz <= bin->bytes_cur && sz > 0);
+		bin->bytes_cur -= sz;
+		bin->stats.nhits++;
 	}
 	return edata;
 }
 
 static edata_t *
-sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
-    sec_bin_t *bin, size_t size) {
-	malloc_mutex_assert_not_owner(tsdn, &shard->mtx);
+sec_multishard_trylock_alloc(
+    tsdn_t *tsdn, sec_t *sec, size_t size, pszind_t pszind) {
+	assert(sec->opts.nshards > 0);
 
-	edata_list_active_t result;
-	edata_list_active_init(&result);
-	bool deferred_work_generated = false;
-	size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size,
-	    1 + sec->opts.batch_fill_extra, &result, &deferred_work_generated);
-
-	edata_t *ret = edata_list_active_first(&result);
-	if (ret != NULL) {
-		edata_list_active_remove(&result, ret);
+	uint8_t    cur_shard = sec_shard_pick(tsdn, sec);
+	sec_bin_t *bin;
+	for (size_t i = 0; i < sec->opts.nshards; ++i) {
+		bin = sec_bin_pick(sec, cur_shard, pszind);
+		if (!malloc_mutex_trylock(tsdn, &bin->mtx)) {
+			edata_t *edata = sec_bin_alloc_locked(
+			    tsdn, sec, bin, size);
+			malloc_mutex_unlock(tsdn, &bin->mtx);
+			if (edata != NULL) {
+				JE_USDT(sec_alloc, 5, sec, bin, edata, size,
+				    /* frequent_reuse */ 1);
+				return edata;
+			}
+		}
+		cur_shard++;
+		if (cur_shard == sec->opts.nshards) {
+			cur_shard = 0;
+		}
 	}
-
-	malloc_mutex_lock(tsdn, &shard->mtx);
-	bin->being_batch_filled = false;
 	/*
-	 * Handle the easy case first: nothing to cache.  Note that this can
-	 * only happen in case of OOM, since sec_alloc checks the expected
-	 * number of allocs, and doesn't bother going down the batch_fill
-	 * pathway if there won't be anything left to cache.  So to be in this
-	 * code path, we must have asked for > 1 alloc, but only gotten 1 back.
+	 * TODO: Benchmark whether it is worth blocking on all shards here before
+	 * declaring a miss.  That could recover more remote-shard hits under
+	 * contention, but it also changes the allocation latency policy.
 	 */
-	if (nalloc <= 1) {
-		malloc_mutex_unlock(tsdn, &shard->mtx);
-		return ret;
+	assert(cur_shard == sec_shard_pick(tsdn, sec));
+	bin = sec_bin_pick(sec, cur_shard, pszind);
+	malloc_mutex_lock(tsdn, &bin->mtx);
+	edata_t *edata = sec_bin_alloc_locked(tsdn, sec, bin, size);
+	if (edata == NULL) {
+		/* Only now we know it is a miss. */
+		bin->stats.nmisses++;
 	}
-
-	size_t new_cached_bytes = (nalloc - 1) * size;
-
-	edata_list_active_concat(&bin->freelist, &result);
-	bin->bytes_cur += new_cached_bytes;
-	shard->bytes_cur += new_cached_bytes;
-
-	if (shard->bytes_cur > sec->opts.max_bytes) {
-		sec_flush_some_and_unlock(tsdn, sec, shard);
-	} else {
-		malloc_mutex_unlock(tsdn, &shard->mtx);
-	}
-
-	return ret;
+	malloc_mutex_unlock(tsdn, &bin->mtx);
+	JE_USDT(sec_alloc, 5, sec, bin, edata, size, /* frequent_reuse */ 1);
+	return edata;
 }
 
-static edata_t *
-sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero,
-    bool guarded, bool frequent_reuse, bool *deferred_work_generated) {
+edata_t *
+sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size) {
+	if (!sec_size_supported(sec, size)) {
+		return NULL;
+	}
 	assert((size & PAGE_MASK) == 0);
-	assert(!guarded);
-
-	sec_t *sec = (sec_t *)self;
-
-	if (zero || alignment > PAGE || sec->opts.nshards == 0
-	    || size > sec->opts.max_alloc) {
-		return pai_alloc(tsdn, sec->fallback, size, alignment, zero,
-		    /* guarded */ false, frequent_reuse,
-		    deferred_work_generated);
-	}
 	pszind_t pszind = sz_psz2ind(size);
 	assert(pszind < sec->npsizes);
 
-	sec_shard_t *shard = sec_shard_pick(tsdn, sec);
-	sec_bin_t *bin = &shard->bins[pszind];
-	bool do_batch_fill = false;
-
-	malloc_mutex_lock(tsdn, &shard->mtx);
-	edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, bin);
-	if (edata == NULL) {
-		if (!bin->being_batch_filled
-		    && sec->opts.batch_fill_extra > 0) {
-			bin->being_batch_filled = true;
-			do_batch_fill = true;
+	/*
+	 * If there's only one shard, skip the trylock optimization and
+	 * go straight to the blocking lock.
+	 */
+	if (sec->opts.nshards == 1) {
+		sec_bin_t *bin = sec_bin_pick(sec, /* shard */ 0, pszind);
+		malloc_mutex_lock(tsdn, &bin->mtx);
+		edata_t *edata = sec_bin_alloc_locked(tsdn, sec, bin, size);
+		if (edata == NULL) {
+			bin->stats.nmisses++;
 		}
+		malloc_mutex_unlock(tsdn, &bin->mtx);
+		JE_USDT(sec_alloc, 5, sec, bin, edata, size,
+		    /* frequent_reuse */ 1);
+		return edata;
 	}
-	malloc_mutex_unlock(tsdn, &shard->mtx);
-	if (edata == NULL) {
-		if (do_batch_fill) {
-			edata = sec_batch_fill_and_alloc(tsdn, sec, shard, bin,
-			    size);
-		} else {
-			edata = pai_alloc(tsdn, sec->fallback, size, alignment,
-			    zero, /* guarded */ false, frequent_reuse,
-			    deferred_work_generated);
-		}
-	}
-	return edata;
-}
-
-static bool
-sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
-    size_t new_size, bool zero, bool *deferred_work_generated) {
-	sec_t *sec = (sec_t *)self;
-	return pai_expand(tsdn, sec->fallback, edata, old_size, new_size, zero,
-	    deferred_work_generated);
-}
-
-static bool
-sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
-    size_t new_size, bool *deferred_work_generated) {
-	sec_t *sec = (sec_t *)self;
-	return pai_shrink(tsdn, sec->fallback, edata, old_size, new_size,
-	    deferred_work_generated);
+	return sec_multishard_trylock_alloc(tsdn, sec, size, pszind);
 }
 
 static void
-sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) {
-	malloc_mutex_assert_owner(tsdn, &shard->mtx);
-	shard->bytes_cur = 0;
-	edata_list_active_t to_flush;
-	edata_list_active_init(&to_flush);
-	for (pszind_t i = 0; i < sec->npsizes; i++) {
-		sec_bin_t *bin = &shard->bins[i];
-		bin->bytes_cur = 0;
-		edata_list_active_concat(&to_flush, &bin->freelist);
-	}
+sec_bin_dalloc_locked(tsdn_t *tsdn, sec_t *sec, sec_bin_t *bin, size_t size,
+    edata_list_active_t *dalloc_list) {
+	malloc_mutex_assert_owner(tsdn, &bin->mtx);
 
-	/*
-	 * Ordinarily we would try to avoid doing the batch deallocation while
-	 * holding the shard mutex, but the flush_all pathways only happen when
-	 * we're disabling the HPA or resetting the arena, both of which are
-	 * rare pathways.
-	 */
-	bool deferred_work_generated = false;
-	pai_dalloc_batch(tsdn, sec->fallback, &to_flush,
-	    &deferred_work_generated);
-}
-
-static void
-sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
-    edata_t *edata) {
-	malloc_mutex_assert_owner(tsdn, &shard->mtx);
-	assert(shard->bytes_cur <= sec->opts.max_bytes);
-	size_t size = edata_size_get(edata);
-	pszind_t pszind = sz_psz2ind(size);
-	assert(pszind < sec->npsizes);
-	/*
-	 * Prepending here results in LIFO allocation per bin, which seems
-	 * reasonable.
-	 */
-	sec_bin_t *bin = &shard->bins[pszind];
-	edata_list_active_prepend(&bin->freelist, edata);
 	bin->bytes_cur += size;
-	shard->bytes_cur += size;
-	if (shard->bytes_cur > sec->opts.max_bytes) {
-		/*
-		 * We've exceeded the shard limit.  We make two nods in the
-		 * direction of fragmentation avoidance: we flush everything in
-		 * the shard, rather than one particular bin, and we hold the
-		 * lock while flushing (in case one of the extents we flush is
-		 * highly preferred from a fragmentation-avoidance perspective
-		 * in the backing allocator).  This has the extra advantage of
-		 * not requiring advanced cache balancing strategies.
-		 */
-		sec_flush_some_and_unlock(tsdn, sec, shard);
-		malloc_mutex_assert_not_owner(tsdn, &shard->mtx);
-	} else {
-		malloc_mutex_unlock(tsdn, &shard->mtx);
-	}
-}
+	edata_t *edata = edata_list_active_first(dalloc_list);
+	assert(edata != NULL);
+	edata_list_active_remove(dalloc_list, edata);
+	JE_USDT(sec_dalloc, 3, sec, bin, edata);
+	edata_list_active_prepend(&bin->freelist, edata);
+	/* Single extent can be returned to SEC */
+	assert(edata_list_active_empty(dalloc_list));
 
-static void
-sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    bool *deferred_work_generated) {
-	sec_t *sec = (sec_t *)self;
-	if (sec->opts.nshards == 0
-	    || edata_size_get(edata) > sec->opts.max_alloc) {
-		pai_dalloc(tsdn, sec->fallback, edata,
-		    deferred_work_generated);
+	if (bin->bytes_cur <= sec->opts.max_bytes) {
+		bin->stats.ndalloc_noflush++;
 		return;
 	}
-	sec_shard_t *shard = sec_shard_pick(tsdn, sec);
-	malloc_mutex_lock(tsdn, &shard->mtx);
-	if (shard->enabled) {
-		sec_shard_dalloc_and_unlock(tsdn, sec, shard, edata);
+	bin->stats.ndalloc_flush++;
+	/* we want to flush 1/4 of max_bytes */
+	size_t bytes_target = sec->opts.max_bytes - (sec->opts.max_bytes >> 2);
+	while (bin->bytes_cur > bytes_target
+	    && !edata_list_active_empty(&bin->freelist)) {
+		edata_t *cur = edata_list_active_last(&bin->freelist);
+		size_t   sz = edata_size_get(cur);
+		assert(sz <= bin->bytes_cur && sz > 0);
+		bin->bytes_cur -= sz;
+		edata_list_active_remove(&bin->freelist, cur);
+		edata_list_active_append(dalloc_list, cur);
+	}
+}
+
+static void
+sec_multishard_trylock_dalloc(tsdn_t *tsdn, sec_t *sec, size_t size,
+    pszind_t pszind, edata_list_active_t *dalloc_list) {
+	assert(sec->opts.nshards > 0);
+
+	/* Try to dalloc in this threads bin first */
+	uint8_t cur_shard = sec_shard_pick(tsdn, sec);
+	for (size_t i = 0; i < sec->opts.nshards; ++i) {
+		sec_bin_t *bin = sec_bin_pick(sec, cur_shard, pszind);
+		if (!malloc_mutex_trylock(tsdn, &bin->mtx)) {
+			sec_bin_dalloc_locked(
+			    tsdn, sec, bin, size, dalloc_list);
+			malloc_mutex_unlock(tsdn, &bin->mtx);
+			return;
+		}
+		cur_shard++;
+		if (cur_shard == sec->opts.nshards) {
+			cur_shard = 0;
+		}
+	}
+	/* No bin had alloc or had the extent */
+	assert(cur_shard == sec_shard_pick(tsdn, sec));
+	sec_bin_t *bin = sec_bin_pick(sec, cur_shard, pszind);
+	malloc_mutex_lock(tsdn, &bin->mtx);
+	sec_bin_dalloc_locked(tsdn, sec, bin, size, dalloc_list);
+	malloc_mutex_unlock(tsdn, &bin->mtx);
+}
+
+void
+sec_dalloc(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *dalloc_list) {
+	if (!sec_is_used(sec)) {
+		return;
+	}
+	edata_t *edata = edata_list_active_first(dalloc_list);
+	size_t   size = edata_size_get(edata);
+	if (size > sec->opts.max_alloc) {
+		return;
+	}
+	pszind_t pszind = sz_psz2ind(size);
+	assert(pszind < sec->npsizes);
+
+	/*
+         * If there's only one shard, skip the trylock optimization and
+	 * go straight to the blocking lock.
+	 */
+	if (sec->opts.nshards == 1) {
+		sec_bin_t *bin = sec_bin_pick(sec, /* shard */ 0, pszind);
+		malloc_mutex_lock(tsdn, &bin->mtx);
+		sec_bin_dalloc_locked(tsdn, sec, bin, size, dalloc_list);
+		malloc_mutex_unlock(tsdn, &bin->mtx);
+		return;
+	}
+	sec_multishard_trylock_dalloc(tsdn, sec, size, pszind, dalloc_list);
+}
+
+void
+sec_fill(tsdn_t *tsdn, sec_t *sec, size_t size, edata_list_active_t *result,
+    size_t nallocs) {
+	assert((size & PAGE_MASK) == 0);
+	assert(sec->opts.nshards != 0 && size <= sec->opts.max_alloc);
+	assert(nallocs > 0);
+
+	pszind_t pszind = sz_psz2ind(size);
+	assert(pszind < sec->npsizes);
+
+	sec_bin_t *bin = sec_bin_pick(sec, sec_shard_pick(tsdn, sec), pszind);
+	malloc_mutex_assert_not_owner(tsdn, &bin->mtx);
+	malloc_mutex_lock(tsdn, &bin->mtx);
+	size_t new_cached_bytes = nallocs * size;
+	if (bin->bytes_cur + new_cached_bytes <= sec->opts.max_bytes) {
+		assert(!edata_list_active_empty(result));
+		edata_list_active_concat(&bin->freelist, result);
+		bin->bytes_cur += new_cached_bytes;
 	} else {
-		malloc_mutex_unlock(tsdn, &shard->mtx);
-		pai_dalloc(tsdn, sec->fallback, edata,
-		    deferred_work_generated);
+		/*
+		 * Unlikely case of many threads filling at the same time and
+		 * going above max.
+		 */
+		bin->stats.noverfills++;
+		while (bin->bytes_cur + size <= sec->opts.max_bytes) {
+			edata_t *edata = edata_list_active_first(result);
+			if (edata == NULL) {
+				break;
+			}
+			edata_list_active_remove(result, edata);
+			assert(size == edata_size_get(edata));
+			edata_list_active_append(&bin->freelist, edata);
+			bin->bytes_cur += size;
+		}
 	}
+	malloc_mutex_unlock(tsdn, &bin->mtx);
 }
 
 void
-sec_flush(tsdn_t *tsdn, sec_t *sec) {
-	for (size_t i = 0; i < sec->opts.nshards; i++) {
-		malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
-		sec_flush_all_locked(tsdn, sec, &sec->shards[i]);
-		malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
+sec_flush(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *to_flush) {
+	if (!sec_is_used(sec)) {
+		return;
 	}
-}
-
-void
-sec_disable(tsdn_t *tsdn, sec_t *sec) {
-	for (size_t i = 0; i < sec->opts.nshards; i++) {
-		malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
-		sec->shards[i].enabled = false;
-		sec_flush_all_locked(tsdn, sec, &sec->shards[i]);
-		malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
+	size_t ntotal_bins = sec->opts.nshards * sec->npsizes;
+	for (pszind_t i = 0; i < ntotal_bins; i++) {
+		sec_bin_t *bin = &sec->bins[i];
+		malloc_mutex_lock(tsdn, &bin->mtx);
+		bin->bytes_cur = 0;
+		edata_list_active_concat(to_flush, &bin->freelist);
+		malloc_mutex_unlock(tsdn, &bin->mtx);
 	}
 }
 
 void
 sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats) {
+	if (!sec_is_used(sec)) {
+		return;
+	}
 	size_t sum = 0;
-	for (size_t i = 0; i < sec->opts.nshards; i++) {
-		/*
-		 * We could save these lock acquisitions by making bytes_cur
-		 * atomic, but stats collection is rare anyways and we expect
-		 * the number and type of stats to get more interesting.
-		 */
-		malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
-		sum += sec->shards[i].bytes_cur;
-		malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
+	size_t ntotal_bins = sec->opts.nshards * sec->npsizes;
+	for (pszind_t i = 0; i < ntotal_bins; i++) {
+		sec_bin_t *bin = &sec->bins[i];
+		malloc_mutex_lock(tsdn, &bin->mtx);
+		sum += bin->bytes_cur;
+		sec_bin_stats_accum(&stats->total, &bin->stats);
+		malloc_mutex_unlock(tsdn, &bin->mtx);
 	}
 	stats->bytes += sum;
 }
 
 void
-sec_mutex_stats_read(tsdn_t *tsdn, sec_t *sec,
-    mutex_prof_data_t *mutex_prof_data) {
-	for (size_t i = 0; i < sec->opts.nshards; i++) {
-		malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
-		malloc_mutex_prof_accum(tsdn, mutex_prof_data,
-		    &sec->shards[i].mtx);
-		malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
+sec_mutex_stats_read(
+    tsdn_t *tsdn, sec_t *sec, mutex_prof_data_t *mutex_prof_data) {
+	if (!sec_is_used(sec)) {
+		return;
+	}
+	size_t ntotal_bins = sec->opts.nshards * sec->npsizes;
+	for (pszind_t i = 0; i < ntotal_bins; i++) {
+		sec_bin_t *bin = &sec->bins[i];
+		malloc_mutex_lock(tsdn, &bin->mtx);
+		malloc_mutex_prof_accum(tsdn, mutex_prof_data, &bin->mtx);
+		malloc_mutex_unlock(tsdn, &bin->mtx);
 	}
 }
 
 void
 sec_prefork2(tsdn_t *tsdn, sec_t *sec) {
-	for (size_t i = 0; i < sec->opts.nshards; i++) {
-		malloc_mutex_prefork(tsdn, &sec->shards[i].mtx);
+	if (!sec_is_used(sec)) {
+		return;
+	}
+	size_t ntotal_bins = sec->opts.nshards * sec->npsizes;
+	for (pszind_t i = 0; i < ntotal_bins; i++) {
+		sec_bin_t *bin = &sec->bins[i];
+		malloc_mutex_prefork(tsdn, &bin->mtx);
 	}
 }
 
 void
 sec_postfork_parent(tsdn_t *tsdn, sec_t *sec) {
-	for (size_t i = 0; i < sec->opts.nshards; i++) {
-		malloc_mutex_postfork_parent(tsdn, &sec->shards[i].mtx);
+	if (!sec_is_used(sec)) {
+		return;
+	}
+	size_t ntotal_bins = sec->opts.nshards * sec->npsizes;
+	for (pszind_t i = 0; i < ntotal_bins; i++) {
+		sec_bin_t *bin = &sec->bins[i];
+		malloc_mutex_postfork_parent(tsdn, &bin->mtx);
 	}
 }
 
 void
 sec_postfork_child(tsdn_t *tsdn, sec_t *sec) {
-	for (size_t i = 0; i < sec->opts.nshards; i++) {
-		malloc_mutex_postfork_child(tsdn, &sec->shards[i].mtx);
+	if (!sec_is_used(sec)) {
+		return;
+	}
+	size_t ntotal_bins = sec->opts.nshards * sec->npsizes;
+	for (pszind_t i = 0; i < ntotal_bins; i++) {
+		sec_bin_t *bin = &sec->bins[i];
+		malloc_mutex_postfork_child(tsdn, &bin->mtx);
 	}
 }
diff --git a/src/stats.c b/src/stats.c
index efc70fd3..82458fec 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -9,60 +9,67 @@
 #include "jemalloc/internal/mutex_prof.h"
 #include "jemalloc/internal/prof_stats.h"
 
-const char *global_mutex_names[mutex_prof_num_global_mutexes] = {
+static const char *const global_mutex_names[mutex_prof_num_global_mutexes] = {
 #define OP(mtx) #mtx,
-	MUTEX_PROF_GLOBAL_MUTEXES
+    MUTEX_PROF_GLOBAL_MUTEXES
 #undef OP
 };
 
-const char *arena_mutex_names[mutex_prof_num_arena_mutexes] = {
+static const char *const arena_mutex_names[mutex_prof_num_arena_mutexes] = {
 #define OP(mtx) #mtx,
-	MUTEX_PROF_ARENA_MUTEXES
+    MUTEX_PROF_ARENA_MUTEXES
 #undef OP
 };
 
-#define CTL_GET(n, v, t) do {						\
-	size_t sz = sizeof(t);						\
-	xmallctl(n, (void *)v, &sz, NULL, 0);				\
-} while (0)
+#define CTL_GET(n, v, t)                                                       \
+	do {                                                                   \
+		size_t sz = sizeof(t);                                         \
+		xmallctl(n, (void *)v, &sz, NULL, 0);                          \
+	} while (0)
 
-#define CTL_LEAF_PREPARE(mib, miblen, name) do {			\
-	assert(miblen < CTL_MAX_DEPTH);					\
-	size_t miblen_new = CTL_MAX_DEPTH;				\
-	xmallctlmibnametomib(mib, miblen, name, &miblen_new);		\
-	assert(miblen_new > miblen);					\
-} while (0)
+#define CTL_LEAF_PREPARE(mib, miblen, name)                                    \
+	do {                                                                   \
+		assert(miblen < CTL_MAX_DEPTH);                                \
+		size_t miblen_new = CTL_MAX_DEPTH;                             \
+		xmallctlmibnametomib(mib, miblen, name, &miblen_new);          \
+		assert(miblen_new > miblen);                                   \
+	} while (0)
 
-#define CTL_LEAF(mib, miblen, leaf, v, t) do {			\
-	assert(miblen < CTL_MAX_DEPTH);					\
-	size_t miblen_new = CTL_MAX_DEPTH;				\
-	size_t sz = sizeof(t);						\
-	xmallctlbymibname(mib, miblen, leaf, &miblen_new, (void *)v,	\
-	    &sz, NULL, 0);						\
-	assert(miblen_new == miblen + 1);				\
-} while (0)
+#define CTL_LEAF(mib, miblen, leaf, v, t)                                      \
+	do {                                                                   \
+		assert(miblen < CTL_MAX_DEPTH);                                \
+		size_t miblen_new = CTL_MAX_DEPTH;                             \
+		size_t sz = sizeof(t);                                         \
+		xmallctlbymibname(                                             \
+		    mib, miblen, leaf, &miblen_new, (void *)v, &sz, NULL, 0);  \
+		assert(miblen_new == miblen + 1);                              \
+	} while (0)
 
-#define CTL_M2_GET(n, i, v, t) do {					\
-	size_t mib[CTL_MAX_DEPTH];					\
-	size_t miblen = sizeof(mib) / sizeof(size_t);			\
-	size_t sz = sizeof(t);						\
-	xmallctlnametomib(n, mib, &miblen);				\
-	mib[2] = (i);							\
-	xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0);		\
-} while (0)
+#define CTL_MIB_GET(n, i, v, t, ind)                                           \
+	do {                                                                   \
+		size_t mib[CTL_MAX_DEPTH];                                     \
+		size_t miblen = sizeof(mib) / sizeof(size_t);                  \
+		size_t sz = sizeof(t);                                         \
+		xmallctlnametomib(n, mib, &miblen);                            \
+		mib[(ind)] = (i);                                              \
+		xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0);           \
+	} while (0)
+
+#define CTL_M1_GET(n, i, v, t) CTL_MIB_GET(n, i, v, t, 1)
+#define CTL_M2_GET(n, i, v, t) CTL_MIB_GET(n, i, v, t, 2)
 
 /******************************************************************************/
 /* Data. */
 
 bool opt_stats_print = false;
-char opt_stats_print_opts[stats_print_tot_num_options+1] = "";
+char opt_stats_print_opts[stats_print_tot_num_options + 1] = "";
 
 int64_t opt_stats_interval = STATS_INTERVAL_DEFAULT;
-char opt_stats_interval_opts[stats_print_tot_num_options+1] = "";
+char    opt_stats_interval_opts[stats_print_tot_num_options + 1] = "";
 
 static counter_accum_t stats_interval_accumulated;
 /* Per thread batch accum size for stats_interval. */
-static uint64_t stats_interval_accum_batch;
+uint64_t stats_interval_accum_batch;
 
 /******************************************************************************/
 
@@ -108,8 +115,8 @@ get_rate_str(uint64_t dividend, uint64_t divisor, char str[6]) {
 static void
 mutex_stats_init_cols(emitter_row_t *row, const char *table_name,
     emitter_col_t *name,
-    emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters],
-    emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) {
+    emitter_col_t  col_uint64_t[mutex_prof_num_uint64_t_counters],
+    emitter_col_t  col_uint32_t[mutex_prof_num_uint32_t_counters]) {
 	mutex_prof_uint64_t_counter_ind_t k_uint64_t = 0;
 	mutex_prof_uint32_t_counter_ind_t k_uint32_t = 0;
 
@@ -125,13 +132,13 @@ mutex_stats_init_cols(emitter_row_t *row, const char *table_name,
 
 #define WIDTH_uint32_t 12
 #define WIDTH_uint64_t 16
-#define OP(counter, counter_type, human, derived, base_counter)		\
-	col = &col_##counter_type[k_##counter_type];			\
-	++k_##counter_type;						\
-	emitter_col_init(col, row);					\
-	col->justify = emitter_justify_right;				\
-	col->width = derived ? 8 : WIDTH_##counter_type;		\
-	col->type = emitter_type_title;					\
+#define OP(counter, counter_type, human, derived, base_counter)                \
+	col = &col_##counter_type[k_##counter_type];                           \
+	++k_##counter_type;                                                    \
+	emitter_col_init(col, row);                                            \
+	col->justify = emitter_justify_right;                                  \
+	col->width = derived ? 8 : WIDTH_##counter_type;                       \
+	col->type = emitter_type_title;                                        \
 	col->str_val = human;
 	MUTEX_PROF_COUNTERS
 #undef OP
@@ -143,9 +150,9 @@ mutex_stats_init_cols(emitter_row_t *row, const char *table_name,
 static void
 mutex_stats_read_global(size_t mib[], size_t miblen, const char *name,
     emitter_col_t *col_name,
-    emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters],
-    emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters],
-    uint64_t uptime) {
+    emitter_col_t  col_uint64_t[mutex_prof_num_uint64_t_counters],
+    emitter_col_t  col_uint32_t[mutex_prof_num_uint32_t_counters],
+    uint64_t       uptime) {
 	CTL_LEAF_PREPARE(mib, miblen, name);
 	size_t miblen_name = miblen + 1;
 
@@ -154,18 +161,17 @@ mutex_stats_read_global(size_t mib[], size_t miblen, const char *name,
 	emitter_col_t *dst;
 #define EMITTER_TYPE_uint32_t emitter_type_uint32
 #define EMITTER_TYPE_uint64_t emitter_type_uint64
-#define OP(counter, counter_type, human, derived, base_counter)		\
-	dst = &col_##counter_type[mutex_counter_##counter];		\
-	dst->type = EMITTER_TYPE_##counter_type;			\
-	if (!derived) {							\
-		CTL_LEAF(mib, miblen_name, #counter,			\
-		    (counter_type *)&dst->bool_val, counter_type);	\
-	} else {							\
-		emitter_col_t *base =					\
-		    &col_##counter_type[mutex_counter_##base_counter];	\
-		dst->counter_type##_val =				\
-		    (counter_type)rate_per_second(			\
-		    base->counter_type##_val, uptime);			\
+#define OP(counter, counter_type, human, derived, base_counter)                \
+	dst = &col_##counter_type[mutex_counter_##counter];                    \
+	dst->type = EMITTER_TYPE_##counter_type;                               \
+	if (!derived) {                                                        \
+		CTL_LEAF(mib, miblen_name, #counter,                           \
+		    (counter_type *)&dst->bool_val, counter_type);             \
+	} else {                                                               \
+		emitter_col_t *base =                                          \
+		    &col_##counter_type[mutex_counter_##base_counter];         \
+		dst->counter_type##_val = (counter_type)rate_per_second(       \
+		    base->counter_type##_val, uptime);                         \
 	}
 	MUTEX_PROF_COUNTERS
 #undef OP
@@ -176,9 +182,9 @@ mutex_stats_read_global(size_t mib[], size_t miblen, const char *name,
 static void
 mutex_stats_read_arena(size_t mib[], size_t miblen, const char *name,
     emitter_col_t *col_name,
-    emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters],
-    emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters],
-    uint64_t uptime) {
+    emitter_col_t  col_uint64_t[mutex_prof_num_uint64_t_counters],
+    emitter_col_t  col_uint32_t[mutex_prof_num_uint32_t_counters],
+    uint64_t       uptime) {
 	CTL_LEAF_PREPARE(mib, miblen, name);
 	size_t miblen_name = miblen + 1;
 
@@ -187,18 +193,17 @@ mutex_stats_read_arena(size_t mib[], size_t miblen, const char *name,
 	emitter_col_t *dst;
 #define EMITTER_TYPE_uint32_t emitter_type_uint32
 #define EMITTER_TYPE_uint64_t emitter_type_uint64
-#define OP(counter, counter_type, human, derived, base_counter)		\
-	dst = &col_##counter_type[mutex_counter_##counter];		\
-	dst->type = EMITTER_TYPE_##counter_type;			\
-	if (!derived) {							\
-		CTL_LEAF(mib, miblen_name, #counter,			\
-		    (counter_type *)&dst->bool_val, counter_type);	\
-	} else {							\
-		emitter_col_t *base =					\
-		    &col_##counter_type[mutex_counter_##base_counter];	\
-		dst->counter_type##_val =				\
-		    (counter_type)rate_per_second(			\
-		    base->counter_type##_val, uptime);			\
+#define OP(counter, counter_type, human, derived, base_counter)                \
+	dst = &col_##counter_type[mutex_counter_##counter];                    \
+	dst->type = EMITTER_TYPE_##counter_type;                               \
+	if (!derived) {                                                        \
+		CTL_LEAF(mib, miblen_name, #counter,                           \
+		    (counter_type *)&dst->bool_val, counter_type);             \
+	} else {                                                               \
+		emitter_col_t *base =                                          \
+		    &col_##counter_type[mutex_counter_##base_counter];         \
+		dst->counter_type##_val = (counter_type)rate_per_second(       \
+		    base->counter_type##_val, uptime);                         \
 	}
 	MUTEX_PROF_COUNTERS
 #undef OP
@@ -210,7 +215,7 @@ static void
 mutex_stats_read_arena_bin(size_t mib[], size_t miblen,
     emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters],
     emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters],
-    uint64_t uptime) {
+    uint64_t      uptime) {
 	CTL_LEAF_PREPARE(mib, miblen, "mutex");
 	size_t miblen_mutex = miblen + 1;
 
@@ -218,18 +223,17 @@ mutex_stats_read_arena_bin(size_t mib[], size_t miblen,
 
 #define EMITTER_TYPE_uint32_t emitter_type_uint32
 #define EMITTER_TYPE_uint64_t emitter_type_uint64
-#define OP(counter, counter_type, human, derived, base_counter)		\
-	dst = &col_##counter_type[mutex_counter_##counter];		\
-	dst->type = EMITTER_TYPE_##counter_type;			\
-	if (!derived) {							\
-		CTL_LEAF(mib, miblen_mutex, #counter,			\
-		    (counter_type *)&dst->bool_val, counter_type);	\
-	} else {							\
-		emitter_col_t *base =					\
-		    &col_##counter_type[mutex_counter_##base_counter];	\
-		dst->counter_type##_val =				\
-		    (counter_type)rate_per_second(			\
-		    base->counter_type##_val, uptime);			\
+#define OP(counter, counter_type, human, derived, base_counter)                \
+	dst = &col_##counter_type[mutex_counter_##counter];                    \
+	dst->type = EMITTER_TYPE_##counter_type;                               \
+	if (!derived) {                                                        \
+		CTL_LEAF(mib, miblen_mutex, #counter,                          \
+		    (counter_type *)&dst->bool_val, counter_type);             \
+	} else {                                                               \
+		emitter_col_t *base =                                          \
+		    &col_##counter_type[mutex_counter_##base_counter];         \
+		dst->counter_type##_val = (counter_type)rate_per_second(       \
+		    base->counter_type##_val, uptime);                         \
 	}
 	MUTEX_PROF_COUNTERS
 #undef OP
@@ -253,57 +257,55 @@ mutex_stats_emit(emitter_t *emitter, emitter_row_t *row,
 
 #define EMITTER_TYPE_uint32_t emitter_type_uint32
 #define EMITTER_TYPE_uint64_t emitter_type_uint64
-#define OP(counter, type, human, derived, base_counter)		\
-	if (!derived) {                    \
-		col = &col_##type[k_##type];                        \
-		++k_##type;                            \
+#define OP(counter, type, human, derived, base_counter)                        \
+	if (!derived) {                                                        \
+		col = &col_##type[k_##type];                                   \
 		emitter_json_kv(emitter, #counter, EMITTER_TYPE_##type,        \
-		    (const void *)&col->bool_val); \
-	}
+		    (const void *)&col->bool_val);                             \
+	}                                                                      \
+	++k_##type;
 	MUTEX_PROF_COUNTERS;
 #undef OP
 #undef EMITTER_TYPE_uint32_t
 #undef EMITTER_TYPE_uint64_t
 }
 
-#define COL_DECLARE(column_name)					\
-	emitter_col_t col_##column_name;
+#define COL_DECLARE(column_name) emitter_col_t col_##column_name;
 
-#define COL_INIT(row_name, column_name, left_or_right, col_width, etype)\
-	emitter_col_init(&col_##column_name, &row_name);		\
-	col_##column_name.justify = emitter_justify_##left_or_right;	\
-	col_##column_name.width = col_width;				\
+#define COL_INIT(row_name, column_name, left_or_right, col_width, etype)       \
+	emitter_col_init(&col_##column_name, &row_name);                       \
+	col_##column_name.justify = emitter_justify_##left_or_right;           \
+	col_##column_name.width = col_width;                                   \
 	col_##column_name.type = emitter_type_##etype;
 
-#define COL(row_name, column_name, left_or_right, col_width, etype)	\
-	COL_DECLARE(column_name);					\
+#define COL(row_name, column_name, left_or_right, col_width, etype)            \
+	COL_DECLARE(column_name);                                              \
 	COL_INIT(row_name, column_name, left_or_right, col_width, etype)
 
-#define COL_HDR_DECLARE(column_name)					\
-	COL_DECLARE(column_name);					\
+#define COL_HDR_DECLARE(column_name)                                           \
+	COL_DECLARE(column_name);                                              \
 	emitter_col_t header_##column_name;
 
-#define COL_HDR_INIT(row_name, column_name, human, left_or_right,	\
-	col_width, etype)						\
-	COL_INIT(row_name, column_name, left_or_right, col_width, etype)\
-	emitter_col_init(&header_##column_name, &header_##row_name);	\
-	header_##column_name.justify = emitter_justify_##left_or_right;	\
-	header_##column_name.width = col_width;				\
-	header_##column_name.type = emitter_type_title;			\
+#define COL_HDR_INIT(                                                          \
+    row_name, column_name, human, left_or_right, col_width, etype)             \
+	COL_INIT(row_name, column_name, left_or_right, col_width, etype)       \
+	emitter_col_init(&header_##column_name, &header_##row_name);           \
+	header_##column_name.justify = emitter_justify_##left_or_right;        \
+	header_##column_name.width = col_width;                                \
+	header_##column_name.type = emitter_type_title;                        \
 	header_##column_name.str_val = human ? human : #column_name;
 
-#define COL_HDR(row_name, column_name, human, left_or_right, col_width,	\
-    etype)								\
-	COL_HDR_DECLARE(column_name)					\
-	COL_HDR_INIT(row_name, column_name, human, left_or_right,	\
-	    col_width, etype)
+#define COL_HDR(row_name, column_name, human, left_or_right, col_width, etype) \
+	COL_HDR_DECLARE(column_name)                                           \
+	COL_HDR_INIT(                                                          \
+	    row_name, column_name, human, left_or_right, col_width, etype)
 
 JEMALLOC_COLD
 static void
-stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i,
-    uint64_t uptime) {
-	size_t page;
-	bool in_gap, in_gap_prev;
+stats_arena_bins_print(
+    emitter_t *emitter, bool mutex, unsigned i, uint64_t uptime) {
+	size_t   page;
+	bool     in_gap, in_gap_prev;
 	unsigned nbins, j;
 
 	CTL_GET("arenas.page", &page, size_t);
@@ -321,12 +323,12 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i,
 
 	COL_HDR(row, size, NULL, right, 20, size)
 	COL_HDR(row, ind, NULL, right, 4, unsigned)
-	COL_HDR(row, allocated, NULL, right, 13, uint64)
-	COL_HDR(row, nmalloc, NULL, right, 13, uint64)
+	COL_HDR(row, allocated, NULL, right, 14, size)
+	COL_HDR(row, nmalloc, NULL, right, 14, uint64)
 	COL_HDR(row, nmalloc_ps, "(#/sec)", right, 8, uint64)
-	COL_HDR(row, ndalloc, NULL, right, 13, uint64)
+	COL_HDR(row, ndalloc, NULL, right, 14, uint64)
 	COL_HDR(row, ndalloc_ps, "(#/sec)", right, 8, uint64)
-	COL_HDR(row, nrequests, NULL, right, 13, uint64)
+	COL_HDR(row, nrequests, NULL, right, 15, uint64)
 	COL_HDR(row, nrequests_ps, "(#/sec)", right, 10, uint64)
 	COL_HDR_DECLARE(prof_live_requested);
 	COL_HDR_DECLARE(prof_live_count);
@@ -366,17 +368,17 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i,
 	emitter_col_t header_mutex32[mutex_prof_num_uint32_t_counters];
 
 	if (mutex) {
-		mutex_stats_init_cols(&row, NULL, NULL, col_mutex64,
-		    col_mutex32);
-		mutex_stats_init_cols(&header_row, NULL, NULL, header_mutex64,
-		    header_mutex32);
+		mutex_stats_init_cols(
+		    &row, NULL, NULL, col_mutex64, col_mutex32);
+		mutex_stats_init_cols(
+		    &header_row, NULL, NULL, header_mutex64, header_mutex32);
 	}
 
 	/*
 	 * We print a "bins:" header as part of the table row; we need to adjust
 	 * the header size column to compensate.
 	 */
-	header_size.width -=5;
+	header_size.width -= 5;
 	emitter_table_printf(emitter, "bins:");
 	emitter_table_row(emitter, &header_row);
 	emitter_json_array_kv_begin(emitter, "bins");
@@ -395,13 +397,13 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i,
 	}
 
 	for (j = 0, in_gap = false; j < nbins; j++) {
-		uint64_t nslabs;
-		size_t reg_size, slab_size, curregs;
-		size_t curslabs;
-		size_t nonfull_slabs;
-		uint32_t nregs, nshards;
-		uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes;
-		uint64_t nreslabs;
+		uint64_t     nslabs;
+		size_t       reg_size, slab_size, curregs;
+		size_t       curslabs;
+		size_t       nonfull_slabs;
+		uint32_t     nregs, nshards;
+		uint64_t     nmalloc, ndalloc, nrequests, nfills, nflushes;
+		uint64_t     nreslabs;
 		prof_stats_t prof_live;
 		prof_stats_t prof_accum;
 
@@ -426,8 +428,8 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i,
 		}
 
 		if (in_gap_prev && !in_gap) {
-			emitter_table_printf(emitter,
-			    "                     ---\n");
+			emitter_table_printf(
+			    emitter, "                     ---\n");
 		}
 
 		if (in_gap && !emitter_outputs_json(emitter)) {
@@ -441,8 +443,8 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i,
 		CTL_LEAF(stats_arenas_mib, 5, "nmalloc", &nmalloc, uint64_t);
 		CTL_LEAF(stats_arenas_mib, 5, "ndalloc", &ndalloc, uint64_t);
 		CTL_LEAF(stats_arenas_mib, 5, "curregs", &curregs, size_t);
-		CTL_LEAF(stats_arenas_mib, 5, "nrequests", &nrequests,
-		    uint64_t);
+		CTL_LEAF(
+		    stats_arenas_mib, 5, "nrequests", &nrequests, uint64_t);
 		CTL_LEAF(stats_arenas_mib, 5, "nfills", &nfills, uint64_t);
 		CTL_LEAF(stats_arenas_mib, 5, "nflushes", &nflushes, uint64_t);
 		CTL_LEAF(stats_arenas_mib, 5, "nreslabs", &nreslabs, uint64_t);
@@ -456,14 +458,14 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i,
 		}
 
 		emitter_json_object_begin(emitter);
-		emitter_json_kv(emitter, "nmalloc", emitter_type_uint64,
-		    &nmalloc);
-		emitter_json_kv(emitter, "ndalloc", emitter_type_uint64,
-		    &ndalloc);
-		emitter_json_kv(emitter, "curregs", emitter_type_size,
-		    &curregs);
-		emitter_json_kv(emitter, "nrequests", emitter_type_uint64,
-		    &nrequests);
+		emitter_json_kv(
+		    emitter, "nmalloc", emitter_type_uint64, &nmalloc);
+		emitter_json_kv(
+		    emitter, "ndalloc", emitter_type_uint64, &ndalloc);
+		emitter_json_kv(
+		    emitter, "curregs", emitter_type_size, &curregs);
+		emitter_json_kv(
+		    emitter, "nrequests", emitter_type_uint64, &nrequests);
 		if (prof_stats_on) {
 			emitter_json_kv(emitter, "prof_live_requested",
 			    emitter_type_uint64, &prof_live.req_sum);
@@ -474,28 +476,28 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i,
 			emitter_json_kv(emitter, "prof_accum_count",
 			    emitter_type_uint64, &prof_accum.count);
 		}
-		emitter_json_kv(emitter, "nfills", emitter_type_uint64,
-		    &nfills);
-		emitter_json_kv(emitter, "nflushes", emitter_type_uint64,
-		    &nflushes);
-		emitter_json_kv(emitter, "nreslabs", emitter_type_uint64,
-		    &nreslabs);
-		emitter_json_kv(emitter, "curslabs", emitter_type_size,
-		    &curslabs);
+		emitter_json_kv(
+		    emitter, "nfills", emitter_type_uint64, &nfills);
+		emitter_json_kv(
+		    emitter, "nflushes", emitter_type_uint64, &nflushes);
+		emitter_json_kv(
+		    emitter, "nreslabs", emitter_type_uint64, &nreslabs);
+		emitter_json_kv(
+		    emitter, "curslabs", emitter_type_size, &curslabs);
 		emitter_json_kv(emitter, "nonfull_slabs", emitter_type_size,
 		    &nonfull_slabs);
 		if (mutex) {
 			emitter_json_object_kv_begin(emitter, "mutex");
-			mutex_stats_emit(emitter, NULL, col_mutex64,
-			    col_mutex32);
+			mutex_stats_emit(
+			    emitter, NULL, col_mutex64, col_mutex32);
 			emitter_json_object_end(emitter);
 		}
 		emitter_json_object_end(emitter);
 
 		size_t availregs = nregs * curslabs;
-		char util[6];
-		if (get_rate_str((uint64_t)curregs, (uint64_t)availregs, util))
-		{
+		char   util[6];
+		if (get_rate_str(
+		        (uint64_t)curregs, (uint64_t)availregs, util)) {
 			if (availregs == 0) {
 				malloc_snprintf(util, sizeof(util), "1");
 			} else if (curregs > availregs) {
@@ -519,7 +521,8 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i,
 		col_ndalloc.uint64_val = ndalloc;
 		col_ndalloc_ps.uint64_val = rate_per_second(ndalloc, uptime);
 		col_nrequests.uint64_val = nrequests;
-		col_nrequests_ps.uint64_val = rate_per_second(nrequests, uptime);
+		col_nrequests_ps.uint64_val = rate_per_second(
+		    nrequests, uptime);
 		if (prof_stats_on) {
 			col_prof_live_requested.uint64_val = prof_live.req_sum;
 			col_prof_live_count.uint64_val = prof_live.count;
@@ -560,7 +563,7 @@ JEMALLOC_COLD
 static void
 stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) {
 	unsigned nbins, nlextents, j;
-	bool in_gap, in_gap_prev;
+	bool     in_gap, in_gap_prev;
 
 	CTL_GET("arenas.nbins", &nbins, unsigned);
 	CTL_GET("arenas.nlextents", &nlextents, unsigned);
@@ -614,8 +617,8 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) {
 	}
 
 	for (j = 0, in_gap = false; j < nlextents; j++) {
-		uint64_t nmalloc, ndalloc, nrequests;
-		size_t lextent_size, curlextents;
+		uint64_t     nmalloc, ndalloc, nrequests;
+		size_t       lextent_size, curlextents;
 		prof_stats_t prof_live;
 		prof_stats_t prof_accum;
 
@@ -624,20 +627,20 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) {
 
 		CTL_LEAF(stats_arenas_mib, 5, "nmalloc", &nmalloc, uint64_t);
 		CTL_LEAF(stats_arenas_mib, 5, "ndalloc", &ndalloc, uint64_t);
-		CTL_LEAF(stats_arenas_mib, 5, "nrequests", &nrequests,
-		    uint64_t);
+		CTL_LEAF(
+		    stats_arenas_mib, 5, "nrequests", &nrequests, uint64_t);
 
 		in_gap_prev = in_gap;
 		in_gap = (nrequests == 0);
 
 		if (in_gap_prev && !in_gap) {
-			emitter_table_printf(emitter,
-			    "                     ---\n");
+			emitter_table_printf(
+			    emitter, "                     ---\n");
 		}
 
 		CTL_LEAF(arenas_lextent_mib, 3, "size", &lextent_size, size_t);
-		CTL_LEAF(stats_arenas_mib, 5, "curlextents", &curlextents,
-		    size_t);
+		CTL_LEAF(
+		    stats_arenas_mib, 5, "curlextents", &curlextents, size_t);
 
 		if (prof_stats_on) {
 			prof_stats_mib[3] = j;
@@ -658,8 +661,8 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) {
 			emitter_json_kv(emitter, "prof_accum_count",
 			    emitter_type_uint64, &prof_accum.count);
 		}
-		emitter_json_kv(emitter, "curlextents", emitter_type_size,
-		    &curlextents);
+		emitter_json_kv(
+		    emitter, "curlextents", emitter_type_size, &curlextents);
 		emitter_json_object_end(emitter);
 
 		col_size.size_val = lextent_size;
@@ -670,7 +673,8 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) {
 		col_ndalloc.uint64_val = ndalloc;
 		col_ndalloc_ps.uint64_val = rate_per_second(ndalloc, uptime);
 		col_nrequests.uint64_val = nrequests;
-		col_nrequests_ps.uint64_val = rate_per_second(nrequests, uptime);
+		col_nrequests_ps.uint64_val = rate_per_second(
+		    nrequests, uptime);
 		if (prof_stats_on) {
 			col_prof_live_requested.uint64_val = prof_live.req_sum;
 			col_prof_live_count.uint64_val = prof_live.count;
@@ -693,8 +697,8 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) {
 JEMALLOC_COLD
 static void
 stats_arena_extents_print(emitter_t *emitter, unsigned i) {
-	unsigned j;
-	bool in_gap, in_gap_prev;
+	unsigned      j;
+	bool          in_gap, in_gap_prev;
 	emitter_row_t header_row;
 	emitter_row_init(&header_row);
 	emitter_row_t row;
@@ -731,12 +735,12 @@ stats_arena_extents_print(emitter_t *emitter, unsigned i) {
 		CTL_LEAF(stats_arenas_mib, 5, "ndirty", &ndirty, size_t);
 		CTL_LEAF(stats_arenas_mib, 5, "nmuzzy", &nmuzzy, size_t);
 		CTL_LEAF(stats_arenas_mib, 5, "nretained", &nretained, size_t);
-		CTL_LEAF(stats_arenas_mib, 5, "dirty_bytes", &dirty_bytes,
+		CTL_LEAF(
+		    stats_arenas_mib, 5, "dirty_bytes", &dirty_bytes, size_t);
+		CTL_LEAF(
+		    stats_arenas_mib, 5, "muzzy_bytes", &muzzy_bytes, size_t);
+		CTL_LEAF(stats_arenas_mib, 5, "retained_bytes", &retained_bytes,
 		    size_t);
-		CTL_LEAF(stats_arenas_mib, 5, "muzzy_bytes", &muzzy_bytes,
-		    size_t);
-		CTL_LEAF(stats_arenas_mib, 5, "retained_bytes",
-		    &retained_bytes, size_t);
 
 		total = ndirty + nmuzzy + nretained;
 		total_bytes = dirty_bytes + muzzy_bytes + retained_bytes;
@@ -745,20 +749,20 @@ stats_arena_extents_print(emitter_t *emitter, unsigned i) {
 		in_gap = (total == 0);
 
 		if (in_gap_prev && !in_gap) {
-			emitter_table_printf(emitter,
-			    "                     ---\n");
+			emitter_table_printf(
+			    emitter, "                     ---\n");
 		}
 
 		emitter_json_object_begin(emitter);
 		emitter_json_kv(emitter, "ndirty", emitter_type_size, &ndirty);
 		emitter_json_kv(emitter, "nmuzzy", emitter_type_size, &nmuzzy);
-		emitter_json_kv(emitter, "nretained", emitter_type_size,
-		    &nretained);
+		emitter_json_kv(
+		    emitter, "nretained", emitter_type_size, &nretained);
 
-		emitter_json_kv(emitter, "dirty_bytes", emitter_type_size,
-		    &dirty_bytes);
-		emitter_json_kv(emitter, "muzzy_bytes", emitter_type_size,
-		    &muzzy_bytes);
+		emitter_json_kv(
+		    emitter, "dirty_bytes", emitter_type_size, &dirty_bytes);
+		emitter_json_kv(
+		    emitter, "muzzy_bytes", emitter_type_size, &muzzy_bytes);
 		emitter_json_kv(emitter, "retained_bytes", emitter_type_size,
 		    &retained_bytes);
 		emitter_json_object_end(emitter);
@@ -785,25 +789,156 @@ stats_arena_extents_print(emitter_t *emitter, unsigned i) {
 }
 
 static void
-stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) {
-	emitter_row_t header_row;
-	emitter_row_init(&header_row);
-	emitter_row_t row;
-	emitter_row_init(&row);
+stats_arena_hpa_shard_sec_print(emitter_t *emitter, unsigned i) {
+	size_t sec_bytes;
+	size_t sec_hits;
+	size_t sec_misses;
+	size_t sec_dalloc_flush;
+	size_t sec_dalloc_noflush;
+	size_t sec_overfills;
+	CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t);
+	emitter_kv(emitter, "sec_bytes", "Bytes in small extent cache",
+	    emitter_type_size, &sec_bytes);
+	CTL_M2_GET("stats.arenas.0.hpa_sec_hits", i, &sec_hits, size_t);
+	emitter_kv(emitter, "sec_hits", "Total hits in small extent cache",
+	    emitter_type_size, &sec_hits);
+	CTL_M2_GET("stats.arenas.0.hpa_sec_misses", i, &sec_misses, size_t);
+	emitter_kv(emitter, "sec_misses", "Total misses in small extent cache",
+	    emitter_type_size, &sec_misses);
+	CTL_M2_GET("stats.arenas.0.hpa_sec_dalloc_noflush", i,
+	    &sec_dalloc_noflush, size_t);
+	emitter_kv(emitter, "sec_dalloc_noflush",
+	    "Dalloc calls without flush in small extent cache",
+	    emitter_type_size, &sec_dalloc_noflush);
+	CTL_M2_GET("stats.arenas.0.hpa_sec_dalloc_flush", i, &sec_dalloc_flush,
+	    size_t);
+	emitter_kv(emitter, "sec_dalloc_flush",
+	    "Dalloc calls with flush in small extent cache", emitter_type_size,
+	    &sec_dalloc_flush);
+	CTL_M2_GET(
+	    "stats.arenas.0.hpa_sec_overfills", i, &sec_overfills, size_t);
+	emitter_kv(emitter, "sec_overfills",
+	    "sec_fill calls that went over max_bytes", emitter_type_size,
+	    &sec_overfills);
+}
+
+static void
+stats_arena_hpa_shard_counters_print(
+    emitter_t *emitter, unsigned i, uint64_t uptime) {
+	size_t npageslabs;
+	size_t nactive;
+	size_t ndirty;
+
+	size_t npageslabs_nonhuge;
+	size_t nactive_nonhuge;
+	size_t ndirty_nonhuge;
+	size_t nretained_nonhuge;
+
+	size_t npageslabs_huge;
+	size_t nactive_huge;
+	size_t ndirty_huge;
 
 	uint64_t npurge_passes;
 	uint64_t npurges;
 	uint64_t nhugifies;
+	uint64_t nhugify_failures;
 	uint64_t ndehugifies;
 
-	CTL_M2_GET("stats.arenas.0.hpa_shard.npurge_passes",
-	    i, &npurge_passes, uint64_t);
-	CTL_M2_GET("stats.arenas.0.hpa_shard.npurges",
-	    i, &npurges, uint64_t);
-	CTL_M2_GET("stats.arenas.0.hpa_shard.nhugifies",
-	    i, &nhugifies, uint64_t);
-	CTL_M2_GET("stats.arenas.0.hpa_shard.ndehugifies",
-	    i, &ndehugifies, uint64_t);
+	CTL_M2_GET(
+	    "stats.arenas.0.hpa_shard.npageslabs", i, &npageslabs, size_t);
+	CTL_M2_GET("stats.arenas.0.hpa_shard.nactive", i, &nactive, size_t);
+	CTL_M2_GET("stats.arenas.0.hpa_shard.ndirty", i, &ndirty, size_t);
+
+	CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.npageslabs_nonhuge", i,
+	    &npageslabs_nonhuge, size_t);
+	CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.nactive_nonhuge", i,
+	    &nactive_nonhuge, size_t);
+	CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.ndirty_nonhuge", i,
+	    &ndirty_nonhuge, size_t);
+	nretained_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES
+	    - nactive_nonhuge - ndirty_nonhuge;
+
+	CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.npageslabs_huge", i,
+	    &npageslabs_huge, size_t);
+	CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.nactive_huge", i,
+	    &nactive_huge, size_t);
+	CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.ndirty_huge", i,
+	    &ndirty_huge, size_t);
+
+	CTL_M2_GET("stats.arenas.0.hpa_shard.npurge_passes", i, &npurge_passes,
+	    uint64_t);
+	CTL_M2_GET("stats.arenas.0.hpa_shard.npurges", i, &npurges, uint64_t);
+	CTL_M2_GET(
+	    "stats.arenas.0.hpa_shard.nhugifies", i, &nhugifies, uint64_t);
+	CTL_M2_GET("stats.arenas.0.hpa_shard.nhugify_failures", i,
+	    &nhugify_failures, uint64_t);
+	CTL_M2_GET(
+	    "stats.arenas.0.hpa_shard.ndehugifies", i, &ndehugifies, uint64_t);
+
+	emitter_table_printf(emitter,
+	    "HPA shard stats:\n"
+	    "  Pageslabs: %zu (%zu huge, %zu nonhuge)\n"
+	    "  Active pages: %zu (%zu huge, %zu nonhuge)\n"
+	    "  Dirty pages: %zu (%zu huge, %zu nonhuge)\n"
+	    "  Retained pages: %zu\n"
+	    "  Purge passes: %" FMTu64 " (%" FMTu64
+	    " / sec)\n"
+	    "  Purges: %" FMTu64 " (%" FMTu64
+	    " / sec)\n"
+	    "  Hugeifies: %" FMTu64 " (%" FMTu64
+	    " / sec)\n"
+	    "  Hugify failures: %" FMTu64 " (%" FMTu64
+	    " / sec)\n"
+	    "  Dehugifies: %" FMTu64 " (%" FMTu64
+	    " / sec)\n"
+	    "\n",
+	    npageslabs, npageslabs_huge, npageslabs_nonhuge, nactive,
+	    nactive_huge, nactive_nonhuge, ndirty, ndirty_huge, ndirty_nonhuge,
+	    nretained_nonhuge, npurge_passes,
+	    rate_per_second(npurge_passes, uptime), npurges,
+	    rate_per_second(npurges, uptime), nhugifies,
+	    rate_per_second(nhugifies, uptime), nhugify_failures,
+	    rate_per_second(nhugify_failures, uptime), ndehugifies,
+	    rate_per_second(ndehugifies, uptime));
+
+	emitter_json_kv(emitter, "npageslabs", emitter_type_size, &npageslabs);
+	emitter_json_kv(emitter, "nactive", emitter_type_size, &nactive);
+	emitter_json_kv(emitter, "ndirty", emitter_type_size, &ndirty);
+
+	emitter_json_kv(
+	    emitter, "npurge_passes", emitter_type_uint64, &npurge_passes);
+	emitter_json_kv(emitter, "npurges", emitter_type_uint64, &npurges);
+	emitter_json_kv(emitter, "nhugifies", emitter_type_uint64, &nhugifies);
+	emitter_json_kv(emitter, "nhugify_failures", emitter_type_uint64,
+	    &nhugify_failures);
+	emitter_json_kv(
+	    emitter, "ndehugifies", emitter_type_uint64, &ndehugifies);
+
+	emitter_json_object_kv_begin(emitter, "slabs");
+	emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size,
+	    &npageslabs_nonhuge);
+	emitter_json_kv(
+	    emitter, "nactive_nonhuge", emitter_type_size, &nactive_nonhuge);
+	emitter_json_kv(
+	    emitter, "ndirty_nonhuge", emitter_type_size, &ndirty_nonhuge);
+	emitter_json_kv(emitter, "nretained_nonhuge", emitter_type_size,
+	    &nretained_nonhuge);
+
+	emitter_json_kv(
+	    emitter, "npageslabs_huge", emitter_type_size, &npageslabs_huge);
+	emitter_json_kv(
+	    emitter, "nactive_huge", emitter_type_size, &nactive_huge);
+	emitter_json_kv(
+	    emitter, "ndirty_huge", emitter_type_size, &ndirty_huge);
+	emitter_json_object_end(emitter); /* End "slabs" */
+}
+
+static void
+stats_arena_hpa_shard_slabs_print(emitter_t *emitter, unsigned i) {
+	emitter_row_t header_row;
+	emitter_row_init(&header_row);
+	emitter_row_t row;
+	emitter_row_init(&row);
 
 	size_t npageslabs_huge;
 	size_t nactive_huge;
@@ -814,48 +949,20 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) {
 	size_t ndirty_nonhuge;
 	size_t nretained_nonhuge;
 
-	size_t sec_bytes;
-	CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t);
-	emitter_kv(emitter, "sec_bytes", "Bytes in small extent cache",
-	    emitter_type_size, &sec_bytes);
+	/* Full slab stats. */
+	CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_huge", i,
+	    &npageslabs_huge, size_t);
+	CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_huge", i,
+	    &nactive_huge, size_t);
+	CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ndirty_huge", i,
+	    &ndirty_huge, size_t);
 
-	/* First, global stats. */
-	emitter_table_printf(emitter,
-	    "HPA shard stats:\n"
-	    "  Purge passes: %" FMTu64 " (%" FMTu64 " / sec)\n"
-	    "  Purges: %" FMTu64 " (%" FMTu64 " / sec)\n"
-	    "  Hugeifies: %" FMTu64 " (%" FMTu64 " / sec)\n"
-	    "  Dehugifies: %" FMTu64 " (%" FMTu64 " / sec)\n"
-	    "\n",
-	    npurge_passes, rate_per_second(npurge_passes, uptime),
-	    npurges, rate_per_second(npurges, uptime),
-	    nhugifies, rate_per_second(nhugifies, uptime),
-	    ndehugifies, rate_per_second(ndehugifies, uptime));
-
-	emitter_json_object_kv_begin(emitter, "hpa_shard");
-	emitter_json_kv(emitter, "npurge_passes", emitter_type_uint64,
-	    &npurge_passes);
-	emitter_json_kv(emitter, "npurges", emitter_type_uint64,
-	    &npurges);
-	emitter_json_kv(emitter, "nhugifies", emitter_type_uint64,
-	    &nhugifies);
-	emitter_json_kv(emitter, "ndehugifies", emitter_type_uint64,
-	    &ndehugifies);
-
-	/* Next, full slab stats. */
-	CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_huge",
-	    i, &npageslabs_huge, size_t);
-	CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_huge",
-	    i, &nactive_huge, size_t);
-	CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ndirty_huge",
-	    i, &ndirty_huge, size_t);
-
-	CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_nonhuge",
-	    i, &npageslabs_nonhuge, size_t);
-	CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_nonhuge",
-	    i, &nactive_nonhuge, size_t);
-	CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ndirty_nonhuge",
-	    i, &ndirty_nonhuge, size_t);
+	CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_nonhuge", i,
+	    &npageslabs_nonhuge, size_t);
+	CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_nonhuge", i,
+	    &nactive_nonhuge, size_t);
+	CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ndirty_nonhuge", i,
+	    &ndirty_nonhuge, size_t);
 	nretained_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES
 	    - nactive_nonhuge - ndirty_nonhuge;
 
@@ -865,40 +972,40 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) {
 	    "      nactive: %zu huge, %zu nonhuge \n"
 	    "      ndirty: %zu huge, %zu nonhuge \n"
 	    "      nretained: 0 huge, %zu nonhuge \n",
-	    npageslabs_huge, npageslabs_nonhuge,
-	    nactive_huge, nactive_nonhuge,
-	    ndirty_huge, ndirty_nonhuge,
-	    nretained_nonhuge);
+	    npageslabs_huge, npageslabs_nonhuge, nactive_huge, nactive_nonhuge,
+	    ndirty_huge, ndirty_nonhuge, nretained_nonhuge);
 
 	emitter_json_object_kv_begin(emitter, "full_slabs");
-	emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size,
-	    &npageslabs_huge);
-	emitter_json_kv(emitter, "nactive_huge", emitter_type_size,
-	    &nactive_huge);
-	emitter_json_kv(emitter, "nactive_huge", emitter_type_size,
-	    &nactive_huge);
+	emitter_json_kv(
+	    emitter, "npageslabs_huge", emitter_type_size, &npageslabs_huge);
+	emitter_json_kv(
+	    emitter, "nactive_huge", emitter_type_size, &nactive_huge);
+	emitter_json_kv(
+	    emitter, "ndirty_huge", emitter_type_size, &ndirty_huge);
 	emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size,
 	    &npageslabs_nonhuge);
-	emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size,
-	    &nactive_nonhuge);
-	emitter_json_kv(emitter, "ndirty_nonhuge", emitter_type_size,
-	    &ndirty_nonhuge);
+	emitter_json_kv(
+	    emitter, "nactive_nonhuge", emitter_type_size, &nactive_nonhuge);
+	emitter_json_kv(
+	    emitter, "ndirty_nonhuge", emitter_type_size, &ndirty_nonhuge);
+	emitter_json_kv(emitter, "nretained_nonhuge", emitter_type_size,
+	    &nretained_nonhuge);
 	emitter_json_object_end(emitter); /* End "full_slabs" */
 
 	/* Next, empty slab stats. */
-	CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.npageslabs_huge",
-	    i, &npageslabs_huge, size_t);
-	CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.nactive_huge",
-	    i, &nactive_huge, size_t);
-	CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.ndirty_huge",
-	    i, &ndirty_huge, size_t);
+	CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.npageslabs_huge", i,
+	    &npageslabs_huge, size_t);
+	CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.nactive_huge", i,
+	    &nactive_huge, size_t);
+	CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.ndirty_huge", i,
+	    &ndirty_huge, size_t);
 
-	CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.npageslabs_nonhuge",
-	    i, &npageslabs_nonhuge, size_t);
-	CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.nactive_nonhuge",
-	    i, &nactive_nonhuge, size_t);
-	CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.ndirty_nonhuge",
-	    i, &ndirty_nonhuge, size_t);
+	CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.npageslabs_nonhuge", i,
+	    &npageslabs_nonhuge, size_t);
+	CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.nactive_nonhuge", i,
+	    &nactive_nonhuge, size_t);
+	CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.ndirty_nonhuge", i,
+	    &ndirty_nonhuge, size_t);
 	nretained_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES
 	    - nactive_nonhuge - ndirty_nonhuge;
 
@@ -907,28 +1014,28 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) {
 	    "      npageslabs: %zu huge, %zu nonhuge\n"
 	    "      nactive: %zu huge, %zu nonhuge \n"
 	    "      ndirty: %zu huge, %zu nonhuge \n"
-	    "      nretained: 0 huge, %zu nonhuge \n"
-	    "\n",
-	    npageslabs_huge, npageslabs_nonhuge,
-	    nactive_huge, nactive_nonhuge,
-	    ndirty_huge, ndirty_nonhuge,
-	    nretained_nonhuge);
+	    "      nretained: 0 huge, %zu nonhuge \n",
+	    npageslabs_huge, npageslabs_nonhuge, nactive_huge, nactive_nonhuge,
+	    ndirty_huge, ndirty_nonhuge, nretained_nonhuge);
 
 	emitter_json_object_kv_begin(emitter, "empty_slabs");
-	emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size,
-	    &npageslabs_huge);
-	emitter_json_kv(emitter, "nactive_huge", emitter_type_size,
-	    &nactive_huge);
-	emitter_json_kv(emitter, "nactive_huge", emitter_type_size,
-	    &nactive_huge);
+	emitter_json_kv(
+	    emitter, "npageslabs_huge", emitter_type_size, &npageslabs_huge);
+	emitter_json_kv(
+	    emitter, "nactive_huge", emitter_type_size, &nactive_huge);
+	emitter_json_kv(
+	    emitter, "ndirty_huge", emitter_type_size, &ndirty_huge);
 	emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size,
 	    &npageslabs_nonhuge);
-	emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size,
-	    &nactive_nonhuge);
-	emitter_json_kv(emitter, "ndirty_nonhuge", emitter_type_size,
-	    &ndirty_nonhuge);
+	emitter_json_kv(
+	    emitter, "nactive_nonhuge", emitter_type_size, &nactive_nonhuge);
+	emitter_json_kv(
+	    emitter, "ndirty_nonhuge", emitter_type_size, &ndirty_nonhuge);
+	emitter_json_kv(emitter, "nretained_nonhuge", emitter_type_size,
+	    &nretained_nonhuge);
 	emitter_json_object_end(emitter); /* End "empty_slabs" */
 
+	/* Last, nonfull slab stats. */
 	COL_HDR(row, size, NULL, right, 20, size)
 	COL_HDR(row, ind, NULL, right, 4, unsigned)
 	COL_HDR(row, npageslabs_huge, NULL, right, 16, size)
@@ -944,6 +1051,7 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) {
 	stats_arenas_mib[2] = i;
 	CTL_LEAF_PREPARE(stats_arenas_mib, 3, "hpa_shard.nonfull_slabs");
 
+	emitter_table_printf(emitter, "  In nonfull slabs:\n");
 	emitter_table_row(emitter, &header_row);
 	emitter_json_array_kv_begin(emitter, "nonfull_slabs");
 	bool in_gap = false;
@@ -952,25 +1060,25 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) {
 
 		CTL_LEAF(stats_arenas_mib, 6, "npageslabs_huge",
 		    &npageslabs_huge, size_t);
-		CTL_LEAF(stats_arenas_mib, 6, "nactive_huge",
-		    &nactive_huge, size_t);
-		CTL_LEAF(stats_arenas_mib, 6, "ndirty_huge",
-		    &ndirty_huge, size_t);
+		CTL_LEAF(
+		    stats_arenas_mib, 6, "nactive_huge", &nactive_huge, size_t);
+		CTL_LEAF(
+		    stats_arenas_mib, 6, "ndirty_huge", &ndirty_huge, size_t);
 
 		CTL_LEAF(stats_arenas_mib, 6, "npageslabs_nonhuge",
 		    &npageslabs_nonhuge, size_t);
 		CTL_LEAF(stats_arenas_mib, 6, "nactive_nonhuge",
 		    &nactive_nonhuge, size_t);
-		CTL_LEAF(stats_arenas_mib, 6, "ndirty_nonhuge",
-		    &ndirty_nonhuge, size_t);
+		CTL_LEAF(stats_arenas_mib, 6, "ndirty_nonhuge", &ndirty_nonhuge,
+		    size_t);
 		nretained_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES
 		    - nactive_nonhuge - ndirty_nonhuge;
 
 		bool in_gap_prev = in_gap;
 		in_gap = (npageslabs_huge == 0 && npageslabs_nonhuge == 0);
 		if (in_gap_prev && !in_gap) {
-			emitter_table_printf(emitter,
-			    "                     ---\n");
+			emitter_table_printf(
+			    emitter, "                     ---\n");
 		}
 
 		col_size.size_val = sz_pind2sz(j);
@@ -989,27 +1097,38 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) {
 		emitter_json_object_begin(emitter);
 		emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size,
 		    &npageslabs_huge);
-		emitter_json_kv(emitter, "nactive_huge", emitter_type_size,
-		    &nactive_huge);
-		emitter_json_kv(emitter, "ndirty_huge", emitter_type_size,
-		    &ndirty_huge);
-		emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size,
-		    &npageslabs_nonhuge);
+		emitter_json_kv(
+		    emitter, "nactive_huge", emitter_type_size, &nactive_huge);
+		emitter_json_kv(
+		    emitter, "ndirty_huge", emitter_type_size, &ndirty_huge);
+		emitter_json_kv(emitter, "npageslabs_nonhuge",
+		    emitter_type_size, &npageslabs_nonhuge);
 		emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size,
 		    &nactive_nonhuge);
 		emitter_json_kv(emitter, "ndirty_nonhuge", emitter_type_size,
 		    &ndirty_nonhuge);
+		emitter_json_kv(emitter, "nretained_nonhuge", emitter_type_size,
+		    &nretained_nonhuge);
 		emitter_json_object_end(emitter);
 	}
 	emitter_json_array_end(emitter); /* End "nonfull_slabs" */
-	emitter_json_object_end(emitter); /* End "hpa_shard" */
 	if (in_gap) {
 		emitter_table_printf(emitter, "                     ---\n");
 	}
 }
 
 static void
-stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind, uint64_t uptime) {
+stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) {
+	emitter_json_object_kv_begin(emitter, "hpa_shard");
+	stats_arena_hpa_shard_sec_print(emitter, i);
+	stats_arena_hpa_shard_counters_print(emitter, i, uptime);
+	stats_arena_hpa_shard_slabs_print(emitter, i);
+	emitter_json_object_end(emitter); /* End "hpa_shard" */
+}
+
+static void
+stats_arena_mutexes_print(
+    emitter_t *emitter, unsigned arena_ind, uint64_t uptime) {
 	emitter_row_t row;
 	emitter_col_t col_name;
 	emitter_col_t col64[mutex_prof_num_uint64_t_counters];
@@ -1030,8 +1149,8 @@ stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind, uint64_t uptim
 	    i++) {
 		const char *name = arena_mutex_names[i];
 		emitter_json_object_kv_begin(emitter, name);
-		mutex_stats_read_arena(stats_arenas_mib, 4, name, &col_name,
-		    col64, col32, uptime);
+		mutex_stats_read_arena(
+		    stats_arenas_mib, 4, name, &col_name, col64, col32, uptime);
 		mutex_stats_emit(emitter, &row, col64, col32);
 		emitter_json_object_end(emitter); /* Close the mutex dict. */
 	}
@@ -1042,78 +1161,86 @@ JEMALLOC_COLD
 static void
 stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large,
     bool mutex, bool extents, bool hpa) {
-	unsigned nthreads;
+	char        name[ARENA_NAME_LEN];
+	char       *namep = name;
+	unsigned    nthreads;
 	const char *dss;
-	ssize_t dirty_decay_ms, muzzy_decay_ms;
-	size_t page, pactive, pdirty, pmuzzy, mapped, retained;
-	size_t base, internal, resident, metadata_thp, extent_avail;
+	ssize_t     dirty_decay_ms, muzzy_decay_ms;
+	size_t      page, pactive, pdirty, pmuzzy, mapped, retained;
+	size_t      base, internal, resident, metadata_edata, metadata_rtree,
+	    metadata_thp, extent_avail;
 	uint64_t dirty_npurge, dirty_nmadvise, dirty_purged;
 	uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged;
-	size_t small_allocated;
+	size_t   small_allocated;
 	uint64_t small_nmalloc, small_ndalloc, small_nrequests, small_nfills,
 	    small_nflushes;
-	size_t large_allocated;
+	size_t   large_allocated;
 	uint64_t large_nmalloc, large_ndalloc, large_nrequests, large_nfills,
 	    large_nflushes;
-	size_t tcache_bytes, tcache_stashed_bytes, abandoned_vm;
+	size_t   tcache_bytes, tcache_stashed_bytes, abandoned_vm;
 	uint64_t uptime;
 
 	CTL_GET("arenas.page", &page, size_t);
+	if (i != MALLCTL_ARENAS_ALL && i != MALLCTL_ARENAS_DESTROYED) {
+		CTL_M1_GET("arena.0.name", i, (void *)&namep, const char *);
+		emitter_kv(
+		    emitter, "name", "name", emitter_type_string, &namep);
+	}
 
 	CTL_M2_GET("stats.arenas.0.nthreads", i, &nthreads, unsigned);
 	emitter_kv(emitter, "nthreads", "assigned threads",
 	    emitter_type_unsigned, &nthreads);
 
 	CTL_M2_GET("stats.arenas.0.uptime", i, &uptime, uint64_t);
-	emitter_kv(emitter, "uptime_ns", "uptime", emitter_type_uint64,
-	    &uptime);
+	emitter_kv(
+	    emitter, "uptime_ns", "uptime", emitter_type_uint64, &uptime);
 
 	CTL_M2_GET("stats.arenas.0.dss", i, &dss, const char *);
 	emitter_kv(emitter, "dss", "dss allocation precedence",
 	    emitter_type_string, &dss);
 
-	CTL_M2_GET("stats.arenas.0.dirty_decay_ms", i, &dirty_decay_ms,
-	    ssize_t);
-	CTL_M2_GET("stats.arenas.0.muzzy_decay_ms", i, &muzzy_decay_ms,
-	    ssize_t);
+	CTL_M2_GET(
+	    "stats.arenas.0.dirty_decay_ms", i, &dirty_decay_ms, ssize_t);
+	CTL_M2_GET(
+	    "stats.arenas.0.muzzy_decay_ms", i, &muzzy_decay_ms, ssize_t);
 	CTL_M2_GET("stats.arenas.0.pactive", i, &pactive, size_t);
 	CTL_M2_GET("stats.arenas.0.pdirty", i, &pdirty, size_t);
 	CTL_M2_GET("stats.arenas.0.pmuzzy", i, &pmuzzy, size_t);
 	CTL_M2_GET("stats.arenas.0.dirty_npurge", i, &dirty_npurge, uint64_t);
-	CTL_M2_GET("stats.arenas.0.dirty_nmadvise", i, &dirty_nmadvise,
-	    uint64_t);
+	CTL_M2_GET(
+	    "stats.arenas.0.dirty_nmadvise", i, &dirty_nmadvise, uint64_t);
 	CTL_M2_GET("stats.arenas.0.dirty_purged", i, &dirty_purged, uint64_t);
 	CTL_M2_GET("stats.arenas.0.muzzy_npurge", i, &muzzy_npurge, uint64_t);
-	CTL_M2_GET("stats.arenas.0.muzzy_nmadvise", i, &muzzy_nmadvise,
-	    uint64_t);
+	CTL_M2_GET(
+	    "stats.arenas.0.muzzy_nmadvise", i, &muzzy_nmadvise, uint64_t);
 	CTL_M2_GET("stats.arenas.0.muzzy_purged", i, &muzzy_purged, uint64_t);
 
 	emitter_row_t decay_row;
 	emitter_row_init(&decay_row);
 
 	/* JSON-style emission. */
-	emitter_json_kv(emitter, "dirty_decay_ms", emitter_type_ssize,
-	    &dirty_decay_ms);
-	emitter_json_kv(emitter, "muzzy_decay_ms", emitter_type_ssize,
-	    &muzzy_decay_ms);
+	emitter_json_kv(
+	    emitter, "dirty_decay_ms", emitter_type_ssize, &dirty_decay_ms);
+	emitter_json_kv(
+	    emitter, "muzzy_decay_ms", emitter_type_ssize, &muzzy_decay_ms);
 
 	emitter_json_kv(emitter, "pactive", emitter_type_size, &pactive);
 	emitter_json_kv(emitter, "pdirty", emitter_type_size, &pdirty);
 	emitter_json_kv(emitter, "pmuzzy", emitter_type_size, &pmuzzy);
 
-	emitter_json_kv(emitter, "dirty_npurge", emitter_type_uint64,
-	    &dirty_npurge);
-	emitter_json_kv(emitter, "dirty_nmadvise", emitter_type_uint64,
-	    &dirty_nmadvise);
-	emitter_json_kv(emitter, "dirty_purged", emitter_type_uint64,
-	    &dirty_purged);
+	emitter_json_kv(
+	    emitter, "dirty_npurge", emitter_type_uint64, &dirty_npurge);
+	emitter_json_kv(
+	    emitter, "dirty_nmadvise", emitter_type_uint64, &dirty_nmadvise);
+	emitter_json_kv(
+	    emitter, "dirty_purged", emitter_type_uint64, &dirty_purged);
 
-	emitter_json_kv(emitter, "muzzy_npurge", emitter_type_uint64,
-	    &muzzy_npurge);
-	emitter_json_kv(emitter, "muzzy_nmadvise", emitter_type_uint64,
-	    &muzzy_nmadvise);
-	emitter_json_kv(emitter, "muzzy_purged", emitter_type_uint64,
-	    &muzzy_purged);
+	emitter_json_kv(
+	    emitter, "muzzy_npurge", emitter_type_uint64, &muzzy_npurge);
+	emitter_json_kv(
+	    emitter, "muzzy_nmadvise", emitter_type_uint64, &muzzy_nmadvise);
+	emitter_json_kv(
+	    emitter, "muzzy_purged", emitter_type_uint64, &muzzy_purged);
 
 	/* Table-style emission. */
 	COL(decay_row, decay_type, right, 9, title);
@@ -1230,12 +1357,12 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large,
 	col_count_nfills_ps.type = emitter_type_uint64;
 	col_count_nflushes_ps.type = emitter_type_uint64;
 
-#define GET_AND_EMIT_ALLOC_STAT(small_or_large, name, valtype)		\
-	CTL_M2_GET("stats.arenas.0." #small_or_large "." #name, i,	\
-	    &small_or_large##_##name, valtype##_t);			\
-	emitter_json_kv(emitter, #name, emitter_type_##valtype,		\
-	    &small_or_large##_##name);					\
-	col_count_##name.type = emitter_type_##valtype;		\
+#define GET_AND_EMIT_ALLOC_STAT(small_or_large, name, valtype)                 \
+	CTL_M2_GET("stats.arenas.0." #small_or_large "." #name, i,             \
+	    &small_or_large##_##name, valtype##_t);                            \
+	emitter_json_kv(                                                       \
+	    emitter, #name, emitter_type_##valtype, &small_or_large##_##name); \
+	col_count_##name.type = emitter_type_##valtype;                        \
 	col_count_##name.valtype##_val = small_or_large##_##name;
 
 	emitter_json_object_kv_begin(emitter, "small");
@@ -1243,20 +1370,20 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large,
 
 	GET_AND_EMIT_ALLOC_STAT(small, allocated, size)
 	GET_AND_EMIT_ALLOC_STAT(small, nmalloc, uint64)
-	col_count_nmalloc_ps.uint64_val =
-	    rate_per_second(col_count_nmalloc.uint64_val, uptime);
+	col_count_nmalloc_ps.uint64_val = rate_per_second(
+	    col_count_nmalloc.uint64_val, uptime);
 	GET_AND_EMIT_ALLOC_STAT(small, ndalloc, uint64)
-	col_count_ndalloc_ps.uint64_val =
-	    rate_per_second(col_count_ndalloc.uint64_val, uptime);
+	col_count_ndalloc_ps.uint64_val = rate_per_second(
+	    col_count_ndalloc.uint64_val, uptime);
 	GET_AND_EMIT_ALLOC_STAT(small, nrequests, uint64)
-	col_count_nrequests_ps.uint64_val =
-	    rate_per_second(col_count_nrequests.uint64_val, uptime);
+	col_count_nrequests_ps.uint64_val = rate_per_second(
+	    col_count_nrequests.uint64_val, uptime);
 	GET_AND_EMIT_ALLOC_STAT(small, nfills, uint64)
-	col_count_nfills_ps.uint64_val =
-	    rate_per_second(col_count_nfills.uint64_val, uptime);
+	col_count_nfills_ps.uint64_val = rate_per_second(
+	    col_count_nfills.uint64_val, uptime);
 	GET_AND_EMIT_ALLOC_STAT(small, nflushes, uint64)
-	col_count_nflushes_ps.uint64_val =
-	    rate_per_second(col_count_nflushes.uint64_val, uptime);
+	col_count_nflushes_ps.uint64_val = rate_per_second(
+	    col_count_nflushes.uint64_val, uptime);
 
 	emitter_table_row(emitter, &alloc_count_row);
 	emitter_json_object_end(emitter); /* Close "small". */
@@ -1266,20 +1393,20 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large,
 
 	GET_AND_EMIT_ALLOC_STAT(large, allocated, size)
 	GET_AND_EMIT_ALLOC_STAT(large, nmalloc, uint64)
-	col_count_nmalloc_ps.uint64_val =
-	    rate_per_second(col_count_nmalloc.uint64_val, uptime);
+	col_count_nmalloc_ps.uint64_val = rate_per_second(
+	    col_count_nmalloc.uint64_val, uptime);
 	GET_AND_EMIT_ALLOC_STAT(large, ndalloc, uint64)
-	col_count_ndalloc_ps.uint64_val =
-	    rate_per_second(col_count_ndalloc.uint64_val, uptime);
+	col_count_ndalloc_ps.uint64_val = rate_per_second(
+	    col_count_ndalloc.uint64_val, uptime);
 	GET_AND_EMIT_ALLOC_STAT(large, nrequests, uint64)
-	col_count_nrequests_ps.uint64_val =
-	    rate_per_second(col_count_nrequests.uint64_val, uptime);
+	col_count_nrequests_ps.uint64_val = rate_per_second(
+	    col_count_nrequests.uint64_val, uptime);
 	GET_AND_EMIT_ALLOC_STAT(large, nfills, uint64)
-	col_count_nfills_ps.uint64_val =
-	    rate_per_second(col_count_nfills.uint64_val, uptime);
+	col_count_nfills_ps.uint64_val = rate_per_second(
+	    col_count_nfills.uint64_val, uptime);
 	GET_AND_EMIT_ALLOC_STAT(large, nflushes, uint64)
-	col_count_nflushes_ps.uint64_val =
-	    rate_per_second(col_count_nflushes.uint64_val, uptime);
+	col_count_nflushes_ps.uint64_val = rate_per_second(
+	    col_count_nflushes.uint64_val, uptime);
 
 	emitter_table_row(emitter, &alloc_count_row);
 	emitter_json_object_end(emitter); /* Close "large". */
@@ -1294,16 +1421,16 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large,
 	col_count_nrequests.uint64_val = small_nrequests + large_nrequests;
 	col_count_nfills.uint64_val = small_nfills + large_nfills;
 	col_count_nflushes.uint64_val = small_nflushes + large_nflushes;
-	col_count_nmalloc_ps.uint64_val =
-	    rate_per_second(col_count_nmalloc.uint64_val, uptime);
-	col_count_ndalloc_ps.uint64_val =
-	    rate_per_second(col_count_ndalloc.uint64_val, uptime);
-	col_count_nrequests_ps.uint64_val =
-	    rate_per_second(col_count_nrequests.uint64_val, uptime);
-	col_count_nfills_ps.uint64_val =
-	    rate_per_second(col_count_nfills.uint64_val, uptime);
-	col_count_nflushes_ps.uint64_val =
-	    rate_per_second(col_count_nflushes.uint64_val, uptime);
+	col_count_nmalloc_ps.uint64_val = rate_per_second(
+	    col_count_nmalloc.uint64_val, uptime);
+	col_count_ndalloc_ps.uint64_val = rate_per_second(
+	    col_count_ndalloc.uint64_val, uptime);
+	col_count_nrequests_ps.uint64_val = rate_per_second(
+	    col_count_nrequests.uint64_val, uptime);
+	col_count_nfills_ps.uint64_val = rate_per_second(
+	    col_count_nfills.uint64_val, uptime);
+	col_count_nflushes_ps.uint64_val = rate_per_second(
+	    col_count_nflushes.uint64_val, uptime);
 	emitter_table_row(emitter, &alloc_count_row);
 
 	emitter_row_t mem_count_row;
@@ -1331,17 +1458,19 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large,
 	mem_count_val.size_val = pactive * page;
 	emitter_table_row(emitter, &mem_count_row);
 
-#define GET_AND_EMIT_MEM_STAT(stat)					\
-	CTL_M2_GET("stats.arenas.0."#stat, i, &stat, size_t);		\
-	emitter_json_kv(emitter, #stat, emitter_type_size, &stat);	\
-	mem_count_title.str_val = #stat":";				\
-	mem_count_val.size_val = stat;					\
+#define GET_AND_EMIT_MEM_STAT(stat)                                            \
+	CTL_M2_GET("stats.arenas.0." #stat, i, &stat, size_t);                 \
+	emitter_json_kv(emitter, #stat, emitter_type_size, &stat);             \
+	mem_count_title.str_val = #stat ":";                                   \
+	mem_count_val.size_val = stat;                                         \
 	emitter_table_row(emitter, &mem_count_row);
 
 	GET_AND_EMIT_MEM_STAT(mapped)
 	GET_AND_EMIT_MEM_STAT(retained)
 	GET_AND_EMIT_MEM_STAT(base)
 	GET_AND_EMIT_MEM_STAT(internal)
+	GET_AND_EMIT_MEM_STAT(metadata_edata)
+	GET_AND_EMIT_MEM_STAT(metadata_rtree)
 	GET_AND_EMIT_MEM_STAT(metadata_thp)
 	GET_AND_EMIT_MEM_STAT(tcache_bytes)
 	GET_AND_EMIT_MEM_STAT(tcache_stashed_bytes)
@@ -1371,13 +1500,13 @@ JEMALLOC_COLD
 static void
 stats_general_print(emitter_t *emitter) {
 	const char *cpv;
-	bool bv, bv2;
-	unsigned uv;
-	uint32_t u32v;
-	uint64_t u64v;
-	int64_t i64v;
-	ssize_t ssv, ssv2;
-	size_t sv, bsz, usz, u32sz, u64sz, i64sz, ssz, sssz, cpsz;
+	bool        bv, bv2;
+	unsigned    uv;
+	uint32_t    u32v;
+	uint64_t    u64v;
+	int64_t     i64v;
+	ssize_t     ssv, ssv2;
+	size_t      sv, bsz, usz, u32sz, u64sz, i64sz, ssz, sssz, cpsz;
 
 	bsz = sizeof(bool);
 	usz = sizeof(unsigned);
@@ -1393,11 +1522,11 @@ stats_general_print(emitter_t *emitter) {
 
 	/* config. */
 	emitter_dict_begin(emitter, "config", "Build-time option settings");
-#define CONFIG_WRITE_BOOL(name)						\
-	do {								\
-		CTL_GET("config."#name, &bv, bool);			\
-		emitter_kv(emitter, #name, "config."#name,		\
-		    emitter_type_bool, &bv);				\
+#define CONFIG_WRITE_BOOL(name)                                                \
+	do {                                                                   \
+		CTL_GET("config." #name, &bv, bool);                           \
+		emitter_kv(                                                    \
+		    emitter, #name, "config." #name, emitter_type_bool, &bv);  \
 	} while (0)
 
 	CONFIG_WRITE_BOOL(cache_oblivious);
@@ -1411,59 +1540,103 @@ stats_general_print(emitter_t *emitter) {
 	CONFIG_WRITE_BOOL(prof);
 	CONFIG_WRITE_BOOL(prof_libgcc);
 	CONFIG_WRITE_BOOL(prof_libunwind);
+	CONFIG_WRITE_BOOL(prof_frameptr);
 	CONFIG_WRITE_BOOL(stats);
 	CONFIG_WRITE_BOOL(utrace);
 	CONFIG_WRITE_BOOL(xmalloc);
 #undef CONFIG_WRITE_BOOL
 	emitter_dict_end(emitter); /* Close "config" dict. */
 
+	/* system. */
+	emitter_dict_begin(emitter, "system", "System configuration");
+
+	/*
+	 * This shows system's THP mode detected at jemalloc's init time.
+	 * jemalloc does not re-detect the mode even if it changes after
+	 * jemalloc's init.  It is assumed that system's THP mode is stable
+	 * during the process's lifetime and a violation could lead to
+	 * undefined behavior.
+	*/
+	const char *thp_mode_name = system_thp_mode_names[init_system_thp_mode];
+	emitter_kv(emitter, "thp_mode", "system.thp_mode", emitter_type_string,
+	    &thp_mode_name);
+
+	emitter_dict_end(emitter); /* Close "system". */
+
 	/* opt. */
-#define OPT_WRITE(name, var, size, emitter_type)			\
-	if (je_mallctl("opt."name, (void *)&var, &size, NULL, 0) ==	\
-	    0) {							\
-		emitter_kv(emitter, name, "opt."name, emitter_type,	\
-		    &var);						\
+#define OPT_WRITE(name, var, size, emitter_type)                               \
+	if (je_mallctl("opt." name, (void *)&var, &size, NULL, 0) == 0) {      \
+		emitter_kv(emitter, name, "opt." name, emitter_type, &var);    \
 	}
 
-#define OPT_WRITE_MUTABLE(name, var1, var2, size, emitter_type,		\
-    altname)								\
-	if (je_mallctl("opt."name, (void *)&var1, &size, NULL, 0) ==	\
-	    0 && je_mallctl(altname, (void *)&var2, &size, NULL, 0)	\
-	    == 0) {							\
-		emitter_kv_note(emitter, name, "opt."name,		\
-		    emitter_type, &var1, altname, emitter_type,		\
-		    &var2);						\
+#define OPT_WRITE_MUTABLE(name, var1, var2, size, emitter_type, altname)       \
+	if (je_mallctl("opt." name, (void *)&var1, &size, NULL, 0) == 0        \
+	    && je_mallctl(altname, (void *)&var2, &size, NULL, 0) == 0) {      \
+		emitter_kv_note(emitter, name, "opt." name, emitter_type,      \
+		    &var1, altname, emitter_type, &var2);                      \
 	}
 
 #define OPT_WRITE_BOOL(name) OPT_WRITE(name, bv, bsz, emitter_type_bool)
-#define OPT_WRITE_BOOL_MUTABLE(name, altname)				\
+#define OPT_WRITE_BOOL_MUTABLE(name, altname)                                  \
 	OPT_WRITE_MUTABLE(name, bv, bv2, bsz, emitter_type_bool, altname)
 
-#define OPT_WRITE_UNSIGNED(name)					\
-	OPT_WRITE(name, uv, usz, emitter_type_unsigned)
+#define OPT_WRITE_UNSIGNED(name) OPT_WRITE(name, uv, usz, emitter_type_unsigned)
 
-#define OPT_WRITE_INT64(name)						\
-	OPT_WRITE(name, i64v, i64sz, emitter_type_int64)
-#define OPT_WRITE_UINT64(name)						\
-	OPT_WRITE(name, u64v, u64sz, emitter_type_uint64)
+#define OPT_WRITE_INT64(name) OPT_WRITE(name, i64v, i64sz, emitter_type_int64)
+#define OPT_WRITE_UINT64(name) OPT_WRITE(name, u64v, u64sz, emitter_type_uint64)
 
-#define OPT_WRITE_SIZE_T(name)						\
-	OPT_WRITE(name, sv, ssz, emitter_type_size)
-#define OPT_WRITE_SSIZE_T(name)						\
-	OPT_WRITE(name, ssv, sssz, emitter_type_ssize)
-#define OPT_WRITE_SSIZE_T_MUTABLE(name, altname)			\
-	OPT_WRITE_MUTABLE(name, ssv, ssv2, sssz, emitter_type_ssize,	\
-	    altname)
+#define OPT_WRITE_SIZE_T(name) OPT_WRITE(name, sv, ssz, emitter_type_size)
+#define OPT_WRITE_SSIZE_T(name) OPT_WRITE(name, ssv, sssz, emitter_type_ssize)
+#define OPT_WRITE_SSIZE_T_MUTABLE(name, altname)                               \
+	OPT_WRITE_MUTABLE(name, ssv, ssv2, sssz, emitter_type_ssize, altname)
 
-#define OPT_WRITE_CHAR_P(name)						\
-	OPT_WRITE(name, cpv, cpsz, emitter_type_string)
+#define OPT_WRITE_CHAR_P(name) OPT_WRITE(name, cpv, cpsz, emitter_type_string)
 
 	emitter_dict_begin(emitter, "opt", "Run-time option settings");
 
+	/*
+	 * opt.malloc_conf.
+	 *
+	 * Sources are documented in https://jemalloc.net/jemalloc.3.html#tuning
+	 * - (Not Included Here) The string specified via --with-malloc-conf,
+	 *     which is already printed out above as config.malloc_conf
+	 * - (Included) The string pointed to by the global variable malloc_conf
+	 * - (Included) The “name” of the file referenced by the symbolic link
+	 *     named /etc/malloc.conf
+	 * - (Included) The value of the environment variable MALLOC_CONF
+	 * - (Optional, Unofficial) The string pointed to by the global variable
+	 *     malloc_conf_2_conf_harder, which is hidden from the public.
+	 *
+	 * Note: The outputs are strictly ordered by priorities (low -> high).
+	 *
+	 */
+#define MALLOC_CONF_WRITE(name, message)                                       \
+	if (je_mallctl("opt.malloc_conf." name, (void *)&cpv, &cpsz, NULL, 0)  \
+	    != 0) {                                                            \
+		cpv = "";                                                      \
+	}                                                                      \
+	emitter_kv(emitter, name, message, emitter_type_string, &cpv);
+
+	MALLOC_CONF_WRITE("global_var", "Global variable malloc_conf");
+	MALLOC_CONF_WRITE("symlink", "Symbolic link malloc.conf");
+	MALLOC_CONF_WRITE("env_var", "Environment variable MALLOC_CONF");
+	/* As this config is unofficial, skip the output if it's NULL */
+	if (je_mallctl("opt.malloc_conf.global_var_2_conf_harder", (void *)&cpv,
+	        &cpsz, NULL, 0)
+	    == 0) {
+		emitter_kv(emitter, "global_var_2_conf_harder",
+		    "Global "
+		    "variable malloc_conf_2_conf_harder",
+		    emitter_type_string, &cpv);
+	}
+#undef MALLOC_CONF_WRITE
+
 	OPT_WRITE_BOOL("abort")
 	OPT_WRITE_BOOL("abort_conf")
 	OPT_WRITE_BOOL("cache_oblivious")
 	OPT_WRITE_BOOL("confirm_conf")
+	OPT_WRITE_BOOL("experimental_hpa_start_huge_if_thp_always")
+	OPT_WRITE_BOOL("experimental_hpa_enforce_hugify")
 	OPT_WRITE_BOOL("retain")
 	OPT_WRITE_CHAR_P("dss")
 	OPT_WRITE_UNSIGNED("narenas")
@@ -1473,7 +1646,9 @@ stats_general_print(emitter_t *emitter) {
 	OPT_WRITE_SIZE_T("hpa_slab_max_alloc")
 	OPT_WRITE_SIZE_T("hpa_hugification_threshold")
 	OPT_WRITE_UINT64("hpa_hugify_delay_ms")
+	OPT_WRITE_BOOL("hpa_hugify_sync")
 	OPT_WRITE_UINT64("hpa_min_purge_interval_ms")
+	OPT_WRITE_SSIZE_T("experimental_hpa_max_purge_nhp")
 	if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0)
 	    == 0) {
 		/*
@@ -1492,11 +1667,14 @@ stats_general_print(emitter_t *emitter) {
 			    "opt.hpa_dirty_mult", emitter_type_string, &bufp);
 		}
 	}
+	OPT_WRITE_SIZE_T("hpa_purge_threshold")
+	OPT_WRITE_UINT64("hpa_min_purge_delay_ms")
+	OPT_WRITE_CHAR_P("hpa_hugify_style")
 	OPT_WRITE_SIZE_T("hpa_sec_nshards")
 	OPT_WRITE_SIZE_T("hpa_sec_max_alloc")
 	OPT_WRITE_SIZE_T("hpa_sec_max_bytes")
-	OPT_WRITE_SIZE_T("hpa_sec_bytes_after_flush")
 	OPT_WRITE_SIZE_T("hpa_sec_batch_fill_extra")
+	OPT_WRITE_BOOL("huge_arena_pac_thp")
 	OPT_WRITE_CHAR_P("metadata_thp")
 	OPT_WRITE_INT64("mutex_max_spin")
 	OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread")
@@ -1508,6 +1686,7 @@ stats_general_print(emitter_t *emitter) {
 	OPT_WRITE_BOOL("utrace")
 	OPT_WRITE_BOOL("xmalloc")
 	OPT_WRITE_BOOL("experimental_infallible_new")
+	OPT_WRITE_BOOL("experimental_tcache_gc")
 	OPT_WRITE_BOOL("tcache")
 	OPT_WRITE_SIZE_T("tcache_max")
 	OPT_WRITE_UNSIGNED("tcache_nslots_small_min")
@@ -1518,12 +1697,14 @@ stats_general_print(emitter_t *emitter) {
 	OPT_WRITE_SIZE_T("tcache_gc_delay_bytes")
 	OPT_WRITE_UNSIGNED("lg_tcache_flush_small_div")
 	OPT_WRITE_UNSIGNED("lg_tcache_flush_large_div")
+	OPT_WRITE_UNSIGNED("debug_double_free_max_scan")
 	OPT_WRITE_CHAR_P("thp")
 	OPT_WRITE_BOOL("prof")
+	OPT_WRITE_UNSIGNED("prof_bt_max")
 	OPT_WRITE_CHAR_P("prof_prefix")
 	OPT_WRITE_BOOL_MUTABLE("prof_active", "prof.active")
-	OPT_WRITE_BOOL_MUTABLE("prof_thread_active_init",
-	    "prof.thread_active_init")
+	OPT_WRITE_BOOL_MUTABLE(
+	    "prof_thread_active_init", "prof.thread_active_init")
 	OPT_WRITE_SSIZE_T_MUTABLE("lg_prof_sample", "prof.lg_sample")
 	OPT_WRITE_BOOL("prof_accum")
 	OPT_WRITE_SSIZE_T("lg_prof_interval")
@@ -1538,8 +1719,10 @@ stats_general_print(emitter_t *emitter) {
 	OPT_WRITE_INT64("stats_interval")
 	OPT_WRITE_CHAR_P("stats_interval_opts")
 	OPT_WRITE_CHAR_P("zero_realloc")
+	OPT_WRITE_SIZE_T("process_madvise_max_batch")
+	OPT_WRITE_BOOL("disable_large_size_classes")
 
-	emitter_dict_end(emitter);
+	emitter_dict_end(emitter); /* Close "opt". */
 
 #undef OPT_WRITE
 #undef OPT_WRITE_MUTABLE
@@ -1559,12 +1742,12 @@ stats_general_print(emitter_t *emitter) {
 		    "prof.thread_active_init", emitter_type_bool, &bv);
 
 		CTL_GET("prof.active", &bv, bool);
-		emitter_kv(emitter, "active", "prof.active", emitter_type_bool,
-		    &bv);
+		emitter_kv(
+		    emitter, "active", "prof.active", emitter_type_bool, &bv);
 
 		CTL_GET("prof.gdump", &bv, bool);
-		emitter_kv(emitter, "gdump", "prof.gdump", emitter_type_bool,
-		    &bv);
+		emitter_kv(
+		    emitter, "gdump", "prof.gdump", emitter_type_bool, &bv);
 
 		CTL_GET("prof.interval", &u64v, uint64_t);
 		emitter_kv(emitter, "interval", "prof.interval",
@@ -1603,6 +1786,10 @@ stats_general_print(emitter_t *emitter) {
 	CTL_GET("arenas.page", &sv, size_t);
 	emitter_kv(emitter, "page", "Page size", emitter_type_size, &sv);
 
+	CTL_GET("arenas.hugepage", &sv, size_t);
+	emitter_kv(
+	    emitter, "hugepage", "Hugepage size", emitter_type_size, &sv);
+
 	if (je_mallctl("arenas.tcache_max", (void *)&sv, &ssz, NULL, 0) == 0) {
 		emitter_kv(emitter, "tcache_max",
 		    "Maximum thread-cached size class", emitter_type_size, &sv);
@@ -1631,20 +1818,20 @@ stats_general_print(emitter_t *emitter) {
 			emitter_json_object_begin(emitter);
 
 			CTL_LEAF(arenas_bin_mib, 3, "size", &sv, size_t);
-			emitter_json_kv(emitter, "size", emitter_type_size,
-			    &sv);
+			emitter_json_kv(
+			    emitter, "size", emitter_type_size, &sv);
 
 			CTL_LEAF(arenas_bin_mib, 3, "nregs", &u32v, uint32_t);
-			emitter_json_kv(emitter, "nregs", emitter_type_uint32,
-			    &u32v);
+			emitter_json_kv(
+			    emitter, "nregs", emitter_type_uint32, &u32v);
 
 			CTL_LEAF(arenas_bin_mib, 3, "slab_size", &sv, size_t);
-			emitter_json_kv(emitter, "slab_size", emitter_type_size,
-			    &sv);
+			emitter_json_kv(
+			    emitter, "slab_size", emitter_type_size, &sv);
 
 			CTL_LEAF(arenas_bin_mib, 3, "nshards", &u32v, uint32_t);
-			emitter_json_kv(emitter, "nshards", emitter_type_uint32,
-			    &u32v);
+			emitter_json_kv(
+			    emitter, "nshards", emitter_type_uint32, &u32v);
 
 			emitter_json_object_end(emitter);
 		}
@@ -1665,8 +1852,8 @@ stats_general_print(emitter_t *emitter) {
 			emitter_json_object_begin(emitter);
 
 			CTL_LEAF(arenas_lextent_mib, 3, "size", &sv, size_t);
-			emitter_json_kv(emitter, "size", emitter_type_size,
-			    &sv);
+			emitter_json_kv(
+			    emitter, "size", emitter_type_size, &sv);
 
 			emitter_json_object_end(emitter);
 		}
@@ -1684,15 +1871,17 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed,
 	 * These should be deleted.  We keep them around for a while, to aid in
 	 * the transition to the emitter code.
 	 */
-	size_t allocated, active, metadata, metadata_thp, resident, mapped,
-	    retained;
-	size_t num_background_threads;
-	size_t zero_reallocs;
+	size_t allocated, active, metadata, metadata_edata, metadata_rtree,
+	    metadata_thp, resident, mapped, retained;
+	size_t   num_background_threads;
+	size_t   zero_reallocs;
 	uint64_t background_thread_num_runs, background_thread_run_interval;
 
 	CTL_GET("stats.allocated", &allocated, size_t);
 	CTL_GET("stats.active", &active, size_t);
 	CTL_GET("stats.metadata", &metadata, size_t);
+	CTL_GET("stats.metadata_edata", &metadata_edata, size_t);
+	CTL_GET("stats.metadata_rtree", &metadata_rtree, size_t);
 	CTL_GET("stats.metadata_thp", &metadata_thp, size_t);
 	CTL_GET("stats.resident", &resident, size_t);
 	CTL_GET("stats.mapped", &mapped, size_t);
@@ -1718,18 +1907,24 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed,
 	emitter_json_kv(emitter, "allocated", emitter_type_size, &allocated);
 	emitter_json_kv(emitter, "active", emitter_type_size, &active);
 	emitter_json_kv(emitter, "metadata", emitter_type_size, &metadata);
-	emitter_json_kv(emitter, "metadata_thp", emitter_type_size,
-	    &metadata_thp);
+	emitter_json_kv(
+	    emitter, "metadata_edata", emitter_type_size, &metadata_edata);
+	emitter_json_kv(
+	    emitter, "metadata_rtree", emitter_type_size, &metadata_rtree);
+	emitter_json_kv(
+	    emitter, "metadata_thp", emitter_type_size, &metadata_thp);
 	emitter_json_kv(emitter, "resident", emitter_type_size, &resident);
 	emitter_json_kv(emitter, "mapped", emitter_type_size, &mapped);
 	emitter_json_kv(emitter, "retained", emitter_type_size, &retained);
-	emitter_json_kv(emitter, "zero_reallocs", emitter_type_size,
-	    &zero_reallocs);
+	emitter_json_kv(
+	    emitter, "zero_reallocs", emitter_type_size, &zero_reallocs);
 
-	emitter_table_printf(emitter, "Allocated: %zu, active: %zu, "
-	    "metadata: %zu (n_thp %zu), resident: %zu, mapped: %zu, "
-	    "retained: %zu\n", allocated, active, metadata, metadata_thp,
-	    resident, mapped, retained);
+	emitter_table_printf(emitter,
+	    "Allocated: %zu, active: %zu, "
+	    "metadata: %zu (n_thp %zu, edata %zu, rtree %zu), resident: %zu, "
+	    "mapped: %zu, retained: %zu\n",
+	    allocated, active, metadata, metadata_thp, metadata_edata,
+	    metadata_rtree, resident, mapped, retained);
 
 	/* Strange behaviors */
 	emitter_table_printf(emitter,
@@ -1737,16 +1932,17 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed,
 
 	/* Background thread stats. */
 	emitter_json_object_kv_begin(emitter, "background_thread");
-	emitter_json_kv(emitter, "num_threads", emitter_type_size,
-	    &num_background_threads);
+	emitter_json_kv(
+	    emitter, "num_threads", emitter_type_size, &num_background_threads);
 	emitter_json_kv(emitter, "num_runs", emitter_type_uint64,
 	    &background_thread_num_runs);
 	emitter_json_kv(emitter, "run_interval", emitter_type_uint64,
 	    &background_thread_run_interval);
 	emitter_json_object_end(emitter); /* Close "background_thread". */
 
-	emitter_table_printf(emitter, "Background threads: %zu, "
-	    "num_runs: %"FMTu64", run_interval: %"FMTu64" ns\n",
+	emitter_table_printf(emitter,
+	    "Background threads: %zu, "
+	    "num_runs: %" FMTu64 ", run_interval: %" FMTu64 " ns\n",
 	    num_background_threads, background_thread_num_runs,
 	    background_thread_run_interval);
 
@@ -1755,7 +1951,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed,
 		emitter_col_t name;
 		emitter_col_t col64[mutex_prof_num_uint64_t_counters];
 		emitter_col_t col32[mutex_prof_num_uint32_t_counters];
-		uint64_t uptime;
+		uint64_t      uptime;
 
 		emitter_row_init(&row);
 		mutex_stats_init_cols(&row, "", &name, col64, col32);
@@ -1770,7 +1966,8 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed,
 		for (int i = 0; i < mutex_prof_num_global_mutexes; i++) {
 			mutex_stats_read_global(stats_mutexes_mib, 2,
 			    global_mutex_names[i], &name, col64, col32, uptime);
-			emitter_json_object_kv_begin(emitter, global_mutex_names[i]);
+			emitter_json_object_kv_begin(
+			    emitter, global_mutex_names[i]);
 			mutex_stats_emit(emitter, &row, col64, col32);
 			emitter_json_object_end(emitter);
 		}
@@ -1789,24 +1986,24 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed,
 		size_t mib[3];
 		size_t miblen = sizeof(mib) / sizeof(size_t);
 		size_t sz;
-		VARIABLE_ARRAY(bool, initialized, narenas);
-		bool destroyed_initialized;
-		unsigned i, j, ninitialized;
+		VARIABLE_ARRAY_UNSAFE(bool, initialized, narenas);
+		bool     destroyed_initialized;
+		unsigned i, ninitialized;
 
 		xmallctlnametomib("arena.0.initialized", mib, &miblen);
 		for (i = ninitialized = 0; i < narenas; i++) {
 			mib[1] = i;
 			sz = sizeof(bool);
-			xmallctlbymib(mib, miblen, &initialized[i], &sz,
-			    NULL, 0);
+			xmallctlbymib(
+			    mib, miblen, &initialized[i], &sz, NULL, 0);
 			if (initialized[i]) {
 				ninitialized++;
 			}
 		}
 		mib[1] = MALLCTL_ARENAS_DESTROYED;
 		sz = sizeof(bool);
-		xmallctlbymib(mib, miblen, &destroyed_initialized, &sz,
-		    NULL, 0);
+		xmallctlbymib(
+		    mib, miblen, &destroyed_initialized, &sz, NULL, 0);
 
 		/* Merged stats. */
 		if (merged && (ninitialized > 1 || !unmerged)) {
@@ -1821,23 +2018,24 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed,
 		/* Destroyed stats. */
 		if (destroyed_initialized && destroyed) {
 			/* Print destroyed arena stats. */
-			emitter_table_printf(emitter,
-			    "Destroyed arenas stats:\n");
+			emitter_table_printf(
+			    emitter, "Destroyed arenas stats:\n");
 			emitter_json_object_kv_begin(emitter, "destroyed");
 			stats_arena_print(emitter, MALLCTL_ARENAS_DESTROYED,
 			    bins, large, mutex, extents, hpa);
-			emitter_json_object_end(emitter); /* Close "destroyed". */
+			emitter_json_object_end(
+			    emitter); /* Close "destroyed". */
 		}
 
 		/* Unmerged stats. */
 		if (unmerged) {
-			for (i = j = 0; i < narenas; i++) {
+			for (i = 0; i < narenas; i++) {
 				if (initialized[i]) {
 					char arena_ind_str[20];
 					malloc_snprintf(arena_ind_str,
 					    sizeof(arena_ind_str), "%u", i);
-					emitter_json_object_kv_begin(emitter,
-					    arena_ind_str);
+					emitter_json_object_kv_begin(
+					    emitter, arena_ind_str);
 					emitter_table_printf(emitter,
 					    "arenas[%s]:\n", arena_ind_str);
 					stats_arena_print(emitter, i, bins,
@@ -1853,9 +2051,9 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed,
 
 void
 stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts) {
-	int err;
+	int      err;
 	uint64_t epoch;
-	size_t u64sz;
+	size_t   u64sz;
 #define OPTION(o, v, d, s) bool v = d;
 	STATS_PRINT_OPTIONS
 #undef OPTION
@@ -1869,15 +2067,17 @@ stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts) {
 	 * */
 	epoch = 1;
 	u64sz = sizeof(uint64_t);
-	err = je_mallctl("epoch", (void *)&epoch, &u64sz, (void *)&epoch,
-	    sizeof(uint64_t));
+	err = je_mallctl(
+	    "epoch", (void *)&epoch, &u64sz, (void *)&epoch, sizeof(uint64_t));
 	if (err != 0) {
 		if (err == EAGAIN) {
-			malloc_write("<jemalloc>: Memory allocation failure in "
+			malloc_write(
+			    "<jemalloc>: Memory allocation failure in "
 			    "mallctl(\"epoch\", ...)\n");
 			return;
 		}
-		malloc_write("<jemalloc>: Failure in mallctl(\"epoch\", "
+		malloc_write(
+		    "<jemalloc>: Failure in mallctl(\"epoch\", "
 		    "...)\n");
 		abort();
 	}
@@ -1885,7 +2085,10 @@ stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts) {
 	if (opts != NULL) {
 		for (unsigned i = 0; opts[i] != '\0'; i++) {
 			switch (opts[i]) {
-#define OPTION(o, v, d, s) case o: v = s; break;
+#define OPTION(o, v, d, s)                                                     \
+	case o:                                                                \
+		v = s;                                                         \
+		break;
 				STATS_PRINT_OPTIONS
 #undef OPTION
 			default:;
@@ -1895,8 +2098,8 @@ stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts) {
 
 	emitter_t emitter;
 	emitter_init(&emitter,
-	    json ? emitter_output_json_compact : emitter_output_table,
-	    write_cb, cbopaque);
+	    json ? emitter_output_json_compact : emitter_output_table, write_cb,
+	    cbopaque);
 	emitter_begin(&emitter);
 	emitter_table_printf(&emitter, "___ Begin jemalloc statistics ___\n");
 	emitter_json_object_kv_begin(&emitter, "jemalloc");
@@ -1905,8 +2108,8 @@ stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts) {
 		stats_general_print(&emitter);
 	}
 	if (config_stats) {
-		stats_print_helper(&emitter, merged, destroyed, unmerged,
-		    bins, large, mutex, extents, hpa);
+		stats_print_helper(&emitter, merged, destroyed, unmerged, bins,
+		    large, mutex, extents, hpa);
 	}
 
 	emitter_json_object_end(&emitter); /* Closes the "jemalloc" dict. */
@@ -1925,14 +2128,31 @@ stats_interval_postponed_event_wait(tsd_t *tsd) {
 }
 
 void
-stats_interval_event_handler(tsd_t *tsd, uint64_t elapsed) {
+stats_interval_event_handler(tsd_t *tsd) {
+	uint64_t last_event = thread_allocated_last_event_get(tsd);
+	uint64_t last_sample_event = tsd_stats_interval_last_event_get(tsd);
+	tsd_stats_interval_last_event_set(tsd, last_event);
+	uint64_t elapsed = last_event - last_sample_event;
+
 	assert(elapsed > 0 && elapsed != TE_INVALID_ELAPSED);
-	if (counter_accum(tsd_tsdn(tsd), &stats_interval_accumulated,
-	    elapsed)) {
+	if (counter_accum(
+	        tsd_tsdn(tsd), &stats_interval_accumulated, elapsed)) {
 		je_malloc_stats_print(NULL, NULL, opt_stats_interval_opts);
 	}
 }
 
+static te_enabled_t
+stats_interval_enabled(void) {
+	return opt_stats_interval >= 0 ? te_enabled_yes : te_enabled_no;
+}
+
+te_base_cb_t stats_interval_te_handler = {
+    .enabled = &stats_interval_enabled,
+    .new_event_wait = &stats_interval_new_event_wait,
+    .postponed_event_wait = &stats_interval_postponed_event_wait,
+    .event_handler = &stats_interval_event_handler,
+};
+
 bool
 stats_boot(void) {
 	uint64_t stats_interval;
@@ -1940,12 +2160,12 @@ stats_boot(void) {
 		assert(opt_stats_interval == -1);
 		stats_interval = 0;
 		stats_interval_accum_batch = 0;
-	} else{
+	} else {
 		/* See comments in stats.h */
-		stats_interval = (opt_stats_interval > 0) ?
-		    opt_stats_interval : 1;
-		uint64_t batch = stats_interval >>
-		    STATS_INTERVAL_ACCUM_LG_BATCH_SIZE;
+		stats_interval = (opt_stats_interval > 0) ? opt_stats_interval
+		                                          : 1;
+		uint64_t batch = stats_interval
+		    >> STATS_INTERVAL_ACCUM_LG_BATCH_SIZE;
 		if (batch > STATS_INTERVAL_ACCUM_BATCH_MAX) {
 			batch = STATS_INTERVAL_ACCUM_BATCH_MAX;
 		} else if (batch == 0) {
diff --git a/src/sz.c b/src/sz.c
index d3115dda..da92f2b4 100644
--- a/src/sz.c
+++ b/src/sz.c
@@ -3,12 +3,12 @@
 #include "jemalloc/internal/sz.h"
 
 JEMALLOC_ALIGNED(CACHELINE)
-size_t sz_pind2sz_tab[SC_NPSIZES+1];
+size_t sz_pind2sz_tab[SC_NPSIZES + 1];
 size_t sz_large_pad;
 
 size_t
 sz_psz_quantize_floor(size_t size) {
-	size_t ret;
+	size_t   ret;
 	pszind_t pind;
 
 	assert(size > 0);
@@ -47,8 +47,8 @@ sz_psz_quantize_ceil(size_t size) {
 		 * search would potentially find sufficiently aligned available
 		 * memory somewhere lower.
 		 */
-		ret = sz_pind2sz(sz_psz2ind(ret - sz_large_pad + 1)) +
-		    sz_large_pad;
+		ret = sz_pind2sz(sz_psz2ind(ret - sz_large_pad + 1))
+		    + sz_large_pad;
 	}
 	return ret;
 }
@@ -65,7 +65,7 @@ sz_boot_pind2sz_tab(const sc_data_t *sc_data) {
 		}
 	}
 	for (int i = pind; i <= (int)SC_NPSIZES; i++) {
-		sz_pind2sz_tab[pind] = sc_data->large_maxclass + PAGE;
+		sz_pind2sz_tab[i] = sc_data->large_maxclass + PAGE;
 	}
 }
 
@@ -95,12 +95,13 @@ sz_boot_size2index_tab(const sc_data_t *sc_data) {
 	for (unsigned sc_ind = 0; sc_ind < SC_NSIZES && dst_ind < dst_max;
 	    sc_ind++) {
 		const sc_t *sc = &sc_data->sc[sc_ind];
-		size_t sz = (ZU(1) << sc->lg_base)
+		size_t      sz = (ZU(1) << sc->lg_base)
 		    + (ZU(sc->ndelta) << sc->lg_delta);
 		size_t max_ind = ((sz + (ZU(1) << SC_LG_TINY_MIN) - 1)
-				   >> SC_LG_TINY_MIN);
+		    >> SC_LG_TINY_MIN);
 		for (; dst_ind <= max_ind && dst_ind < dst_max; dst_ind++) {
-			sz_size2index_tab[dst_ind] = sc_ind;
+			assert(sc_ind < 1 << (sizeof(uint8_t) * 8));
+			sz_size2index_tab[dst_ind] = (uint8_t)sc_ind;
 		}
 	}
 }
diff --git a/src/tcache.c b/src/tcache.c
index fa16732e..fe210d27 100644
--- a/src/tcache.c
+++ b/src/tcache.c
@@ -2,6 +2,7 @@
 #include "jemalloc/internal/jemalloc_internal_includes.h"
 
 #include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/base.h"
 #include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/safety_check.h"
 #include "jemalloc/internal/san.h"
@@ -12,7 +13,7 @@
 
 bool opt_tcache = true;
 
-/* tcache_maxclass is set to 32KB by default.  */
+/* global_do_not_change_tcache_maxclass is set to 32KB by default. */
 size_t opt_tcache_max = ((size_t)1) << 15;
 
 /* Reasonable defaults for min and max values. */
@@ -28,7 +29,7 @@ unsigned opt_tcache_nslots_large = 20;
  * This is bounded by some other constraints as well, like the fact that it
  * must be even, must be less than opt_tcache_nslots_small_max, etc..
  */
-ssize_t	opt_lg_tcache_nslots_mul = 1;
+ssize_t opt_lg_tcache_nslots_mul = 1;
 
 /*
  * Number of allocation bytes between tcache incremental GCs.  Again, this
@@ -57,27 +58,41 @@ size_t opt_tcache_gc_delay_bytes = 0;
 unsigned opt_lg_tcache_flush_small_div = 1;
 unsigned opt_lg_tcache_flush_large_div = 1;
 
-cache_bin_info_t	*tcache_bin_info;
+/*
+ * Number of cache bins enabled, including both large and small.  This value
+ * is only used to initialize tcache_nbins in the per-thread tcache.
+ * Directly modifying it will not affect threads already launched.
+ */
+unsigned global_do_not_change_tcache_nbins;
+/*
+ * Max size class to be cached (can be small or large). This value is only used
+ * to initialize tcache_max in the per-thread tcache.   Directly modifying it
+ * will not affect threads already launched.
+ */
+size_t global_do_not_change_tcache_maxclass;
 
-/* Total stack size required (per tcache).  Include the padding above. */
-static size_t tcache_bin_alloc_size;
-static size_t tcache_bin_alloc_alignment;
+/*
+ * Default bin info for each bin.  Will be initialized in malloc_conf_init
+ * and tcache_boot and should not be modified after that.
+ */
+static cache_bin_info_t opt_tcache_ncached_max[TCACHE_NBINS_MAX] = {{0}};
+/*
+ * Marks whether a bin's info is set already.  This is used in
+ * tcache_bin_info_compute to avoid overwriting ncached_max specified by
+ * malloc_conf.  It should be set only when parsing malloc_conf.
+ */
+static bool opt_tcache_ncached_max_set[TCACHE_NBINS_MAX] = {0};
 
-/* Number of cache bins enabled, including both large and small. */
-unsigned		nhbins;
-/* Max size class to be cached (can be small or large). */
-size_t			tcache_maxclass;
-
-tcaches_t		*tcaches;
+tcaches_t *tcaches;
 
 /* Index of first element within tcaches that has never been used. */
-static unsigned		tcaches_past;
+static unsigned tcaches_past;
 
 /* Head of singly linked list tracking available tcaches elements. */
-static tcaches_t	*tcaches_avail;
+static tcaches_t *tcaches_avail;
 
 /* Protects tcaches{,_past,_avail}. */
-static malloc_mutex_t	tcaches_mtx;
+static malloc_mutex_t tcaches_mtx;
 
 /******************************************************************************/
 
@@ -96,14 +111,83 @@ tcache_gc_postponed_event_wait(tsd_t *tsd) {
 	return TE_MIN_START_WAIT;
 }
 
-uint64_t
-tcache_gc_dalloc_new_event_wait(tsd_t *tsd) {
-	return opt_tcache_gc_incr_bytes;
+static inline void
+tcache_bin_fill_ctl_init(tcache_slow_t *tcache_slow, szind_t szind) {
+	assert(szind < SC_NBINS);
+	cache_bin_fill_ctl_t *ctl =
+	    &tcache_slow->bin_fill_ctl_do_not_access_directly[szind];
+	ctl->base = 1;
+	ctl->offset = 0;
 }
 
-uint64_t
-tcache_gc_dalloc_postponed_event_wait(tsd_t *tsd) {
-	return TE_MIN_START_WAIT;
+static inline cache_bin_fill_ctl_t *
+tcache_bin_fill_ctl_get(tcache_slow_t *tcache_slow, szind_t szind) {
+	assert(szind < SC_NBINS);
+	cache_bin_fill_ctl_t *ctl =
+	    &tcache_slow->bin_fill_ctl_do_not_access_directly[szind];
+	assert(ctl->base > ctl->offset);
+	return ctl;
+}
+
+/*
+ * The number of items to be filled at a time for a given small bin is
+ * calculated by (ncached_max >> lg_fill_div).
+ * The actual ctl struct consists of two fields, i.e. base and offset,
+ * and the difference between the two(base - offset) is the final lg_fill_div.
+ * The base is adjusted during GC based on the traffic within a period of time,
+ * while the offset is updated in real time to handle the immediate traffic.
+ */
+static inline uint8_t
+tcache_nfill_small_lg_div_get(tcache_slow_t *tcache_slow, szind_t szind) {
+	cache_bin_fill_ctl_t *ctl = tcache_bin_fill_ctl_get(tcache_slow, szind);
+	return (ctl->base - (opt_experimental_tcache_gc ? ctl->offset : 0));
+}
+
+/*
+ * When we want to fill more items to respond to burst load,
+ * offset is increased so that (base - offset) is decreased,
+ * which in return increases the number of items to be filled.
+ */
+static inline void
+tcache_nfill_small_burst_prepare(tcache_slow_t *tcache_slow, szind_t szind) {
+	cache_bin_fill_ctl_t *ctl = tcache_bin_fill_ctl_get(tcache_slow, szind);
+	if (ctl->offset + 1 < ctl->base) {
+		ctl->offset++;
+	}
+}
+
+static inline void
+tcache_nfill_small_burst_reset(tcache_slow_t *tcache_slow, szind_t szind) {
+	cache_bin_fill_ctl_t *ctl = tcache_bin_fill_ctl_get(tcache_slow, szind);
+	ctl->offset = 0;
+}
+
+/*
+ * limit == 0: indicating that the fill count should be increased,
+ * i.e. lg_div(base) should be decreased.
+ *
+ * limit != 0: limit is set to ncached_max, indicating that the fill
+ * count should be decreased, i.e. lg_div(base) should be increased.
+ */
+static inline void
+tcache_nfill_small_gc_update(
+    tcache_slow_t *tcache_slow, szind_t szind, cache_bin_sz_t limit) {
+	cache_bin_fill_ctl_t *ctl = tcache_bin_fill_ctl_get(tcache_slow, szind);
+	if (!limit && ctl->base > 1) {
+		/*
+		 * Increase fill count by 2X for small bins.  Make sure
+		 * lg_fill_div stays greater than 1.
+		 */
+		ctl->base--;
+	} else if (limit && (limit >> ctl->base) > 1) {
+		/*
+		 * Reduce fill count by 2X.  Limit lg_fill_div such that
+		 * the fill count is always at least 1.
+		 */
+		ctl->base++;
+	}
+	/* Reset the offset for the next GC period. */
+	ctl->offset = 0;
 }
 
 static uint8_t
@@ -119,420 +203,467 @@ tcache_gc_item_delay_compute(szind_t szind) {
 	return (uint8_t)item_delay;
 }
 
-static void
-tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
-    szind_t szind) {
-	/* Aim to flush 3/4 of items below low-water. */
+static inline void *
+tcache_gc_small_heuristic_addr_get(
+    tsd_t *tsd, tcache_slow_t *tcache_slow, szind_t szind) {
+	assert(szind < SC_NBINS);
+	tsdn_t *tsdn = tsd_tsdn(tsd);
+	bin_t  *bin = bin_choose(tsdn, tcache_slow->arena, szind, NULL);
+	assert(bin != NULL);
+
+	malloc_mutex_lock(tsdn, &bin->lock);
+	edata_t *slab = (bin->slabcur == NULL)
+	    ? edata_heap_first(&bin->slabs_nonfull)
+	    : bin->slabcur;
+	assert(slab != NULL || edata_heap_empty(&bin->slabs_nonfull));
+	void *ret = (slab != NULL) ? edata_addr_get(slab) : NULL;
+	assert(ret != NULL || slab == NULL);
+	malloc_mutex_unlock(tsdn, &bin->lock);
+
+	return ret;
+}
+
+static inline bool
+tcache_gc_is_addr_remote(void *addr, uintptr_t min, uintptr_t max) {
+	assert(addr != NULL);
+	return ((uintptr_t)addr < min || (uintptr_t)addr >= max);
+}
+
+static inline cache_bin_sz_t
+tcache_gc_small_nremote_get(cache_bin_t *cache_bin, void *addr,
+    uintptr_t *addr_min, uintptr_t *addr_max, szind_t szind, size_t nflush) {
+	assert(addr != NULL && addr_min != NULL && addr_max != NULL);
+	/* The slab address range that the provided addr belongs to. */
+	uintptr_t slab_min = (uintptr_t)addr;
+	uintptr_t slab_max = slab_min + bin_infos[szind].slab_size;
+	/*
+	 * When growing retained virtual memory, it's increased exponentially,
+	 * starting from 2M, so that the total number of disjoint virtual
+	 * memory ranges retained by each shard is limited.
+	 */
+	uintptr_t neighbor_min = ((uintptr_t)addr > TCACHE_GC_NEIGHBOR_LIMIT)
+	    ? ((uintptr_t)addr - TCACHE_GC_NEIGHBOR_LIMIT)
+	    : 0;
+	uintptr_t neighbor_max = ((uintptr_t)addr
+	                             < (UINTPTR_MAX - TCACHE_GC_NEIGHBOR_LIMIT))
+	    ? ((uintptr_t)addr + TCACHE_GC_NEIGHBOR_LIMIT)
+	    : UINTPTR_MAX;
+
+	/* Scan the entire bin to count the number of remote pointers. */
+	void         **head = cache_bin->stack_head;
+	cache_bin_sz_t n_remote_slab = 0, n_remote_neighbor = 0;
+	cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin);
+	for (void **cur = head; cur < head + ncached; cur++) {
+		n_remote_slab += (cache_bin_sz_t)tcache_gc_is_addr_remote(
+		    *cur, slab_min, slab_max);
+		n_remote_neighbor += (cache_bin_sz_t)tcache_gc_is_addr_remote(
+		    *cur, neighbor_min, neighbor_max);
+	}
+	/*
+	 * Note: since slab size is dynamic and can be larger than 2M, i.e.
+	 * TCACHE_GC_NEIGHBOR_LIMIT, there is no guarantee as to which of
+	 * n_remote_slab and n_remote_neighbor is greater.
+	 */
+	assert(n_remote_slab <= ncached && n_remote_neighbor <= ncached);
+	/*
+	 * We first consider keeping ptrs from the neighboring addr range,
+	 * since in most cases the range is greater than the slab range.
+	 * So if the number of non-neighbor ptrs is more than the intended
+	 * flush amount, we use it as the anchor for flushing.
+	 */
+	if (n_remote_neighbor >= nflush) {
+		*addr_min = neighbor_min;
+		*addr_max = neighbor_max;
+		return n_remote_neighbor;
+	}
+	/*
+	 * We then consider only keeping ptrs from the local slab, and in most
+	 * cases this is stricter, assuming that slab < 2M is the common case.
+	 */
+	*addr_min = slab_min;
+	*addr_max = slab_max;
+	return n_remote_slab;
+}
+
+/* Shuffle the ptrs in the bin to put the remote pointers at the bottom. */
+static inline void
+tcache_gc_small_bin_shuffle(cache_bin_t *cache_bin, cache_bin_sz_t nremote,
+    uintptr_t addr_min, uintptr_t addr_max) {
+	void         **swap = NULL;
+	cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin);
+	cache_bin_sz_t ntop = ncached - nremote, cnt = 0;
+	assert(ntop > 0 && ntop < ncached);
+	/*
+	 * Scan the [head, head + ntop) part of the cache bin, during which
+	 * bubbling the non-remote ptrs to the top of the bin.
+	 * After this, the [head, head + cnt) part of the bin contains only
+	 * non-remote ptrs, and they're in the same relative order as before.
+	 * While the [head + cnt, head + ntop) part contains only remote ptrs.
+	 */
+	void **head = cache_bin->stack_head;
+	for (void **cur = head; cur < head + ntop; cur++) {
+		if (!tcache_gc_is_addr_remote(*cur, addr_min, addr_max)) {
+			/* Tracks the number of non-remote ptrs seen so far. */
+			cnt++;
+			/*
+			 * There is remote ptr before the current non-remote ptr,
+			 * swap the current non-remote ptr with the remote ptr,
+			 * and increment the swap pointer so that it's still
+			 * pointing to the top remote ptr in the bin.
+			 */
+			if (swap != NULL) {
+				assert(swap < cur);
+				assert(tcache_gc_is_addr_remote(
+				    *swap, addr_min, addr_max));
+				void *tmp = *cur;
+				*cur = *swap;
+				*swap = tmp;
+				swap++;
+				assert(swap <= cur);
+				assert(tcache_gc_is_addr_remote(
+				    *swap, addr_min, addr_max));
+			}
+			continue;
+		} else if (swap == NULL) {
+			/* Swap always points to the top remote ptr in the bin. */
+			swap = cur;
+		}
+	}
+	/*
+	 * Scan the [head + ntop, head + ncached) part of the cache bin,
+	 * after which it should only contain remote ptrs.
+	 */
+	for (void **cur = head + ntop; cur < head + ncached; cur++) {
+		/* Early break if all non-remote ptrs have been moved. */
+		if (cnt == ntop) {
+			break;
+		}
+		if (!tcache_gc_is_addr_remote(*cur, addr_min, addr_max)) {
+			assert(tcache_gc_is_addr_remote(
+			    *(head + cnt), addr_min, addr_max));
+			void *tmp = *cur;
+			*cur = *(head + cnt);
+			*(head + cnt) = tmp;
+			cnt++;
+		}
+	}
+	assert(cnt == ntop);
+	/* Sanity check to make sure the shuffle is done correctly. */
+	for (void **cur = head; cur < head + ncached; cur++) {
+		assert(*cur != NULL);
+		assert(
+		    ((cur < head + ntop)
+		        && !tcache_gc_is_addr_remote(*cur, addr_min, addr_max))
+		    || ((cur >= head + ntop)
+		        && tcache_gc_is_addr_remote(*cur, addr_min, addr_max)));
+	}
+}
+
+static bool
+tcache_gc_small(
+    tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, szind_t szind) {
+	/*
+	 * Aim to flush 3/4 of items below low-water, with remote pointers being
+	 * prioritized for flushing.
+	 */
 	assert(szind < SC_NBINS);
 
 	cache_bin_t *cache_bin = &tcache->bins[szind];
-	cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin,
-	    &tcache_bin_info[szind]);
-	cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin,
-	    &tcache_bin_info[szind]);
+	assert(!tcache_bin_disabled(szind, cache_bin, tcache->tcache_slow));
+	cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin);
+	cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin);
+	if (low_water > 0) {
+		/*
+		 * There is unused items within the GC period => reduce fill count.
+		 * limit field != 0 is borrowed to indicate that the fill count
+		 * should be reduced.
+		 */
+		tcache_nfill_small_gc_update(tcache_slow, szind,
+		    /* limit */ cache_bin_ncached_max_get(cache_bin));
+	} else if (tcache_slow->bin_refilled[szind]) {
+		/*
+		 * There has been refills within the GC period => increase fill count.
+		 * limit field set to 0 is borrowed to indicate that the fill count
+		 * should be increased.
+		 */
+		tcache_nfill_small_gc_update(tcache_slow, szind, /* limit */ 0);
+		tcache_slow->bin_refilled[szind] = false;
+	}
 	assert(!tcache_slow->bin_refilled[szind]);
 
-	size_t nflush = low_water - (low_water >> 2);
-	if (nflush < tcache_slow->bin_flush_delay_items[szind]) {
-		/* Workaround for a conversion warning. */
-		uint8_t nflush_uint8 = (uint8_t)nflush;
-		assert(sizeof(tcache_slow->bin_flush_delay_items[0]) ==
-		    sizeof(nflush_uint8));
-		tcache_slow->bin_flush_delay_items[szind] -= nflush_uint8;
-		return;
-	} else {
-		tcache_slow->bin_flush_delay_items[szind]
-		    = tcache_gc_item_delay_compute(szind);
+	cache_bin_sz_t nflush = low_water - (low_water >> 2);
+	/*
+	 * When the new tcache gc is not enabled, keep the flush delay logic,
+	 * and directly flush the bottom nflush items if needed.
+	 */
+	if (!opt_experimental_tcache_gc) {
+		if (nflush < tcache_slow->bin_flush_delay_items[szind]) {
+			/* Workaround for a conversion warning. */
+			uint8_t nflush_uint8 = (uint8_t)nflush;
+			assert(sizeof(tcache_slow->bin_flush_delay_items[0])
+			    == sizeof(nflush_uint8));
+			tcache_slow->bin_flush_delay_items[szind] -=
+			    nflush_uint8;
+			return false;
+		}
+
+		tcache_slow->bin_flush_delay_items[szind] =
+		    tcache_gc_item_delay_compute(szind);
+		goto label_flush;
 	}
 
-	tcache_bin_flush_small(tsd, tcache, cache_bin, szind,
-	    (unsigned)(ncached - nflush));
+	/* Directly goto the flush path when the entire bin needs to be flushed. */
+	if (nflush == ncached) {
+		goto label_flush;
+	}
+
+	/* Query arena binshard to get heuristic locality info. */
+	void *addr = tcache_gc_small_heuristic_addr_get(
+	    tsd, tcache_slow, szind);
+	if (addr == NULL) {
+		goto label_flush;
+	}
 
 	/*
-	 * Reduce fill count by 2X.  Limit lg_fill_div such that
-	 * the fill count is always at least 1.
+	 * Use the queried addr above to get the number of remote ptrs in the
+	 * bin, and the min/max of the local addr range.
 	 */
-	if ((cache_bin_info_ncached_max(&tcache_bin_info[szind])
-	    >> (tcache_slow->lg_fill_div[szind] + 1)) >= 1) {
-		tcache_slow->lg_fill_div[szind]++;
+	uintptr_t      addr_min, addr_max;
+	cache_bin_sz_t nremote = tcache_gc_small_nremote_get(
+	    cache_bin, addr, &addr_min, &addr_max, szind, nflush);
+
+	/*
+	 * Update the nflush to the larger value between the intended flush count
+	 * and the number of remote ptrs.
+	 */
+	if (nremote > nflush) {
+		nflush = nremote;
 	}
+	/*
+	 * When entering the locality check, nflush should be less than ncached,
+	 * otherwise the entire bin should be flushed regardless. The only case
+	 * when nflush gets updated to ncached after locality check is, when all
+	 * the items in the bin are remote, in which case the entire bin should
+	 * also be flushed.
+	 */
+	assert(nflush < ncached || nremote == ncached);
+	if (nremote == 0 || nremote == ncached) {
+		goto label_flush;
+	}
+
+	/*
+	 * Move the remote points to the bottom of the bin for flushing.
+	 * As long as moved to the bottom, the order of these nremote ptrs
+	 * does not matter, since they are going to be flushed anyway.
+	 * The rest of the ptrs are moved to the top of the bin, and their
+	 * relative order is maintained.
+	 */
+	tcache_gc_small_bin_shuffle(cache_bin, nremote, addr_min, addr_max);
+
+label_flush:
+	if (nflush == 0) {
+		assert(low_water == 0);
+		return false;
+	}
+	assert(nflush <= ncached);
+	tcache_bin_flush_small(
+	    tsd, tcache, cache_bin, szind, (unsigned)(ncached - nflush));
+	return true;
 }
 
-static void
-tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
-    szind_t szind) {
-	/* Like the small GC; flush 3/4 of untouched items. */
+static bool
+tcache_gc_large(
+    tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, szind_t szind) {
+	/*
+	 * Like the small GC, flush 3/4 of untouched items. However, simply flush
+	 * the bottom nflush items, without any locality check.
+	 */
 	assert(szind >= SC_NBINS);
 	cache_bin_t *cache_bin = &tcache->bins[szind];
-	cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin,
-	    &tcache_bin_info[szind]);
-	cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin,
-	    &tcache_bin_info[szind]);
-	tcache_bin_flush_large(tsd, tcache, cache_bin, szind,
-	    (unsigned)(ncached - low_water + (low_water >> 2)));
+	assert(!tcache_bin_disabled(szind, cache_bin, tcache->tcache_slow));
+	cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin);
+	if (low_water == 0) {
+		return false;
+	}
+	unsigned nrem = (unsigned)(cache_bin_ncached_get_local(cache_bin)
+	    - low_water + (low_water >> 2));
+	tcache_bin_flush_large(tsd, tcache, cache_bin, szind, nrem);
+	return true;
+}
+
+/* Try to gc one bin by szind, return true if there is item flushed. */
+static bool
+tcache_try_gc_bin(
+    tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, szind_t szind) {
+	assert(tcache != NULL);
+	cache_bin_t *cache_bin = &tcache->bins[szind];
+	if (tcache_bin_disabled(szind, cache_bin, tcache_slow)) {
+		return false;
+	}
+
+	bool is_small = (szind < SC_NBINS);
+	tcache_bin_flush_stashed(tsd, tcache, cache_bin, szind, is_small);
+	bool ret = is_small ? tcache_gc_small(tsd, tcache_slow, tcache, szind)
+	                    : tcache_gc_large(tsd, tcache_slow, tcache, szind);
+	cache_bin_low_water_set(cache_bin);
+	return ret;
 }
 
 static void
-tcache_event(tsd_t *tsd) {
+tcache_gc_event(tsd_t *tsd) {
 	tcache_t *tcache = tcache_get(tsd);
 	if (tcache == NULL) {
 		return;
 	}
 
 	tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd);
-	szind_t szind = tcache_slow->next_gc_bin;
-	bool is_small = (szind < SC_NBINS);
-	cache_bin_t *cache_bin = &tcache->bins[szind];
+	assert(tcache_slow != NULL);
 
-	tcache_bin_flush_stashed(tsd, tcache, cache_bin, szind, is_small);
-
-	cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin,
-	    &tcache_bin_info[szind]);
-	if (low_water > 0) {
-		if (is_small) {
-			tcache_gc_small(tsd, tcache_slow, tcache, szind);
-		} else {
-			tcache_gc_large(tsd, tcache_slow, tcache, szind);
+	/* When the new tcache gc is not enabled, GC one bin at a time. */
+	if (!opt_experimental_tcache_gc) {
+		szind_t szind = tcache_slow->next_gc_bin;
+		tcache_try_gc_bin(tsd, tcache_slow, tcache, szind);
+		tcache_slow->next_gc_bin++;
+		if (tcache_slow->next_gc_bin == tcache_nbins_get(tcache_slow)) {
+			tcache_slow->next_gc_bin = 0;
 		}
-	} else if (is_small && tcache_slow->bin_refilled[szind]) {
-		assert(low_water == 0);
-		/*
-		 * Increase fill count by 2X for small bins.  Make sure
-		 * lg_fill_div stays greater than 0.
-		 */
-		if (tcache_slow->lg_fill_div[szind] > 1) {
-			tcache_slow->lg_fill_div[szind]--;
+		return;
+	}
+
+	nstime_t now;
+	nstime_copy(&now, &tcache_slow->last_gc_time);
+	nstime_update(&now);
+	assert(nstime_compare(&now, &tcache_slow->last_gc_time) >= 0);
+
+	if (nstime_ns(&now) - nstime_ns(&tcache_slow->last_gc_time)
+	    < TCACHE_GC_INTERVAL_NS) {
+		// time interval is too short, skip this event.
+		return;
+	}
+	/* Update last_gc_time to now. */
+	nstime_copy(&tcache_slow->last_gc_time, &now);
+
+	unsigned gc_small_nbins = 0, gc_large_nbins = 0;
+	unsigned tcache_nbins = tcache_nbins_get(tcache_slow);
+	unsigned small_nbins = tcache_nbins > SC_NBINS ? SC_NBINS
+	                                               : tcache_nbins;
+	szind_t  szind_small = tcache_slow->next_gc_bin_small;
+	szind_t  szind_large = tcache_slow->next_gc_bin_large;
+
+	/* Flush at most TCACHE_GC_SMALL_NBINS_MAX small bins at a time. */
+	for (unsigned i = 0;
+	     i < small_nbins && gc_small_nbins < TCACHE_GC_SMALL_NBINS_MAX;
+	     i++) {
+		assert(szind_small < SC_NBINS);
+		if (tcache_try_gc_bin(tsd, tcache_slow, tcache, szind_small)) {
+			gc_small_nbins++;
+		}
+		if (++szind_small == small_nbins) {
+			szind_small = 0;
 		}
-		tcache_slow->bin_refilled[szind] = false;
 	}
-	cache_bin_low_water_set(cache_bin);
+	tcache_slow->next_gc_bin_small = szind_small;
 
-	tcache_slow->next_gc_bin++;
-	if (tcache_slow->next_gc_bin == nhbins) {
-		tcache_slow->next_gc_bin = 0;
+	if (tcache_nbins <= SC_NBINS) {
+		return;
 	}
-}
 
-void
-tcache_gc_event_handler(tsd_t *tsd, uint64_t elapsed) {
-	assert(elapsed == TE_INVALID_ELAPSED);
-	tcache_event(tsd);
-}
-
-void
-tcache_gc_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed) {
-	assert(elapsed == TE_INVALID_ELAPSED);
-	tcache_event(tsd);
+	/* Flush at most TCACHE_GC_LARGE_NBINS_MAX large bins at a time. */
+	for (unsigned i = SC_NBINS;
+	     i < tcache_nbins && gc_large_nbins < TCACHE_GC_LARGE_NBINS_MAX;
+	     i++) {
+		assert(szind_large >= SC_NBINS && szind_large < tcache_nbins);
+		if (tcache_try_gc_bin(tsd, tcache_slow, tcache, szind_large)) {
+			gc_large_nbins++;
+		}
+		if (++szind_large == tcache_nbins) {
+			szind_large = SC_NBINS;
+		}
+	}
+	tcache_slow->next_gc_bin_large = szind_large;
 }
 
 void *
-tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena,
-    tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind,
-    bool *tcache_success) {
+tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
+    cache_bin_t *cache_bin, szind_t binind, bool *tcache_success) {
 	tcache_slow_t *tcache_slow = tcache->tcache_slow;
-	void *ret;
+	void          *ret;
 
 	assert(tcache_slow->arena != NULL);
-	unsigned nfill = cache_bin_info_ncached_max(&tcache_bin_info[binind])
-	    >> tcache_slow->lg_fill_div[binind];
-	arena_cache_bin_fill_small(tsdn, arena, cache_bin,
-	    &tcache_bin_info[binind], binind, nfill);
+	assert(!tcache_bin_disabled(binind, cache_bin, tcache_slow));
+	assert(cache_bin_ncached_get_local(cache_bin) == 0);
+	cache_bin_sz_t nfill = cache_bin_ncached_max_get(cache_bin)
+	    >> tcache_nfill_small_lg_div_get(tcache_slow, binind);
+	if (nfill == 0) {
+		nfill = 1;
+	}
+	cache_bin_sz_t nfill_min = opt_experimental_tcache_gc
+	    ? ((nfill >> 1) + 1)
+	    : nfill;
+	cache_bin_sz_t nfill_max = nfill;
+	CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nfill_max);
+	cache_bin_init_ptr_array_for_fill(cache_bin, &ptrs, nfill_max);
+
+	cache_bin_sz_t filled = arena_ptr_array_fill_small(tsdn, arena, binind,
+	    &ptrs, /* nfill_min */ nfill_min, /* nfill_max */ nfill_max,
+	    cache_bin->tstats);
+	cache_bin_finish_fill(cache_bin, &ptrs, filled);
+	assert(filled >= nfill_min && filled <= nfill_max);
+	assert(cache_bin_ncached_get_local(cache_bin) == filled);
+
 	tcache_slow->bin_refilled[binind] = true;
+	tcache_nfill_small_burst_prepare(tcache_slow, binind);
 	ret = cache_bin_alloc(cache_bin, tcache_success);
 
 	return ret;
 }
 
-static const void *
-tcache_bin_flush_ptr_getter(void *arr_ctx, size_t ind) {
-	cache_bin_ptr_array_t *arr = (cache_bin_ptr_array_t *)arr_ctx;
-	return arr->ptr[ind];
-}
-
-static void
-tcache_bin_flush_metadata_visitor(void *szind_sum_ctx,
-    emap_full_alloc_ctx_t *alloc_ctx) {
-	size_t *szind_sum = (size_t *)szind_sum_ctx;
-	*szind_sum -= alloc_ctx->szind;
-	util_prefetch_write_range(alloc_ctx->edata, sizeof(edata_t));
-}
-
-JEMALLOC_NOINLINE static void
-tcache_bin_flush_size_check_fail(cache_bin_ptr_array_t *arr, szind_t szind,
-    size_t nptrs, emap_batch_lookup_result_t *edatas) {
-	bool found_mismatch = false;
-	for (size_t i = 0; i < nptrs; i++) {
-		szind_t true_szind = edata_szind_get(edatas[i].edata);
-		if (true_szind != szind) {
-			found_mismatch = true;
-			safety_check_fail_sized_dealloc(
-			    /* current_dealloc */ false,
-			    /* ptr */ tcache_bin_flush_ptr_getter(arr, i),
-			    /* true_size */ sz_index2size(true_szind),
-			    /* input_size */ sz_index2size(szind));
-		}
-	}
-	assert(found_mismatch);
-}
-
-static void
-tcache_bin_flush_edatas_lookup(tsd_t *tsd, cache_bin_ptr_array_t *arr,
-    szind_t binind, size_t nflush, emap_batch_lookup_result_t *edatas) {
-
-	/*
-	 * This gets compiled away when config_opt_safety_checks is false.
-	 * Checks for sized deallocation bugs, failing early rather than
-	 * corrupting metadata.
-	 */
-	size_t szind_sum = binind * nflush;
-	emap_edata_lookup_batch(tsd, &arena_emap_global, nflush,
-	    &tcache_bin_flush_ptr_getter, (void *)arr,
-	    &tcache_bin_flush_metadata_visitor, (void *)&szind_sum,
-	    edatas);
-	if (config_opt_safety_checks && unlikely(szind_sum != 0)) {
-		tcache_bin_flush_size_check_fail(arr, binind, nflush, edatas);
-	}
-}
-
-JEMALLOC_ALWAYS_INLINE bool
-tcache_bin_flush_match(edata_t *edata, unsigned cur_arena_ind,
-    unsigned cur_binshard, bool small) {
-	if (small) {
-		return edata_arena_ind_get(edata) == cur_arena_ind
-		    && edata_binshard_get(edata) == cur_binshard;
-	} else {
-		return edata_arena_ind_get(edata) == cur_arena_ind;
-	}
-}
-
-JEMALLOC_ALWAYS_INLINE void
-tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
-    szind_t binind, cache_bin_ptr_array_t *ptrs, unsigned nflush, bool small) {
-	tcache_slow_t *tcache_slow = tcache->tcache_slow;
-	/*
-	 * A couple lookup calls take tsdn; declare it once for convenience
-	 * instead of calling tsd_tsdn(tsd) all the time.
-	 */
-	tsdn_t *tsdn = tsd_tsdn(tsd);
-
-	if (small) {
-		assert(binind < SC_NBINS);
-	} else {
-		assert(binind < nhbins);
-	}
-	arena_t *tcache_arena = tcache_slow->arena;
-	assert(tcache_arena != NULL);
-
-	/*
-	 * Variable length array must have > 0 length; the last element is never
-	 * touched (it's just included to satisfy the no-zero-length rule).
-	 */
-	VARIABLE_ARRAY(emap_batch_lookup_result_t, item_edata, nflush + 1);
-	tcache_bin_flush_edatas_lookup(tsd, ptrs, binind, nflush, item_edata);
-
-	/*
-	 * The slabs where we freed the last remaining object in the slab (and
-	 * so need to free the slab itself).
-	 * Used only if small == true.
-	 */
-	unsigned dalloc_count = 0;
-	VARIABLE_ARRAY(edata_t *, dalloc_slabs, nflush + 1);
-
-	/*
-	 * We're about to grab a bunch of locks.  If one of them happens to be
-	 * the one guarding the arena-level stats counters we flush our
-	 * thread-local ones to, we do so under one critical section.
-	 */
-	bool merged_stats = false;
-	while (nflush > 0) {
-		/* Lock the arena, or bin, associated with the first object. */
-		edata_t *edata = item_edata[0].edata;
-		unsigned cur_arena_ind = edata_arena_ind_get(edata);
-		arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false);
-
-		/*
-		 * These assignments are always overwritten when small is true,
-		 * and their values are always ignored when small is false, but
-		 * to avoid the technical UB when we pass them as parameters, we
-		 * need to intialize them.
-		 */
-		unsigned cur_binshard = 0;
-		bin_t *cur_bin = NULL;
-		if (small) {
-			cur_binshard = edata_binshard_get(edata);
-			cur_bin = arena_get_bin(cur_arena, binind,
-			    cur_binshard);
-			assert(cur_binshard < bin_infos[binind].n_shards);
-			/*
-			 * If you're looking at profiles, you might think this
-			 * is a good place to prefetch the bin stats, which are
-			 * often a cache miss.  This turns out not to be
-			 * helpful on the workloads we've looked at, with moving
-			 * the bin stats next to the lock seeming to do better.
-			 */
-		}
-
-		if (small) {
-			malloc_mutex_lock(tsdn, &cur_bin->lock);
-		}
-		if (!small && !arena_is_auto(cur_arena)) {
-			malloc_mutex_lock(tsdn, &cur_arena->large_mtx);
-		}
-
-		/*
-		 * If we acquired the right lock and have some stats to flush,
-		 * flush them.
-		 */
-		if (config_stats && tcache_arena == cur_arena
-		    && !merged_stats) {
-			merged_stats = true;
-			if (small) {
-				cur_bin->stats.nflushes++;
-				cur_bin->stats.nrequests +=
-				    cache_bin->tstats.nrequests;
-				cache_bin->tstats.nrequests = 0;
-			} else {
-				arena_stats_large_flush_nrequests_add(tsdn,
-				    &tcache_arena->stats, binind,
-				    cache_bin->tstats.nrequests);
-				cache_bin->tstats.nrequests = 0;
-			}
-		}
-
-		/*
-		 * Large allocations need special prep done.  Afterwards, we can
-		 * drop the large lock.
-		 */
-		if (!small) {
-			for (unsigned i = 0; i < nflush; i++) {
-				void *ptr = ptrs->ptr[i];
-				edata = item_edata[i].edata;
-				assert(ptr != NULL && edata != NULL);
-
-				if (tcache_bin_flush_match(edata, cur_arena_ind,
-				    cur_binshard, small)) {
-					large_dalloc_prep_locked(tsdn,
-					    edata);
-				}
-			}
-		}
-		if (!small && !arena_is_auto(cur_arena)) {
-			malloc_mutex_unlock(tsdn, &cur_arena->large_mtx);
-		}
-
-		/* Deallocate whatever we can. */
-		unsigned ndeferred = 0;
-		/* Init only to avoid used-uninitialized warning. */
-		arena_dalloc_bin_locked_info_t dalloc_bin_info = {0};
-		if (small) {
-			arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind);
-		}
-		for (unsigned i = 0; i < nflush; i++) {
-			void *ptr = ptrs->ptr[i];
-			edata = item_edata[i].edata;
-			assert(ptr != NULL && edata != NULL);
-			if (!tcache_bin_flush_match(edata, cur_arena_ind,
-			    cur_binshard, small)) {
-				/*
-				 * The object was allocated either via a
-				 * different arena, or a different bin in this
-				 * arena.  Either way, stash the object so that
-				 * it can be handled in a future pass.
-				 */
-				ptrs->ptr[ndeferred] = ptr;
-				item_edata[ndeferred].edata = edata;
-				ndeferred++;
-				continue;
-			}
-			if (small) {
-				if (arena_dalloc_bin_locked_step(tsdn,
-				    cur_arena, cur_bin, &dalloc_bin_info,
-				    binind, edata, ptr)) {
-					dalloc_slabs[dalloc_count] = edata;
-					dalloc_count++;
-				}
-			} else {
-				if (large_dalloc_safety_checks(edata, ptr,
-				    binind)) {
-					/* See the comment in isfree. */
-					continue;
-				}
-				large_dalloc_finish(tsdn, edata);
-			}
-		}
-
-		if (small) {
-			arena_dalloc_bin_locked_finish(tsdn, cur_arena, cur_bin,
-			    &dalloc_bin_info);
-			malloc_mutex_unlock(tsdn, &cur_bin->lock);
-		}
-		arena_decay_ticks(tsdn, cur_arena, nflush - ndeferred);
-		nflush = ndeferred;
-	}
-
-	/* Handle all deferred slab dalloc. */
-	assert(small || dalloc_count == 0);
-	for (unsigned i = 0; i < dalloc_count; i++) {
-		edata_t *slab = dalloc_slabs[i];
-		arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab);
-
-	}
-
-	if (config_stats && !merged_stats) {
-		if (small) {
-			/*
-			 * The flush loop didn't happen to flush to this
-			 * thread's arena, so the stats didn't get merged.
-			 * Manually do so now.
-			 */
-			bin_t *bin = arena_bin_choose(tsdn, tcache_arena,
-			    binind, NULL);
-			malloc_mutex_lock(tsdn, &bin->lock);
-			bin->stats.nflushes++;
-			bin->stats.nrequests += cache_bin->tstats.nrequests;
-			cache_bin->tstats.nrequests = 0;
-			malloc_mutex_unlock(tsdn, &bin->lock);
-		} else {
-			arena_stats_large_flush_nrequests_add(tsdn,
-			    &tcache_arena->stats, binind,
-			    cache_bin->tstats.nrequests);
-			cache_bin->tstats.nrequests = 0;
-		}
-	}
-
-}
-
 JEMALLOC_ALWAYS_INLINE void
 tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
     szind_t binind, unsigned rem, bool small) {
+	assert(rem <= cache_bin_ncached_max_get(cache_bin));
+	assert(!tcache_bin_disabled(binind, cache_bin, tcache->tcache_slow));
+	cache_bin_sz_t orig_nstashed = cache_bin_nstashed_get_local(cache_bin);
 	tcache_bin_flush_stashed(tsd, tcache, cache_bin, binind, small);
 
-	cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin,
-	    &tcache_bin_info[binind]);
-	assert((cache_bin_sz_t)rem <= ncached);
-	unsigned nflush = ncached - rem;
+	cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin);
+	assert((cache_bin_sz_t)rem <= ncached + orig_nstashed);
+	if ((cache_bin_sz_t)rem > ncached) {
+		/*
+		 * The flush_stashed above could have done enough flushing, if
+		 * there were many items stashed.  Validate that: 1) non zero
+		 * stashed, and 2) bin stack has available space now.
+		 */
+		assert(orig_nstashed > 0);
+		assert(ncached + cache_bin_nstashed_get_local(cache_bin)
+		    < cache_bin_ncached_max_get(cache_bin));
+		/* Still go through the flush logic for stats purpose only. */
+		rem = ncached;
+	}
+	cache_bin_sz_t nflush = ncached - (cache_bin_sz_t)rem;
 
 	CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush);
-	cache_bin_init_ptr_array_for_flush(cache_bin, &tcache_bin_info[binind],
-	    &ptrs, nflush);
+	cache_bin_init_ptr_array_for_flush(cache_bin, &ptrs, nflush);
 
-	tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, &ptrs, nflush,
-	    small);
+	arena_ptr_array_flush(tsd, binind, &ptrs, nflush, small,
+	    tcache->tcache_slow->arena, cache_bin->tstats);
 
-	cache_bin_finish_flush(cache_bin, &tcache_bin_info[binind], &ptrs,
-	    ncached - rem);
+	cache_bin_finish_flush(cache_bin, &ptrs, nflush);
 }
 
 void
 tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
     szind_t binind, unsigned rem) {
-	tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem, true);
+	tcache_nfill_small_burst_reset(tcache->tcache_slow, binind);
+	tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem,
+	    /* small */ true);
 }
 
 void
 tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
     szind_t binind, unsigned rem) {
-	tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem, false);
+	tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem,
+	    /* small */ false);
 }
 
 /*
@@ -548,35 +679,67 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
 void
 tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
     szind_t binind, bool is_small) {
-	cache_bin_info_t *info = &tcache_bin_info[binind];
+	assert(!tcache_bin_disabled(binind, cache_bin, tcache->tcache_slow));
 	/*
 	 * The two below are for assertion only.  The content of original cached
 	 * items remain unchanged -- the stashed items reside on the other end
 	 * of the stack.  Checking the stack head and ncached to verify.
 	 */
-	void *head_content = *cache_bin->stack_head;
-	cache_bin_sz_t orig_cached = cache_bin_ncached_get_local(cache_bin,
-	    info);
+	void          *head_content = *cache_bin->stack_head;
+	cache_bin_sz_t orig_cached = cache_bin_ncached_get_local(cache_bin);
 
-	cache_bin_sz_t nstashed = cache_bin_nstashed_get_local(cache_bin, info);
-	assert(orig_cached + nstashed <= cache_bin_info_ncached_max(info));
+	cache_bin_sz_t nstashed = cache_bin_nstashed_get_local(cache_bin);
+	assert(orig_cached + nstashed <= cache_bin_ncached_max_get(cache_bin));
 	if (nstashed == 0) {
 		return;
 	}
 
 	CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nstashed);
-	cache_bin_init_ptr_array_for_stashed(cache_bin, binind, info, &ptrs,
-	    nstashed);
+	cache_bin_init_ptr_array_for_stashed(
+	    cache_bin, binind, &ptrs, nstashed);
 	san_check_stashed_ptrs(ptrs.ptr, nstashed, sz_index2size(binind));
-	tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, &ptrs, nstashed,
-	    is_small);
-	cache_bin_finish_flush_stashed(cache_bin, info);
+	arena_ptr_array_flush(tsd, binind, &ptrs, nstashed, is_small,
+	    tcache->tcache_slow->arena, cache_bin->tstats);
+	cache_bin_finish_flush_stashed(cache_bin);
 
-	assert(cache_bin_nstashed_get_local(cache_bin, info) == 0);
-	assert(cache_bin_ncached_get_local(cache_bin, info) == orig_cached);
+	assert(cache_bin_nstashed_get_local(cache_bin) == 0);
+	assert(cache_bin_ncached_get_local(cache_bin) == orig_cached);
 	assert(head_content == *cache_bin->stack_head);
 }
 
+JET_EXTERN bool
+tcache_get_default_ncached_max_set(szind_t ind) {
+	return opt_tcache_ncached_max_set[ind];
+}
+
+JET_EXTERN const cache_bin_info_t *
+tcache_get_default_ncached_max(void) {
+	return opt_tcache_ncached_max;
+}
+
+bool
+tcache_bin_ncached_max_read(
+    tsd_t *tsd, size_t bin_size, cache_bin_sz_t *ncached_max) {
+	if (bin_size > TCACHE_MAXCLASS_LIMIT) {
+		return true;
+	}
+
+	if (!tcache_available(tsd)) {
+		*ncached_max = 0;
+		return false;
+	}
+
+	tcache_t *tcache = tsd_tcachep_get(tsd);
+	assert(tcache != NULL);
+	szind_t bin_ind = sz_size2index(bin_size);
+
+	cache_bin_t *bin = &tcache->bins[bin_ind];
+	*ncached_max = tcache_bin_disabled(bin_ind, bin, tcache->tcache_slow)
+	    ? 0
+	    : cache_bin_ncached_max_get(bin);
+	return false;
+}
+
 void
 tcache_arena_associate(tsdn_t *tsdn, tcache_slow_t *tcache_slow,
     tcache_t *tcache, arena_t *arena) {
@@ -599,17 +762,17 @@ tcache_arena_associate(tsdn_t *tsdn, tcache_slow_t *tcache_slow,
 }
 
 static void
-tcache_arena_dissociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow,
-    tcache_t *tcache) {
+tcache_arena_dissociate(
+    tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_t *tcache) {
 	arena_t *arena = tcache_slow->arena;
 	assert(arena != NULL);
 	if (config_stats) {
 		/* Unlink from list of extant tcaches. */
 		malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
 		if (config_debug) {
-			bool in_ql = false;
+			bool           in_ql = false;
 			tcache_slow_t *iter;
-			ql_foreach(iter, &arena->tcache_ql, link) {
+			ql_foreach (iter, &arena->tcache_ql, link) {
 				if (iter == tcache_slow) {
 					in_ql = true;
 					break;
@@ -633,94 +796,182 @@ tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow,
 	tcache_arena_associate(tsdn, tcache_slow, tcache, arena);
 }
 
-bool
-tsd_tcache_enabled_data_init(tsd_t *tsd) {
-	/* Called upon tsd initialization. */
-	tsd_tcache_enabled_set(tsd, opt_tcache);
-	tsd_slow_update(tsd);
-
-	if (opt_tcache) {
-		/* Trigger tcache init. */
-		tsd_tcache_data_init(tsd);
-	}
-
-	return false;
+static void
+tcache_default_settings_init(tcache_slow_t *tcache_slow) {
+	assert(tcache_slow != NULL);
+	assert(global_do_not_change_tcache_maxclass != 0);
+	assert(global_do_not_change_tcache_nbins != 0);
+	tcache_slow->tcache_nbins = global_do_not_change_tcache_nbins;
 }
 
 static void
-tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
-    void *mem) {
+tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, void *mem,
+    const cache_bin_info_t *tcache_bin_info) {
 	tcache->tcache_slow = tcache_slow;
 	tcache_slow->tcache = tcache;
 
 	memset(&tcache_slow->link, 0, sizeof(ql_elm(tcache_t)));
+	nstime_init_zero(&tcache_slow->last_gc_time);
 	tcache_slow->next_gc_bin = 0;
+	tcache_slow->next_gc_bin_small = 0;
+	tcache_slow->next_gc_bin_large = SC_NBINS;
 	tcache_slow->arena = NULL;
 	tcache_slow->dyn_alloc = mem;
 
 	/*
 	 * We reserve cache bins for all small size classes, even if some may
-	 * not get used (i.e. bins higher than nhbins).  This allows the fast
-	 * and common paths to access cache bin metadata safely w/o worrying
-	 * about which ones are disabled.
+	 * not get used (i.e. bins higher than tcache_nbins).  This allows
+	 * the fast and common paths to access cache bin metadata safely w/o
+	 * worrying about which ones are disabled.
 	 */
-	unsigned n_reserved_bins = nhbins < SC_NBINS ? SC_NBINS : nhbins;
-	memset(tcache->bins, 0, sizeof(cache_bin_t) * n_reserved_bins);
-
-	size_t cur_offset = 0;
-	cache_bin_preincrement(tcache_bin_info, nhbins, mem,
-	    &cur_offset);
-	for (unsigned i = 0; i < nhbins; i++) {
+	unsigned tcache_nbins = tcache_nbins_get(tcache_slow);
+	size_t   cur_offset = 0;
+	cache_bin_preincrement(tcache_bin_info, tcache_nbins, mem, &cur_offset);
+	for (unsigned i = 0; i < tcache_nbins; i++) {
 		if (i < SC_NBINS) {
-			tcache_slow->lg_fill_div[i] = 1;
+			tcache_bin_fill_ctl_init(tcache_slow, i);
 			tcache_slow->bin_refilled[i] = false;
-			tcache_slow->bin_flush_delay_items[i]
-			    = tcache_gc_item_delay_compute(i);
+			tcache_slow->bin_flush_delay_items[i] =
+			    tcache_gc_item_delay_compute(i);
 		}
 		cache_bin_t *cache_bin = &tcache->bins[i];
-		cache_bin_init(cache_bin, &tcache_bin_info[i], mem,
-		    &cur_offset);
+		if (tcache_bin_info[i].ncached_max > 0) {
+			cache_bin_init(
+			    cache_bin, &tcache_bin_info[i], mem, &cur_offset);
+		} else {
+			cache_bin_init_disabled(
+			    cache_bin, tcache_bin_info[i].ncached_max);
+		}
 	}
 	/*
-	 * For small size classes beyond tcache_maxclass (i.e. nhbins < NBINS),
-	 * their cache bins are initialized to a state to safely and efficiently
-	 * fail all fastpath alloc / free, so that no additional check around
-	 * nhbins is needed on fastpath.
+	 * Initialize all disabled bins to a state that can safely and
+	 * efficiently fail all fastpath alloc / free, so that no additional
+	 * check around tcache_nbins is needed on fastpath.  Yet we still
+	 * store the ncached_max in the bin_info for future usage.
 	 */
-	for (unsigned i = nhbins; i < SC_NBINS; i++) {
-		/* Disabled small bins. */
+	for (unsigned i = tcache_nbins; i < TCACHE_NBINS_MAX; i++) {
 		cache_bin_t *cache_bin = &tcache->bins[i];
-		void *fake_stack = mem;
-		size_t fake_offset = 0;
-
-		cache_bin_init(cache_bin, &tcache_bin_info[i], fake_stack,
-		    &fake_offset);
-		assert(tcache_small_bin_disabled(i, cache_bin));
+		cache_bin_init_disabled(
+		    cache_bin, tcache_bin_info[i].ncached_max);
+		assert(tcache_bin_disabled(i, cache_bin, tcache->tcache_slow));
 	}
 
-	cache_bin_postincrement(tcache_bin_info, nhbins, mem,
-	    &cur_offset);
-	/* Sanity check that the whole stack is used. */
-	assert(cur_offset == tcache_bin_alloc_size);
+	cache_bin_postincrement(mem, &cur_offset);
+	if (config_debug) {
+		/* Sanity check that the whole stack is used. */
+		size_t size, alignment;
+		cache_bin_info_compute_alloc(
+		    tcache_bin_info, tcache_nbins, &size, &alignment);
+		assert(cur_offset == size);
+	}
 }
 
-/* Initialize auto tcache (embedded in TSD). */
-bool
-tsd_tcache_data_init(tsd_t *tsd) {
+static inline unsigned
+tcache_ncached_max_compute(szind_t szind) {
+	if (szind >= SC_NBINS) {
+		return opt_tcache_nslots_large;
+	}
+	unsigned slab_nregs = bin_infos[szind].nregs;
+
+	/* We may modify these values; start with the opt versions. */
+	unsigned nslots_small_min = opt_tcache_nslots_small_min;
+	unsigned nslots_small_max = opt_tcache_nslots_small_max;
+
+	/*
+	 * Clamp values to meet our constraints -- even, nonzero, min < max, and
+	 * suitable for a cache bin size.
+	 */
+	if (opt_tcache_nslots_small_max > CACHE_BIN_NCACHED_MAX) {
+		nslots_small_max = CACHE_BIN_NCACHED_MAX;
+	}
+	if (nslots_small_min % 2 != 0) {
+		nslots_small_min++;
+	}
+	if (nslots_small_max % 2 != 0) {
+		nslots_small_max--;
+	}
+	if (nslots_small_min < 2) {
+		nslots_small_min = 2;
+	}
+	if (nslots_small_max < 2) {
+		nslots_small_max = 2;
+	}
+	if (nslots_small_min > nslots_small_max) {
+		nslots_small_min = nslots_small_max;
+	}
+
+	unsigned candidate;
+	if (opt_lg_tcache_nslots_mul < 0) {
+		candidate = slab_nregs >> (-opt_lg_tcache_nslots_mul);
+	} else {
+		candidate = slab_nregs << opt_lg_tcache_nslots_mul;
+	}
+	if (candidate % 2 != 0) {
+		/*
+		 * We need the candidate size to be even -- we assume that we
+		 * can divide by two and get a positive number (e.g. when
+		 * flushing).
+		 */
+		++candidate;
+	}
+	if (candidate <= nslots_small_min) {
+		return nslots_small_min;
+	} else if (candidate <= nslots_small_max) {
+		return candidate;
+	} else {
+		return nslots_small_max;
+	}
+}
+
+JET_EXTERN void
+tcache_bin_info_compute(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) {
+	/*
+	 * Compute the values for each bin, but for bins with indices larger
+	 * than tcache_nbins, no items will be cached.
+	 */
+	for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) {
+		unsigned ncached_max = tcache_get_default_ncached_max_set(i)
+		    ? (unsigned)tcache_get_default_ncached_max()[i].ncached_max
+		    : tcache_ncached_max_compute(i);
+		assert(ncached_max <= CACHE_BIN_NCACHED_MAX);
+		cache_bin_info_init(
+		    &tcache_bin_info[i], (cache_bin_sz_t)ncached_max);
+	}
+}
+
+static void *
+tcache_stack_alloc_impl(tsdn_t *tsdn, size_t size, size_t alignment) {
+	if (cache_bin_stack_use_thp()) {
+		/* Alignment is ignored since it comes from THP. */
+		assert(alignment == QUANTUM);
+		return b0_alloc_tcache_stack(tsdn, size);
+	}
+	size = sz_sa2u(size, alignment);
+	return ipallocztm(tsdn, size, alignment, true, NULL,
+	    true, arena_get(TSDN_NULL, 0, true));
+}
+
+void *(*JET_MUTABLE tcache_stack_alloc)(tsdn_t *tsdn, size_t size,
+    size_t alignment) = tcache_stack_alloc_impl;
+
+static bool
+tsd_tcache_data_init_impl(
+    tsd_t *tsd, arena_t *arena, const cache_bin_info_t *tcache_bin_info) {
 	tcache_slow_t *tcache_slow = tsd_tcache_slowp_get_unsafe(tsd);
-	tcache_t *tcache = tsd_tcachep_get_unsafe(tsd);
+	tcache_t      *tcache = tsd_tcachep_get_unsafe(tsd);
 
 	assert(cache_bin_still_zero_initialized(&tcache->bins[0]));
-	size_t alignment = tcache_bin_alloc_alignment;
-	size_t size = sz_sa2u(tcache_bin_alloc_size, alignment);
+	unsigned tcache_nbins = tcache_nbins_get(tcache_slow);
+	size_t   size, alignment;
+	cache_bin_info_compute_alloc(
+	    tcache_bin_info, tcache_nbins, &size, &alignment);
 
-	void *mem = ipallocztm(tsd_tsdn(tsd), size, alignment, true, NULL,
-	    true, arena_get(TSDN_NULL, 0, true));
+	void *mem = tcache_stack_alloc(tsd_tsdn(tsd), size, alignment);
 	if (mem == NULL) {
 		return true;
 	}
 
-	tcache_init(tsd, tcache_slow, tcache, mem);
+	tcache_init(tsd, tcache_slow, tcache, mem, tcache_bin_info);
 	/*
 	 * Initialization is a bit tricky here.  After malloc init is done, all
 	 * threads can rely on arena_choose and associate tcache accordingly.
@@ -730,18 +981,19 @@ tsd_tcache_data_init(tsd_t *tsd) {
 	 * arena_choose_hard() will re-associate properly.
 	 */
 	tcache_slow->arena = NULL;
-	arena_t *arena;
 	if (!malloc_initialized()) {
 		/* If in initialization, assign to a0. */
 		arena = arena_get(tsd_tsdn(tsd), 0, false);
-		tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, tcache,
-		    arena);
+		tcache_arena_associate(
+		    tsd_tsdn(tsd), tcache_slow, tcache, arena);
 	} else {
-		arena = arena_choose(tsd, NULL);
+		if (arena == NULL) {
+			arena = arena_choose(tsd, NULL);
+		}
 		/* This may happen if thread.tcache.enabled is used. */
 		if (tcache_slow->arena == NULL) {
-			tcache_arena_associate(tsd_tsdn(tsd), tcache_slow,
-			    tcache, arena);
+			tcache_arena_associate(
+			    tsd_tsdn(tsd), tcache_slow, tcache, arena);
 		}
 	}
 	assert(arena == tcache_slow->arena);
@@ -749,6 +1001,27 @@ tsd_tcache_data_init(tsd_t *tsd) {
 	return false;
 }
 
+/* Initialize auto tcache (embedded in TSD). */
+static bool
+tsd_tcache_data_init(tsd_t *tsd, arena_t *arena,
+    const cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) {
+	assert(tcache_bin_info != NULL);
+	bool err = tsd_tcache_data_init_impl(tsd, arena, tcache_bin_info);
+	if (unlikely(err)) {
+		/*
+		 * Disable the tcache before calling malloc_write to
+		 * avoid recursive allocations through libc hooks.
+		 */
+		tsd_tcache_enabled_set(tsd, false);
+		tsd_slow_update(tsd);
+		malloc_write("<jemalloc>: Failed to allocate tcache data\n");
+		if (opt_abort) {
+			abort();
+		}
+	}
+	return err;
+}
+
 /* Created manual tcache for tcache.create mallctl. */
 tcache_t *
 tcache_create_explicit(tsd_t *tsd) {
@@ -757,35 +1030,179 @@ tcache_create_explicit(tsd_t *tsd) {
 	 * the beginning of the whole allocation (for freeing).  The makes sure
 	 * the cache bins have the requested alignment.
 	 */
-	size_t size = tcache_bin_alloc_size + sizeof(tcache_t)
-	    + sizeof(tcache_slow_t);
+	unsigned tcache_nbins = global_do_not_change_tcache_nbins;
+	size_t   tcache_size, alignment;
+	cache_bin_info_compute_alloc(tcache_get_default_ncached_max(),
+	    tcache_nbins, &tcache_size, &alignment);
+
+	size_t size = tcache_size + sizeof(tcache_t) + sizeof(tcache_slow_t);
 	/* Naturally align the pointer stacks. */
 	size = PTR_CEILING(size);
-	size = sz_sa2u(size, tcache_bin_alloc_alignment);
+	size = sz_sa2u(size, alignment);
 
-	void *mem = ipallocztm(tsd_tsdn(tsd), size, tcache_bin_alloc_alignment,
-	    true, NULL, true, arena_get(TSDN_NULL, 0, true));
+	void *mem = ipallocztm(tsd_tsdn(tsd), size, alignment, true, NULL, true,
+	    arena_get(TSDN_NULL, 0, true));
 	if (mem == NULL) {
 		return NULL;
 	}
-	tcache_t *tcache = (void *)((uintptr_t)mem + tcache_bin_alloc_size);
-	tcache_slow_t *tcache_slow =
-	    (void *)((uintptr_t)mem + tcache_bin_alloc_size + sizeof(tcache_t));
-	tcache_init(tsd, tcache_slow, tcache, mem);
+	tcache_t      *tcache = (void *)((byte_t *)mem + tcache_size);
+	tcache_slow_t *tcache_slow = (void *)((byte_t *)mem + tcache_size
+	    + sizeof(tcache_t));
+	tcache_default_settings_init(tcache_slow);
+	tcache_init(
+	    tsd, tcache_slow, tcache, mem, tcache_get_default_ncached_max());
 
-	tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, tcache,
-	    arena_ichoose(tsd, NULL));
+	tcache_arena_associate(
+	    tsd_tsdn(tsd), tcache_slow, tcache, arena_ichoose(tsd, NULL));
 
 	return tcache;
 }
 
+bool
+tsd_tcache_enabled_data_init(tsd_t *tsd) {
+	/* Called upon tsd initialization. */
+	tsd_tcache_enabled_set(tsd, opt_tcache);
+	/*
+	 * tcache is not available yet, but we need to set up its tcache_nbins
+	 * in advance.
+	 */
+	tcache_default_settings_init(tsd_tcache_slowp_get(tsd));
+	tsd_slow_update(tsd);
+
+	if (opt_tcache) {
+		/* Trigger tcache init. */
+		return tsd_tcache_data_init(
+			tsd, NULL, tcache_get_default_ncached_max());
+	}
+
+	return false;
+}
+
+void
+tcache_enabled_set(tsd_t *tsd, bool enabled) {
+	bool was_enabled = tsd_tcache_enabled_get(tsd);
+
+	if (!was_enabled && enabled) {
+		if (tsd_tcache_data_init(
+		    tsd, NULL, tcache_get_default_ncached_max())) {
+			return;
+		}
+	} else if (was_enabled && !enabled) {
+		tcache_cleanup(tsd);
+	}
+	/* Commit the state last.  Above calls check current state. */
+	tsd_tcache_enabled_set(tsd, enabled);
+	tsd_slow_update(tsd);
+}
+
+bool
+thread_tcache_max_set(tsd_t *tsd, size_t tcache_max) {
+	assert(tcache_max <= TCACHE_MAXCLASS_LIMIT);
+	assert(tcache_max == sz_s2u(tcache_max));
+	tcache_t        *tcache = tsd_tcachep_get(tsd);
+	tcache_slow_t   *tcache_slow = tcache->tcache_slow;
+	cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX] = {{0}};
+	bool             ret = false;
+	assert(tcache != NULL && tcache_slow != NULL);
+
+	bool                    enabled = tcache_available(tsd);
+	arena_t *assigned_arena JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(NULL);
+	if (enabled) {
+		assigned_arena = tcache_slow->arena;
+		/* Carry over the bin settings during the reboot. */
+		tcache_bin_settings_backup(tcache, tcache_bin_info);
+		/* Shutdown and reboot the tcache for a clean slate. */
+		tcache_cleanup(tsd);
+	}
+
+	/*
+	* Still set tcache_nbins of the tcache even if the tcache is not
+	* available yet because the values are stored in tsd_t and are
+	* always available for changing.
+	*/
+	tcache_max_set(tcache_slow, tcache_max);
+
+	if (enabled) {
+		ret = tsd_tcache_data_init(tsd, assigned_arena, tcache_bin_info);
+	}
+
+	assert(tcache_nbins_get(tcache_slow) == sz_size2index(tcache_max) + 1);
+	return ret;
+}
+
+static bool
+tcache_bin_info_settings_parse(const char *bin_settings_segment_cur,
+    size_t len_left, cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX],
+    bool bin_info_is_set[TCACHE_NBINS_MAX]) {
+	do {
+		size_t size_start, size_end;
+		size_t ncached_max;
+		bool   err = multi_setting_parse_next(&bin_settings_segment_cur,
+		      &len_left, &size_start, &size_end, &ncached_max);
+		if (err) {
+			return true;
+		}
+		if (size_end > TCACHE_MAXCLASS_LIMIT) {
+			size_end = TCACHE_MAXCLASS_LIMIT;
+		}
+		if (size_start > TCACHE_MAXCLASS_LIMIT
+		    || size_start > size_end) {
+			continue;
+		}
+		/* May get called before sz_init (during malloc_conf_init). */
+		szind_t bin_start = sz_size2index_compute(size_start);
+		szind_t bin_end = sz_size2index_compute(size_end);
+		if (ncached_max > CACHE_BIN_NCACHED_MAX) {
+			ncached_max = (size_t)CACHE_BIN_NCACHED_MAX;
+		}
+		for (szind_t i = bin_start; i <= bin_end; i++) {
+			cache_bin_info_init(
+			    &tcache_bin_info[i], (cache_bin_sz_t)ncached_max);
+			if (bin_info_is_set != NULL) {
+				bin_info_is_set[i] = true;
+			}
+		}
+	} while (len_left > 0);
+
+	return false;
+}
+
+bool
+tcache_bin_info_default_init(
+    const char *bin_settings_segment_cur, size_t len_left) {
+	return tcache_bin_info_settings_parse(bin_settings_segment_cur,
+	    len_left, opt_tcache_ncached_max, opt_tcache_ncached_max_set);
+}
+
+bool
+tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len) {
+	assert(tcache_available(tsd));
+	assert(len != 0);
+	tcache_t *tcache = tsd_tcachep_get(tsd);
+	assert(tcache != NULL);
+	cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX];
+	tcache_bin_settings_backup(tcache, tcache_bin_info);
+
+	if (tcache_bin_info_settings_parse(
+	        settings, len, tcache_bin_info, NULL)) {
+		return true;
+	}
+
+	arena_t *assigned_arena = tcache->tcache_slow->arena;
+	tcache_cleanup(tsd);
+	return tsd_tcache_data_init(tsd, assigned_arena, tcache_bin_info);
+}
+
 static void
 tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) {
 	tcache_slow_t *tcache_slow = tcache->tcache_slow;
 	assert(tcache_slow->arena != NULL);
 
-	for (unsigned i = 0; i < nhbins; i++) {
+	for (unsigned i = 0; i < tcache_nbins_get(tcache_slow); i++) {
 		cache_bin_t *cache_bin = &tcache->bins[i];
+		if (tcache_bin_disabled(i, cache_bin, tcache_slow)) {
+			continue;
+		}
 		if (i < SC_NBINS) {
 			tcache_bin_flush_small(tsd, tcache, cache_bin, i, 0);
 		} else {
@@ -812,10 +1229,14 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) {
 
 	if (tsd_tcache) {
 		cache_bin_t *cache_bin = &tcache->bins[0];
-		cache_bin_assert_empty(cache_bin, &tcache_bin_info[0]);
+		cache_bin_assert_empty(cache_bin);
+	}
+	if (tsd_tcache && cache_bin_stack_use_thp()) {
+		b0_dalloc_tcache_stack(tsd_tsdn(tsd), tcache_slow->dyn_alloc);
+	} else {
+		idalloctm(tsd_tsdn(tsd), tcache_slow->dyn_alloc, NULL, NULL,
+		    true, true);
 	}
-	idalloctm(tsd_tsdn(tsd), tcache_slow->dyn_alloc, NULL, NULL, true,
-	    true);
 
 	/*
 	 * The deallocation and tcache flush above may not trigger decay since
@@ -823,11 +1244,11 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) {
 	 * tsd).  Manually trigger decay to avoid pathological cases.  Also
 	 * include arena 0 because the tcache array is allocated from it.
 	 */
-	arena_decay(tsd_tsdn(tsd), arena_get(tsd_tsdn(tsd), 0, false),
-	    false, false);
+	arena_decay(
+	    tsd_tsdn(tsd), arena_get(tsd_tsdn(tsd), 0, false), false, false);
 
-	if (arena_nthreads_get(arena, false) == 0 &&
-	    !background_thread_enabled()) {
+	if (arena_nthreads_get(arena, false) == 0
+	    && !background_thread_enabled()) {
 		/* Force purging when no threads assigned to the arena anymore. */
 		arena_decay(tsd_tsdn(tsd), arena,
 		    /* is_background_thread */ false, /* all */ true);
@@ -850,13 +1271,8 @@ tcache_cleanup(tsd_t *tsd) {
 	assert(!cache_bin_still_zero_initialized(&tcache->bins[0]));
 
 	tcache_destroy(tsd, tcache, true);
-	if (config_debug) {
-		/*
-		 * For debug testing only, we want to pretend we're still in the
-		 * zero-initialized state.
-		 */
-		memset(tcache->bins, 0, sizeof(cache_bin_t) * nhbins);
-	}
+	/* Make sure all bins used are reinitialized to the clean state. */
+	memset(tcache->bins, 0, sizeof(cache_bin_t) * TCACHE_NBINS_MAX);
 }
 
 void
@@ -864,10 +1280,13 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
 	cassert(config_stats);
 
 	/* Merge and reset tcache stats. */
-	for (unsigned i = 0; i < nhbins; i++) {
+	for (unsigned i = 0; i < tcache_nbins_get(tcache->tcache_slow); i++) {
 		cache_bin_t *cache_bin = &tcache->bins[i];
+		if (tcache_bin_disabled(i, cache_bin, tcache->tcache_slow)) {
+			continue;
+		}
 		if (i < SC_NBINS) {
-			bin_t *bin = arena_bin_choose(tsdn, arena, i, NULL);
+			bin_t *bin = bin_choose(tsdn, arena, i, NULL);
 			malloc_mutex_lock(tsdn, &bin->lock);
 			bin->stats.nrequests += cache_bin->tstats.nrequests;
 			malloc_mutex_unlock(tsdn, &bin->lock);
@@ -887,7 +1306,7 @@ tcaches_create_prep(tsd_t *tsd, base_t *base) {
 
 	if (tcaches == NULL) {
 		tcaches = base_alloc(tsd_tsdn(tsd), base,
-		    sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX+1), CACHELINE);
+		    sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX + 1), CACHELINE);
 		if (tcaches == NULL) {
 			err = true;
 			goto label_return;
@@ -978,7 +1397,7 @@ void
 tcaches_destroy(tsd_t *tsd, unsigned ind) {
 	malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
 	tcaches_t *elm = &tcaches[ind];
-	tcache_t *tcache = tcaches_elm_remove(tsd, elm, false);
+	tcache_t  *tcache = tcaches_elm_remove(tsd, elm, false);
 	elm->next = tcaches_avail;
 	tcaches_avail = elm;
 	malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
@@ -987,97 +1406,25 @@ tcaches_destroy(tsd_t *tsd, unsigned ind) {
 	}
 }
 
-static unsigned
-tcache_ncached_max_compute(szind_t szind) {
-	if (szind >= SC_NBINS) {
-		assert(szind < nhbins);
-		return opt_tcache_nslots_large;
-	}
-	unsigned slab_nregs = bin_infos[szind].nregs;
-
-	/* We may modify these values; start with the opt versions. */
-	unsigned nslots_small_min = opt_tcache_nslots_small_min;
-	unsigned nslots_small_max = opt_tcache_nslots_small_max;
-
-	/*
-	 * Clamp values to meet our constraints -- even, nonzero, min < max, and
-	 * suitable for a cache bin size.
-	 */
-	if (opt_tcache_nslots_small_max > CACHE_BIN_NCACHED_MAX) {
-		nslots_small_max = CACHE_BIN_NCACHED_MAX;
-	}
-	if (nslots_small_min % 2 != 0) {
-		nslots_small_min++;
-	}
-	if (nslots_small_max % 2 != 0) {
-		nslots_small_max--;
-	}
-	if (nslots_small_min < 2) {
-		nslots_small_min = 2;
-	}
-	if (nslots_small_max < 2) {
-		nslots_small_max = 2;
-	}
-	if (nslots_small_min > nslots_small_max) {
-		nslots_small_min = nslots_small_max;
-	}
-
-	unsigned candidate;
-	if (opt_lg_tcache_nslots_mul < 0) {
-		candidate = slab_nregs >> (-opt_lg_tcache_nslots_mul);
-	} else {
-		candidate = slab_nregs << opt_lg_tcache_nslots_mul;
-	}
-	if (candidate % 2 != 0) {
-		/*
-		 * We need the candidate size to be even -- we assume that we
-		 * can divide by two and get a positive number (e.g. when
-		 * flushing).
-		 */
-		++candidate;
-	}
-	if (candidate <= nslots_small_min) {
-		return nslots_small_min;
-	} else if (candidate <= nslots_small_max) {
-		return candidate;
-	} else {
-		return nslots_small_max;
-	}
-}
-
 bool
 tcache_boot(tsdn_t *tsdn, base_t *base) {
-	tcache_maxclass = sz_s2u(opt_tcache_max);
-	assert(tcache_maxclass <= TCACHE_MAXCLASS_LIMIT);
-	nhbins = sz_size2index(tcache_maxclass) + 1;
+	global_do_not_change_tcache_maxclass = sz_s2u(opt_tcache_max);
+	assert(global_do_not_change_tcache_maxclass <= TCACHE_MAXCLASS_LIMIT);
+	global_do_not_change_tcache_nbins =
+	    sz_size2index(global_do_not_change_tcache_maxclass) + 1;
+	/*
+	 * Pre-compute default bin info and store the results in
+	 * opt_tcache_ncached_max. After the changes here,
+	 * opt_tcache_ncached_max should not be modified and should always be
+	 * accessed using tcache_get_default_ncached_max.
+	 */
+	tcache_bin_info_compute(opt_tcache_ncached_max);
 
 	if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES,
-	    malloc_mutex_rank_exclusive)) {
+	        malloc_mutex_rank_exclusive)) {
 		return true;
 	}
 
-	/* Initialize tcache_bin_info.  See comments in tcache_init(). */
-	unsigned n_reserved_bins = nhbins < SC_NBINS ? SC_NBINS : nhbins;
-	size_t size = n_reserved_bins * sizeof(cache_bin_info_t);
-	tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, base, size,
-	    CACHELINE);
-	if (tcache_bin_info == NULL) {
-		return true;
-	}
-
-	for (szind_t i = 0; i < nhbins; i++) {
-		unsigned ncached_max = tcache_ncached_max_compute(i);
-		cache_bin_info_init(&tcache_bin_info[i], ncached_max);
-	}
-	for (szind_t i = nhbins; i < SC_NBINS; i++) {
-		/* Disabled small bins. */
-		cache_bin_info_init(&tcache_bin_info[i], 0);
-		assert(tcache_small_bin_disabled(i, NULL));
-	}
-
-	cache_bin_info_compute_alloc(tcache_bin_info, nhbins,
-	    &tcache_bin_alloc_size, &tcache_bin_alloc_alignment);
-
 	return false;
 }
 
@@ -1096,6 +1443,20 @@ tcache_postfork_child(tsdn_t *tsdn) {
 	malloc_mutex_postfork_child(tsdn, &tcaches_mtx);
 }
 
-void tcache_assert_initialized(tcache_t *tcache) {
+void
+tcache_assert_initialized(tcache_t *tcache) {
 	assert(!cache_bin_still_zero_initialized(&tcache->bins[0]));
 }
+
+static te_enabled_t
+tcache_gc_enabled(void) {
+	return (opt_tcache_gc_incr_bytes > 0) ? te_enabled_yes : te_enabled_no;
+}
+
+/* Handles alloc and dalloc the same way */
+te_base_cb_t tcache_gc_te_handler = {
+    .enabled = &tcache_gc_enabled,
+    .new_event_wait = &tcache_gc_new_event_wait,
+    .postponed_event_wait = &tcache_gc_postponed_event_wait,
+    .event_handler = &tcache_gc_event,
+};
diff --git a/src/test_hooks.c b/src/test_hooks.c
index ace00d9c..40621199 100644
--- a/src/test_hooks.c
+++ b/src/test_hooks.c
@@ -6,7 +6,7 @@
  * from outside the generated library, so that we can use them in test code.
  */
 JEMALLOC_EXPORT
-void (*test_hooks_arena_new_hook)() = NULL;
+void (*test_hooks_arena_new_hook)(void) = NULL;
 
 JEMALLOC_EXPORT
-void (*test_hooks_libc_hook)() = NULL;
+void (*test_hooks_libc_hook)(void) = NULL;
diff --git a/src/thread_event.c b/src/thread_event.c
index 37eb5827..82776342 100644
--- a/src/thread_event.c
+++ b/src/thread_event.c
@@ -2,103 +2,48 @@
 #include "jemalloc/internal/jemalloc_internal_includes.h"
 
 #include "jemalloc/internal/thread_event.h"
-
-/*
- * Signatures for event specific functions.  These functions should be defined
- * by the modules owning each event.  The signatures here verify that the
- * definitions follow the right format.
- *
- * The first two are functions computing new / postponed event wait time.  New
- * event wait time is the time till the next event if an event is currently
- * being triggered; postponed event wait time is the time till the next event
- * if an event should be triggered but needs to be postponed, e.g. when the TSD
- * is not nominal or during reentrancy.
- *
- * The third is the event handler function, which is called whenever an event
- * is triggered.  The parameter is the elapsed time since the last time an
- * event of the same type was triggered.
- */
-#define E(event, condition_unused, is_alloc_event_unused)		\
-uint64_t event##_new_event_wait(tsd_t *tsd);				\
-uint64_t event##_postponed_event_wait(tsd_t *tsd);			\
-void event##_event_handler(tsd_t *tsd, uint64_t elapsed);
-
-ITERATE_OVER_ALL_EVENTS
-#undef E
-
-/* Signatures for internal functions fetching elapsed time. */
-#define E(event, condition_unused, is_alloc_event_unused)		\
-static uint64_t event##_fetch_elapsed(tsd_t *tsd);
-
-ITERATE_OVER_ALL_EVENTS
-#undef E
-
-static uint64_t
-tcache_gc_fetch_elapsed(tsd_t *tsd) {
-	return TE_INVALID_ELAPSED;
-}
-
-static uint64_t
-tcache_gc_dalloc_fetch_elapsed(tsd_t *tsd) {
-	return TE_INVALID_ELAPSED;
-}
-
-static uint64_t
-prof_sample_fetch_elapsed(tsd_t *tsd) {
-	uint64_t last_event = thread_allocated_last_event_get(tsd);
-	uint64_t last_sample_event = prof_sample_last_event_get(tsd);
-	prof_sample_last_event_set(tsd, last_event);
-	return last_event - last_sample_event;
-}
-
-static uint64_t
-stats_interval_fetch_elapsed(tsd_t *tsd) {
-	uint64_t last_event = thread_allocated_last_event_get(tsd);
-	uint64_t last_stats_event = stats_interval_last_event_get(tsd);
-	stats_interval_last_event_set(tsd, last_event);
-	return last_event - last_stats_event;
-}
-
-static uint64_t
-peak_alloc_fetch_elapsed(tsd_t *tsd) {
-	return TE_INVALID_ELAPSED;
-}
-
-static uint64_t
-peak_dalloc_fetch_elapsed(tsd_t *tsd) {
-	return TE_INVALID_ELAPSED;
-}
-
-/* Per event facilities done. */
+#include "jemalloc/internal/thread_event_registry.h"
+#include "jemalloc/internal/peak_event.h"
 
 static bool
 te_ctx_has_active_events(te_ctx_t *ctx) {
 	assert(config_debug);
-#define E(event, condition, alloc_event)			       \
-	if (condition && alloc_event == ctx->is_alloc) {	       \
-		return true;					       \
+	if (ctx->is_alloc) {
+		for (int i = 0; i < te_alloc_count; ++i) {
+			if (te_enabled_yes == te_alloc_handlers[i]->enabled()) {
+				return true;
+			}
+		}
+	} else {
+		for (int i = 0; i < te_dalloc_count; ++i) {
+			if (te_enabled_yes
+			    == te_dalloc_handlers[i]->enabled()) {
+				return true;
+			}
+		}
 	}
-	ITERATE_OVER_ALL_EVENTS
-#undef E
 	return false;
 }
 
 static uint64_t
 te_next_event_compute(tsd_t *tsd, bool is_alloc) {
-	uint64_t wait = TE_MAX_START_WAIT;
-#define E(event, condition, alloc_event)				\
-	if (is_alloc == alloc_event && condition) {			\
-		uint64_t event_wait =					\
-		    event##_event_wait_get(tsd);			\
-		assert(event_wait <= TE_MAX_START_WAIT);		\
-		if (event_wait > 0U && event_wait < wait) {		\
-			wait = event_wait;				\
-		}							\
-	}
+	te_base_cb_t **handlers = is_alloc ? te_alloc_handlers
+	                                   : te_dalloc_handlers;
+	uint64_t *waits = is_alloc ? tsd_te_datap_get_unsafe(tsd)->alloc_wait
+	                           : tsd_te_datap_get_unsafe(tsd)->dalloc_wait;
+	int       count = is_alloc ? te_alloc_count : te_dalloc_count;
 
-	ITERATE_OVER_ALL_EVENTS
-#undef E
-	assert(wait <= TE_MAX_START_WAIT);
+	uint64_t wait = TE_MAX_START_WAIT;
+
+	for (int i = 0; i < count; i++) {
+		if (te_enabled_yes == handlers[i]->enabled()) {
+			uint64_t ev_wait = waits[i];
+			assert(ev_wait <= TE_MAX_START_WAIT);
+			if (ev_wait > 0U && ev_wait < wait) {
+				wait = ev_wait;
+			}
+		}
+	}
 	return wait;
 }
 
@@ -121,6 +66,19 @@ te_assert_invariants_impl(tsd_t *tsd, te_ctx_t *ctx) {
 
 	/* The subtraction is intentionally susceptible to underflow. */
 	assert(current_bytes - last_event < interval);
+
+	/* This computation assumes that event did not become active in the
+	 * time since the last trigger. This works fine if waits for inactive
+	 * events are initialized with 0 as those are ignored
+	 * If we wanted to initialize user events to anything other than
+	 * zero, computation would take it into account and min_wait could
+	 * be smaller than interval (as it was not part of the calc setting
+	 * next_event).
+	 *
+	 * If we ever wanted to unregister the events assert would also
+	 * need to account for the possibility that next_event was set, by
+	 * event that is now gone
+	 */
 	uint64_t min_wait = te_next_event_compute(tsd, te_ctx_is_alloc(ctx));
 	/*
 	 * next_event should have been pushed up only except when no event is
@@ -128,9 +86,9 @@ te_assert_invariants_impl(tsd_t *tsd, te_ctx_t *ctx) {
 	 * below is stronger than needed, but having an exactly accurate guard
 	 * is more complicated to implement.
 	 */
-	assert((!te_ctx_has_active_events(ctx) && last_event == 0U) ||
-	    interval == min_wait ||
-	    (interval < min_wait && interval == TE_MAX_INTERVAL));
+	assert((!te_ctx_has_active_events(ctx) && last_event == 0U)
+	    || interval == min_wait
+	    || (interval < min_wait && interval == TE_MAX_INTERVAL));
 }
 
 void
@@ -193,8 +151,9 @@ te_assert_invariants_debug(tsd_t *tsd) {
 static void
 te_ctx_next_event_fast_update(te_ctx_t *ctx) {
 	uint64_t next_event = te_ctx_next_event_get(ctx);
-	uint64_t next_event_fast = (next_event <= TE_NEXT_EVENT_FAST_MAX) ?
-	    next_event : 0U;
+	uint64_t next_event_fast = (next_event <= TE_NEXT_EVENT_FAST_MAX)
+	    ? next_event
+	    : 0U;
 	te_ctx_next_event_fast_set(ctx, next_event_fast);
 }
 
@@ -218,9 +177,8 @@ te_recompute_fast_threshold(tsd_t *tsd) {
 	}
 }
 
-static void
-te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx,
-    uint64_t wait) {
+static inline void
+te_adjust_thresholds_impl(tsd_t *tsd, te_ctx_t *ctx, uint64_t wait) {
 	/*
 	 * The next threshold based on future events can only be adjusted after
 	 * progressing the last_event counter (which is set to current).
@@ -228,23 +186,165 @@ te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx,
 	assert(te_ctx_current_bytes_get(ctx) == te_ctx_last_event_get(ctx));
 	assert(wait <= TE_MAX_START_WAIT);
 
-	uint64_t next_event = te_ctx_last_event_get(ctx) + (wait <=
-	    TE_MAX_INTERVAL ? wait : TE_MAX_INTERVAL);
+	uint64_t next_event = te_ctx_last_event_get(ctx)
+	    + (wait <= TE_MAX_INTERVAL ? wait : TE_MAX_INTERVAL);
 	te_ctx_next_event_set(tsd, ctx, next_event);
 }
+void
+te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, uint64_t wait) {
+	te_adjust_thresholds_impl(tsd, ctx, wait);
+}
 
-static uint64_t
-te_clip_event_wait(uint64_t event_wait) {
-	assert(event_wait > 0U);
-	if (TE_MIN_START_WAIT > 1U &&
-	    unlikely(event_wait < TE_MIN_START_WAIT)) {
-		event_wait = TE_MIN_START_WAIT;
+static void
+te_init_waits(tsd_t *tsd, uint64_t *wait, bool is_alloc) {
+	te_base_cb_t **handlers = is_alloc ? te_alloc_handlers
+	                                   : te_dalloc_handlers;
+	uint64_t *waits = is_alloc ? tsd_te_datap_get_unsafe(tsd)->alloc_wait
+	                           : tsd_te_datap_get_unsafe(tsd)->dalloc_wait;
+	int       count = is_alloc ? te_alloc_count : te_dalloc_count;
+	for (int i = 0; i < count; i++) {
+		if (te_enabled_yes == handlers[i]->enabled()) {
+			uint64_t ev_wait = handlers[i]->new_event_wait(tsd);
+			assert(ev_wait > 0);
+			waits[i] = ev_wait;
+			if (ev_wait < *wait) {
+				*wait = ev_wait;
+			}
+		}
 	}
-	if (TE_MAX_START_WAIT < UINT64_MAX &&
-	    unlikely(event_wait > TE_MAX_START_WAIT)) {
-		event_wait = TE_MAX_START_WAIT;
+}
+
+static inline bool
+te_update_wait(tsd_t *tsd, uint64_t accumbytes, bool allow, uint64_t *ev_wait,
+    uint64_t *wait, te_base_cb_t *handler, uint64_t new_wait) {
+	bool ret = false;
+	if (*ev_wait > accumbytes) {
+		*ev_wait -= accumbytes;
+	} else if (!allow) {
+		*ev_wait = handler->postponed_event_wait(tsd);
+	} else {
+		ret = true;
+		*ev_wait = new_wait == 0 ? handler->new_event_wait(tsd)
+		                         : new_wait;
 	}
-	return event_wait;
+
+	assert(*ev_wait > 0);
+	if (*ev_wait < *wait) {
+		*wait = *ev_wait;
+	}
+	return ret;
+}
+
+extern uint64_t stats_interval_accum_batch;
+/* Return number of handlers enqueued into to_trigger array */
+static inline size_t
+te_update_alloc_events(tsd_t *tsd, te_base_cb_t **to_trigger,
+    uint64_t accumbytes, bool allow, uint64_t *wait) {
+	/*
+	 * We do not loop and invoke the functions via interface because
+	 * of the perf cost.  This path is relatively hot, so we sacrifice
+	 * elegance for perf.
+	 */
+	size_t    nto_trigger = 0;
+	uint64_t *waits = tsd_te_datap_get_unsafe(tsd)->alloc_wait;
+	if (opt_tcache_gc_incr_bytes > 0) {
+		assert(te_enabled_yes
+		    == te_alloc_handlers[te_alloc_tcache_gc]->enabled());
+		if (te_update_wait(tsd, accumbytes, allow,
+		        &waits[te_alloc_tcache_gc], wait,
+		        te_alloc_handlers[te_alloc_tcache_gc],
+		        opt_tcache_gc_incr_bytes)) {
+			to_trigger[nto_trigger++] =
+			    te_alloc_handlers[te_alloc_tcache_gc];
+		}
+	}
+#ifdef JEMALLOC_PROF
+	if (opt_prof) {
+		assert(te_enabled_yes
+		    == te_alloc_handlers[te_alloc_prof_sample]->enabled());
+		if (te_update_wait(tsd, accumbytes, allow,
+		        &waits[te_alloc_prof_sample], wait,
+		        te_alloc_handlers[te_alloc_prof_sample], 0)) {
+			to_trigger[nto_trigger++] =
+			    te_alloc_handlers[te_alloc_prof_sample];
+		}
+	}
+#endif
+	if (opt_stats_interval >= 0) {
+		if (te_update_wait(tsd, accumbytes, allow,
+		        &waits[te_alloc_stats_interval], wait,
+		        te_alloc_handlers[te_alloc_stats_interval],
+		        stats_interval_accum_batch)) {
+			assert(te_enabled_yes
+			    == te_alloc_handlers[te_alloc_stats_interval]
+			           ->enabled());
+			to_trigger[nto_trigger++] =
+			    te_alloc_handlers[te_alloc_stats_interval];
+		}
+	}
+
+#ifdef JEMALLOC_STATS
+	assert(te_enabled_yes == te_alloc_handlers[te_alloc_peak]->enabled());
+	if (te_update_wait(tsd, accumbytes, allow, &waits[te_alloc_peak], wait,
+	        te_alloc_handlers[te_alloc_peak], PEAK_EVENT_WAIT)) {
+		to_trigger[nto_trigger++] = te_alloc_handlers[te_alloc_peak];
+	}
+
+#endif
+
+	for (te_alloc_t ue = te_alloc_user0; ue <= te_alloc_user3; ue++) {
+		te_enabled_t status = te_user_event_enabled(
+		    ue - te_alloc_user0, true);
+		if (status == te_enabled_not_installed) {
+			break;
+		} else if (status == te_enabled_yes) {
+			if (te_update_wait(tsd, accumbytes, allow, &waits[ue],
+			        wait, te_alloc_handlers[ue], 0)) {
+				to_trigger[nto_trigger++] =
+				    te_alloc_handlers[ue];
+			}
+		}
+	}
+	return nto_trigger;
+}
+
+static inline size_t
+te_update_dalloc_events(tsd_t *tsd, te_base_cb_t **to_trigger,
+    uint64_t accumbytes, bool allow, uint64_t *wait) {
+	size_t    nto_trigger = 0;
+	uint64_t *waits = tsd_te_datap_get_unsafe(tsd)->dalloc_wait;
+	if (opt_tcache_gc_incr_bytes > 0) {
+		assert(te_enabled_yes
+		    == te_dalloc_handlers[te_dalloc_tcache_gc]->enabled());
+		if (te_update_wait(tsd, accumbytes, allow,
+		        &waits[te_dalloc_tcache_gc], wait,
+		        te_dalloc_handlers[te_dalloc_tcache_gc],
+		        opt_tcache_gc_incr_bytes)) {
+			to_trigger[nto_trigger++] =
+			    te_dalloc_handlers[te_dalloc_tcache_gc];
+		}
+	}
+#ifdef JEMALLOC_STATS
+	assert(te_enabled_yes == te_dalloc_handlers[te_dalloc_peak]->enabled());
+	if (te_update_wait(tsd, accumbytes, allow, &waits[te_dalloc_peak], wait,
+	        te_dalloc_handlers[te_dalloc_peak], PEAK_EVENT_WAIT)) {
+		to_trigger[nto_trigger++] = te_dalloc_handlers[te_dalloc_peak];
+	}
+#endif
+	for (te_dalloc_t ue = te_dalloc_user0; ue <= te_dalloc_user3; ue++) {
+		te_enabled_t status = te_user_event_enabled(
+		    ue - te_dalloc_user0, false);
+		if (status == te_enabled_not_installed) {
+			break;
+		} else if (status == te_enabled_yes) {
+			if (te_update_wait(tsd, accumbytes, allow, &waits[ue],
+			        wait, te_dalloc_handlers[ue], 0)) {
+				to_trigger[nto_trigger++] =
+				    te_dalloc_handlers[ue];
+			}
+		}
+	}
+	return nto_trigger;
 }
 
 void
@@ -256,49 +356,30 @@ te_event_trigger(tsd_t *tsd, te_ctx_t *ctx) {
 
 	te_ctx_last_event_set(ctx, bytes_after);
 
-	bool allow_event_trigger = tsd_nominal(tsd) &&
-	    tsd_reentrancy_level_get(tsd) == 0;
-	bool is_alloc = ctx->is_alloc;
+	bool allow_event_trigger = tsd_nominal(tsd)
+	    && tsd_reentrancy_level_get(tsd) == 0;
 	uint64_t wait = TE_MAX_START_WAIT;
 
-#define E(event, condition, alloc_event)				\
-	bool is_##event##_triggered = false;				\
-	if (is_alloc == alloc_event && condition) {			\
-		uint64_t event_wait = event##_event_wait_get(tsd);	\
-		assert(event_wait <= TE_MAX_START_WAIT);		\
-		if (event_wait > accumbytes) {				\
-			event_wait -= accumbytes;			\
-		} else if (!allow_event_trigger) {			\
-			event_wait = event##_postponed_event_wait(tsd);	\
-		} else {						\
-			is_##event##_triggered = true;			\
-			event_wait = event##_new_event_wait(tsd);	\
-		}							\
-		event_wait = te_clip_event_wait(event_wait);		\
-		event##_event_wait_set(tsd, event_wait);		\
-		if (event_wait < wait) {				\
-			wait = event_wait;				\
-		}							\
+	assert((int)te_alloc_count >= (int)te_dalloc_count);
+	te_base_cb_t *to_trigger[te_alloc_count];
+	size_t        nto_trigger;
+	if (ctx->is_alloc) {
+		nto_trigger = te_update_alloc_events(
+		    tsd, to_trigger, accumbytes, allow_event_trigger, &wait);
+	} else {
+		nto_trigger = te_update_dalloc_events(
+		    tsd, to_trigger, accumbytes, allow_event_trigger, &wait);
 	}
 
-	ITERATE_OVER_ALL_EVENTS
-#undef E
-
 	assert(wait <= TE_MAX_START_WAIT);
 	te_adjust_thresholds_helper(tsd, ctx, wait);
 	te_assert_invariants(tsd);
 
-#define E(event, condition, alloc_event)				\
-	if (is_alloc == alloc_event && condition &&			\
-	    is_##event##_triggered) {					\
-		assert(allow_event_trigger);				\
-		uint64_t elapsed = event##_fetch_elapsed(tsd);		\
-		event##_event_handler(tsd, elapsed);			\
+	for (size_t i = 0; i < nto_trigger; i++) {
+		assert(allow_event_trigger);
+		to_trigger[i]->event_handler(tsd);
 	}
 
-	ITERATE_OVER_ALL_EVENTS
-#undef E
-
 	te_assert_invariants(tsd);
 }
 
@@ -318,19 +399,9 @@ te_init(tsd_t *tsd, bool is_alloc) {
 	te_ctx_last_event_set(&ctx, te_ctx_current_bytes_get(&ctx));
 
 	uint64_t wait = TE_MAX_START_WAIT;
-#define E(event, condition, alloc_event)				\
-	if (is_alloc == alloc_event && condition) {			\
-		uint64_t event_wait = event##_new_event_wait(tsd);	\
-		event_wait = te_clip_event_wait(event_wait);		\
-		event##_event_wait_set(tsd, event_wait);		\
-		if (event_wait < wait) {				\
-			wait = event_wait;				\
-		}							\
-	}
+	te_init_waits(tsd, &wait, is_alloc);
 
-	ITERATE_OVER_ALL_EVENTS
-#undef E
-	te_adjust_thresholds_helper(tsd, &ctx, wait);
+	te_adjust_thresholds_impl(tsd, &ctx, wait);
 }
 
 void
diff --git a/src/thread_event_registry.c b/src/thread_event_registry.c
new file mode 100644
index 00000000..b8307df0
--- /dev/null
+++ b/src/thread_event_registry.c
@@ -0,0 +1,238 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/thread_event.h"
+#include "jemalloc/internal/thread_event_registry.h"
+#include "jemalloc/internal/tcache_externs.h"
+#include "jemalloc/internal/peak_event.h"
+#include "jemalloc/internal/prof_externs.h"
+#include "jemalloc/internal/stats.h"
+
+static malloc_mutex_t uevents_mu;
+
+bool
+experimental_thread_events_boot(void) {
+	return malloc_mutex_init(&uevents_mu, "thread_events",
+	    WITNESS_RANK_THREAD_EVENTS_USER, malloc_mutex_rank_exclusive);
+}
+
+#define TE_REGISTER_ERRCODE_FULL_SLOTS -1
+#define TE_REGISTER_ERRCODE_ALREADY_REGISTERED -2
+
+static user_hook_object_t uevents_storage[TE_MAX_USER_EVENTS] = {
+    {NULL, 0, false},
+};
+
+static atomic_p_t uevent_obj_p[TE_MAX_USER_EVENTS] = {
+    NULL,
+};
+
+static inline bool
+user_object_eq(user_hook_object_t *lhs, user_hook_object_t *rhs) {
+	assert(lhs != NULL && rhs != NULL);
+
+	return lhs->callback == rhs->callback && lhs->interval == rhs->interval
+	    && lhs->is_alloc_only == rhs->is_alloc_only;
+}
+
+/*
+ * Return slot number that event is registered at on success
+ *     it will be [0, TE_MAX_USER_EVENTS)
+ * Return negative value on some error
+ */
+static inline int
+te_register_user_handler_locked(user_hook_object_t *new_obj) {
+	/* Attempt to find the free slot in global register */
+	for (int i = 0; i < TE_MAX_USER_EVENTS; ++i) {
+		user_hook_object_t *p = (user_hook_object_t *)atomic_load_p(
+		    &uevent_obj_p[i], ATOMIC_ACQUIRE);
+
+		if (p && user_object_eq(p, new_obj)) {
+			/* Same callback and interval are registered - no error. */
+			return TE_REGISTER_ERRCODE_ALREADY_REGISTERED;
+		} else if (p == NULL) {
+			/* Empty slot */
+			uevents_storage[i] = *new_obj;
+			atomic_fence(ATOMIC_SEQ_CST);
+			atomic_store_p(&uevent_obj_p[i], &uevents_storage[i],
+			    ATOMIC_RELEASE);
+			return i;
+		}
+	}
+
+	return TE_REGISTER_ERRCODE_FULL_SLOTS;
+}
+
+static inline user_hook_object_t *
+uobj_get(size_t cb_idx) {
+	assert(cb_idx < TE_MAX_USER_EVENTS);
+	return (user_hook_object_t *)atomic_load_p(
+	    &uevent_obj_p[cb_idx], ATOMIC_ACQUIRE);
+}
+
+te_enabled_t
+te_user_event_enabled(size_t ue_idx, bool is_alloc) {
+	assert(ue_idx < TE_MAX_USER_EVENTS);
+	user_hook_object_t *obj = uobj_get(ue_idx);
+	if (!obj) {
+		return te_enabled_not_installed;
+	}
+	if (is_alloc || !obj->is_alloc_only) {
+		return te_enabled_yes;
+	}
+	return te_enabled_no;
+}
+
+static inline uint64_t
+new_event_wait(size_t cb_idx) {
+	user_hook_object_t *obj = uobj_get(cb_idx);
+	/* Enabled should have guarded it */
+	assert(obj);
+	return obj->interval;
+}
+
+static uint64_t
+postponed_event_wait(tsd_t *tsd) {
+	return TE_MIN_START_WAIT;
+}
+
+static inline void
+handler_wrapper(tsd_t *tsd, bool is_alloc, size_t cb_idx) {
+	user_hook_object_t *obj = uobj_get(cb_idx);
+	/* Enabled should have guarded it */
+	assert(obj);
+	uint64_t alloc = tsd_thread_allocated_get(tsd);
+	uint64_t dalloc = tsd_thread_deallocated_get(tsd);
+
+	pre_reentrancy(tsd, NULL);
+	obj->callback(is_alloc, alloc, dalloc);
+	post_reentrancy(tsd);
+}
+
+#define TE_USER_HANDLER_BINDING_IDX(i)                                         \
+	static te_enabled_t te_user_alloc_enabled##i(void) {                   \
+		return te_user_event_enabled(i, true);                         \
+	}                                                                      \
+	static te_enabled_t te_user_dalloc_enabled##i(void) {                  \
+		return te_user_event_enabled(i, false);                        \
+	}                                                                      \
+	static uint64_t te_user_new_event_wait_##i(tsd_t *tsd) {               \
+		return new_event_wait(i);                                      \
+	}                                                                      \
+	static void te_user_alloc_handler_call##i(tsd_t *tsd) {                \
+		handler_wrapper(tsd, true, i);                                 \
+	}                                                                      \
+	static void te_user_dalloc_handler_call##i(tsd_t *tsd) {               \
+		handler_wrapper(tsd, false, i);                                \
+	}                                                                      \
+	static te_base_cb_t user_alloc_handler##i = {                          \
+	    .enabled = &te_user_alloc_enabled##i,                              \
+	    .new_event_wait = &te_user_new_event_wait_##i,                     \
+	    .postponed_event_wait = &postponed_event_wait,                     \
+	    .event_handler = &te_user_alloc_handler_call##i};                  \
+	static te_base_cb_t user_dalloc_handler##i = {                         \
+	    .enabled = &te_user_dalloc_enabled##i,                             \
+	    .new_event_wait = &te_user_new_event_wait_##i,                     \
+	    .postponed_event_wait = &postponed_event_wait,                     \
+	    .event_handler = &te_user_dalloc_handler_call##i}
+
+TE_USER_HANDLER_BINDING_IDX(0);
+TE_USER_HANDLER_BINDING_IDX(1);
+TE_USER_HANDLER_BINDING_IDX(2);
+TE_USER_HANDLER_BINDING_IDX(3);
+
+/* Table of all the thread events. */
+te_base_cb_t *te_alloc_handlers[te_alloc_count] = {
+#ifdef JEMALLOC_PROF
+    &prof_sample_te_handler,
+#endif
+    &stats_interval_te_handler, &tcache_gc_te_handler,
+#ifdef JEMALLOC_STATS
+    &peak_te_handler,
+#endif
+    &user_alloc_handler0, &user_alloc_handler1, &user_alloc_handler2,
+    &user_alloc_handler3};
+
+te_base_cb_t *te_dalloc_handlers[te_dalloc_count] = {&tcache_gc_te_handler,
+#ifdef JEMALLOC_STATS
+    &peak_te_handler,
+#endif
+    &user_dalloc_handler0, &user_dalloc_handler1, &user_dalloc_handler2,
+    &user_dalloc_handler3};
+
+static inline bool
+te_update_tsd(tsd_t *tsd, uint64_t new_wait, size_t ue_idx, bool is_alloc) {
+	bool     needs_recompute = false;
+	te_ctx_t ctx;
+	uint64_t next, current, cur_wait;
+
+	if (is_alloc) {
+		tsd_te_datap_get_unsafe(tsd)
+		    ->alloc_wait[te_alloc_user0 + ue_idx] = new_wait;
+	} else {
+		tsd_te_datap_get_unsafe(tsd)
+		    ->dalloc_wait[te_dalloc_user0 + ue_idx] = new_wait;
+	}
+	te_ctx_get(tsd, &ctx, is_alloc);
+
+	next = te_ctx_next_event_get(&ctx);
+	current = te_ctx_current_bytes_get(&ctx);
+	cur_wait = next - current;
+
+	if (new_wait < cur_wait) {
+		/*
+		 * Set last event to current (same as when te inits).  This
+		 * will make sure that all the invariants are correct, before
+		 * we adjust next_event and next_event fast.
+		 */
+		te_ctx_last_event_set(&ctx, te_ctx_current_bytes_get(&ctx));
+		te_adjust_thresholds_helper(tsd, &ctx, new_wait);
+		needs_recompute = true;
+	}
+	return needs_recompute;
+}
+
+static inline void
+te_recalculate_current_thread_data(tsdn_t *tsdn, int ue_idx, bool alloc_only) {
+	bool recompute = false;
+	/* we do not need lock to recalculate the events on the current thread */
+	assert(ue_idx < TE_MAX_USER_EVENTS);
+	tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
+	if (tsd) {
+		uint64_t new_wait = new_event_wait(ue_idx);
+		recompute = te_update_tsd(tsd, new_wait, ue_idx, true);
+		if (!alloc_only) {
+			recompute = te_update_tsd(tsd, new_wait, ue_idx, false)
+			    || recompute;
+		}
+
+		if (recompute) {
+			te_recompute_fast_threshold(tsd);
+		}
+	}
+}
+
+int
+te_register_user_handler(tsdn_t *tsdn, user_hook_object_t *te_uobj) {
+	int ret;
+	int reg_retcode;
+	if (!te_uobj || !te_uobj->callback || te_uobj->interval == 0) {
+		return EINVAL;
+	}
+
+	malloc_mutex_lock(tsdn, &uevents_mu);
+	reg_retcode = te_register_user_handler_locked(te_uobj);
+	malloc_mutex_unlock(tsdn, &uevents_mu);
+
+	if (reg_retcode >= 0) {
+		te_recalculate_current_thread_data(
+		    tsdn, reg_retcode, te_uobj->is_alloc_only);
+		ret = 0;
+	} else if (reg_retcode == TE_REGISTER_ERRCODE_ALREADY_REGISTERED) {
+		ret = 0;
+	} else {
+		ret = EINVAL;
+	}
+
+	return ret;
+}
diff --git a/src/ticker.c b/src/ticker.c
index 790b5c20..1fd6ac96 100644
--- a/src/ticker.c
+++ b/src/ticker.c
@@ -20,13 +20,8 @@
  * The values here are computed in src/ticker.py
  */
 
-const uint8_t ticker_geom_table[1 << TICKER_GEOM_NBITS] = {
-	254, 211, 187, 169, 156, 144, 135, 127,
-	120, 113, 107, 102, 97, 93, 89, 85,
-	81, 77, 74, 71, 68, 65, 62, 60,
-	57, 55, 53, 50, 48, 46, 44, 42,
-	40, 39, 37, 35, 33, 32, 30, 29,
-	27, 26, 24, 23, 21, 20, 19, 18,
-	16, 15, 14, 13, 12, 10, 9, 8,
-	7, 6, 5, 4, 3, 2, 1, 0
-};
+const uint8_t ticker_geom_table[1 << TICKER_GEOM_NBITS] = {254, 211, 187, 169,
+    156, 144, 135, 127, 120, 113, 107, 102, 97, 93, 89, 85, 81, 77, 74, 71, 68,
+    65, 62, 60, 57, 55, 53, 50, 48, 46, 44, 42, 40, 39, 37, 35, 33, 32, 30, 29,
+    27, 26, 24, 23, 21, 20, 19, 18, 16, 15, 14, 13, 12, 10, 9, 8, 7, 6, 5, 4, 3,
+    2, 1, 0};
diff --git a/src/tsd.c b/src/tsd.c
index e8e4f3a3..30acad93 100644
--- a/src/tsd.c
+++ b/src/tsd.c
@@ -20,11 +20,20 @@ bool tsd_booted = false;
 #elif (defined(JEMALLOC_TLS))
 JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
 pthread_key_t tsd_tsd;
-bool tsd_booted = false;
+bool          tsd_booted = false;
 #elif (defined(_WIN32))
-DWORD tsd_tsd;
-tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER};
+#	if defined(JEMALLOC_LEGACY_WINDOWS_SUPPORT) || !defined(_MSC_VER)
+DWORD         tsd_tsd;
+tsd_wrapper_t tsd_boot_wrapper = {TSD_INITIALIZER, false};
+#	else
+JEMALLOC_TSD_TYPE_ATTR(tsd_wrapper_t)
+tsd_wrapper_tls = {TSD_INITIALIZER, false};
+#	endif
 bool tsd_booted = false;
+#	if JEMALLOC_WIN32_TLSGETVALUE2
+TGV2    tls_get_value2 = NULL;
+HMODULE tgv2_mod = NULL;
+#	endif
 #else
 
 /*
@@ -37,17 +46,12 @@ struct tsd_init_head_s {
 	malloc_mutex_t lock;
 };
 
-pthread_key_t tsd_tsd;
-tsd_init_head_t	tsd_init_head = {
-	ql_head_initializer(blocks),
-	MALLOC_MUTEX_INITIALIZER
-};
+pthread_key_t   tsd_tsd;
+tsd_init_head_t tsd_init_head = {
+    ql_head_initializer(blocks), MALLOC_MUTEX_INITIALIZER};
 
-tsd_wrapper_t tsd_boot_wrapper = {
-	false,
-	TSD_INITIALIZER
-};
-bool tsd_booted = false;
+tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER};
+bool          tsd_booted = false;
 #endif
 
 JEMALLOC_DIAGNOSTIC_POP
@@ -56,7 +60,7 @@ JEMALLOC_DIAGNOSTIC_POP
 
 /* A list of all the tsds in the nominal state. */
 typedef ql_head(tsd_t) tsd_list_t;
-static tsd_list_t tsd_nominal_tsds = ql_head_initializer(tsd_nominal_tsds);
+static tsd_list_t     tsd_nominal_tsds = ql_head_initializer(tsd_nominal_tsds);
 static malloc_mutex_t tsd_nominal_tsds_lock;
 
 /* How many slow-path-enabling features are turned on. */
@@ -65,13 +69,13 @@ static atomic_u32_t tsd_global_slow_count = ATOMIC_INIT(0);
 static bool
 tsd_in_nominal_list(tsd_t *tsd) {
 	tsd_t *tsd_list;
-	bool found = false;
+	bool   found = false;
 	/*
 	 * We don't know that tsd is nominal; it might not be safe to get data
 	 * out of it here.
 	 */
 	malloc_mutex_lock(TSDN_NULL, &tsd_nominal_tsds_lock);
-	ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) {
+	ql_foreach (tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) {
 		if (tsd == tsd_list) {
 			found = true;
 			break;
@@ -109,7 +113,7 @@ tsd_force_recompute(tsdn_t *tsdn) {
 	atomic_fence(ATOMIC_RELEASE);
 	malloc_mutex_lock(tsdn, &tsd_nominal_tsds_lock);
 	tsd_t *remote_tsd;
-	ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) {
+	ql_foreach (remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) {
 		assert(tsd_atomic_load(&remote_tsd->state, ATOMIC_RELAXED)
 		    <= tsd_state_nominal_max);
 		tsd_atomic_store(&remote_tsd->state,
@@ -135,7 +139,8 @@ tsd_global_slow_inc(tsdn_t *tsdn) {
 	tsd_force_recompute(tsdn);
 }
 
-void tsd_global_slow_dec(tsdn_t *tsdn) {
+void
+tsd_global_slow_dec(tsdn_t *tsdn) {
 	atomic_fetch_sub_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
 	/* See the note in ..._inc(). */
 	tsd_force_recompute(tsdn);
@@ -148,7 +153,7 @@ tsd_local_slow(tsd_t *tsd) {
 }
 
 bool
-tsd_global_slow() {
+tsd_global_slow(void) {
 	return atomic_load_u32(&tsd_global_slow_count, ATOMIC_RELAXED) > 0;
 }
 
@@ -172,8 +177,8 @@ tsd_slow_update(tsd_t *tsd) {
 	uint8_t old_state;
 	do {
 		uint8_t new_state = tsd_state_compute(tsd);
-		old_state = tsd_atomic_exchange(&tsd->state, new_state,
-		    ATOMIC_ACQUIRE);
+		old_state = tsd_atomic_exchange(
+		    &tsd->state, new_state, ATOMIC_ACQUIRE);
 	} while (old_state == tsd_state_nominal_recompute);
 
 	te_recompute_fast_threshold(tsd);
@@ -203,8 +208,8 @@ tsd_state_set(tsd_t *tsd, uint8_t new_state) {
 		assert(tsd_in_nominal_list(tsd));
 		if (new_state > tsd_state_nominal_max) {
 			tsd_remove_nominal(tsd);
-			tsd_atomic_store(&tsd->state, new_state,
-			    ATOMIC_RELAXED);
+			tsd_atomic_store(
+			    &tsd->state, new_state, ATOMIC_RELAXED);
 		} else {
 			/*
 			 * This is the tricky case.  We're transitioning from
@@ -227,8 +232,7 @@ tsd_prng_state_init(tsd_t *tsd) {
 	 * cost of test repeatability.  For debug builds, instead use a
 	 * deterministic seed.
 	 */
-	*tsd_prng_statep_get(tsd) = config_debug ? 0 :
-	    (uint64_t)(uintptr_t)tsd;
+	*tsd_prng_statep_get(tsd) = config_debug ? 0 : (uint64_t)(uintptr_t)tsd;
 }
 
 static bool
@@ -256,8 +260,8 @@ assert_tsd_data_cleanup_done(tsd_t *tsd) {
 
 static bool
 tsd_data_init_nocleanup(tsd_t *tsd) {
-	assert(tsd_state_get(tsd) == tsd_state_reincarnated ||
-	    tsd_state_get(tsd) == tsd_state_minimal_initialized);
+	assert(tsd_state_get(tsd) == tsd_state_reincarnated
+	    || tsd_state_get(tsd) == tsd_state_minimal_initialized);
 	/*
 	 * During reincarnation, there is no guarantee that the cleanup function
 	 * will be called (deallocation may happen after all tsd destructors).
@@ -300,9 +304,25 @@ tsd_fetch_slow(tsd_t *tsd, bool minimal) {
 			tsd_state_set(tsd, tsd_state_minimal_initialized);
 			tsd_set(tsd);
 			tsd_data_init_nocleanup(tsd);
+			*tsd_min_init_state_nfetchedp_get(tsd) = 1;
 		}
 	} else if (tsd_state_get(tsd) == tsd_state_minimal_initialized) {
-		if (!minimal) {
+		/*
+		 * If a thread only ever deallocates (e.g. dedicated reclamation
+		 * threads), we want to help it to eventually escape the slow
+		 * path (caused by the minimal initialized state).  The nfetched
+		 * counter tracks the number of times the tsd has been accessed
+		 * under the min init state, and triggers the switch to nominal
+		 * once reached the max allowed count.
+		 *
+		 * This means at most 128 deallocations stay on the slow path.
+		 *
+		 * Also see comments in free_default().
+		 */
+		uint8_t *nfetched = tsd_min_init_state_nfetchedp_get(tsd);
+		assert(*nfetched >= 1);
+		(*nfetched)++;
+		if (!minimal || *nfetched == TSD_MIN_INIT_STATE_MAX_FETCHED) {
 			/* Switch to fully initialized. */
 			tsd_state_set(tsd, tsd_state_nominal);
 			assert(*tsd_reentrancy_levelp_get(tsd) >= 1);
@@ -334,15 +354,15 @@ malloc_tsd_dalloc(void *wrapper) {
 }
 
 #if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
-static unsigned ncleanups;
+static unsigned             ncleanups;
 static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX];
 
-#ifndef _WIN32
+#	ifndef _WIN32
 JEMALLOC_EXPORT
-#endif
+#	endif
 void
 _malloc_thread_cleanup(void) {
-	bool pending[MALLOC_TSD_CLEANUPS_MAX], again;
+	bool     pending[MALLOC_TSD_CLEANUPS_MAX], again;
 	unsigned i;
 
 	for (i = 0; i < ncleanups; i++) {
@@ -362,9 +382,9 @@ _malloc_thread_cleanup(void) {
 	} while (again);
 }
 
-#ifndef _WIN32
+#	ifndef _WIN32
 JEMALLOC_EXPORT
-#endif
+#	endif
 void
 _malloc_tsd_cleanup_register(bool (*f)(void)) {
 	assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX);
@@ -422,7 +442,7 @@ tsd_cleanup(void *arg) {
 	}
 #ifdef JEMALLOC_JET
 	test_callback_t test_callback = *tsd_test_callbackp_get_unsafe(tsd);
-	int *data = tsd_test_datap_get_unsafe(tsd);
+	int            *data = tsd_test_datap_get_unsafe(tsd);
 	if (test_callback != NULL) {
 		test_callback(data);
 	}
@@ -437,7 +457,7 @@ malloc_tsd_boot0(void) {
 	ncleanups = 0;
 #endif
 	if (malloc_mutex_init(&tsd_nominal_tsds_lock, "tsd_nominal_tsds_lock",
-	    WITNESS_RANK_OMIT, malloc_mutex_rank_exclusive)) {
+	        WITNESS_RANK_OMIT, malloc_mutex_rank_exclusive)) {
 		return NULL;
 	}
 	if (tsd_boot0()) {
@@ -459,11 +479,11 @@ malloc_tsd_boot1(void) {
 static BOOL WINAPI
 _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) {
 	switch (fdwReason) {
-#ifdef JEMALLOC_LAZY_LOCK
+#	ifdef JEMALLOC_LAZY_LOCK
 	case DLL_THREAD_ATTACH:
 		isthreaded = true;
 		break;
-#endif
+#	endif
 	case DLL_THREAD_DETACH:
 		_malloc_thread_cleanup();
 		break;
@@ -478,36 +498,37 @@ _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) {
  * hooked "read". We won't read for the rest of the file, so we can get away
  * with unhooking.
  */
-#ifdef read
-#  undef read
+#	ifdef read
+#		undef read
+#	endif
+
+#	ifdef _MSC_VER
+#		ifdef _M_IX86
+#			pragma comment(linker, "/INCLUDE:__tls_used")
+#			pragma comment(linker, "/INCLUDE:_tls_callback")
+#		else
+#			pragma comment(linker, "/INCLUDE:_tls_used")
+#			pragma comment(                                       \
+			    linker, "/INCLUDE:" STRINGIFY(tls_callback))
+#		endif
+#		pragma section(".CRT$XLY", long, read)
+#	endif
+JEMALLOC_SECTION(".CRT$XLY")
+JEMALLOC_ATTR(used) BOOL(WINAPI *const tls_callback)(
+    HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) = _tls_callback;
 #endif
 
-#ifdef _MSC_VER
-#  ifdef _M_IX86
-#    pragma comment(linker, "/INCLUDE:__tls_used")
-#    pragma comment(linker, "/INCLUDE:_tls_callback")
-#  else
-#    pragma comment(linker, "/INCLUDE:_tls_used")
-#    pragma comment(linker, "/INCLUDE:" STRINGIFY(tls_callback) )
-#  endif
-#  pragma section(".CRT$XLY",long,read)
-#endif
-JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used)
-BOOL	(WINAPI *const tls_callback)(HINSTANCE hinstDLL,
-    DWORD fdwReason, LPVOID lpvReserved) = _tls_callback;
-#endif
-
-#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
-    !defined(_WIN32))
+#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS)        \
+    && !defined(_WIN32))
 void *
 tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) {
-	pthread_t self = pthread_self();
+	pthread_t         self = pthread_self();
 	tsd_init_block_t *iter;
 
 	/* Check whether this thread has already inserted into the list. */
 	malloc_mutex_lock(TSDN_NULL, &head->lock);
-	ql_foreach(iter, &head->blocks, link) {
-		if (iter->thread == self) {
+	ql_foreach (iter, &head->blocks, link) {
+		if (pthread_equal(iter->thread, self)) {
 			malloc_mutex_unlock(TSDN_NULL, &head->lock);
 			return iter->data;
 		}
diff --git a/src/util.c b/src/util.c
new file mode 100644
index 00000000..1bcf4fee
--- /dev/null
+++ b/src/util.c
@@ -0,0 +1,48 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/util.h"
+
+/* Reads the next size pair in a multi-sized option. */
+bool
+multi_setting_parse_next(const char **setting_segment_cur, size_t *len_left,
+    size_t *key_start, size_t *key_end, size_t *value) {
+	const char *cur = *setting_segment_cur;
+	char       *end;
+	uintmax_t   um;
+
+	set_errno(0);
+
+	/* First number, then '-' */
+	um = malloc_strtoumax(cur, &end, 0);
+	if (get_errno() != 0 || *end != '-') {
+		return true;
+	}
+	*key_start = (size_t)um;
+	cur = end + 1;
+
+	/* Second number, then ':' */
+	um = malloc_strtoumax(cur, &end, 0);
+	if (get_errno() != 0 || *end != ':') {
+		return true;
+	}
+	*key_end = (size_t)um;
+	cur = end + 1;
+
+	/* Last number */
+	um = malloc_strtoumax(cur, &end, 0);
+	if (get_errno() != 0) {
+		return true;
+	}
+	*value = (size_t)um;
+
+	/* Consume the separator if there is one. */
+	if (*end == '|') {
+		end++;
+	}
+
+	*len_left -= end - *setting_segment_cur;
+	*setting_segment_cur = end;
+
+	return false;
+}
diff --git a/src/witness.c b/src/witness.c
index 4474af04..940b1eae 100644
--- a/src/witness.c
+++ b/src/witness.c
@@ -26,8 +26,8 @@ witness_print_witness(witness_t *w, unsigned n) {
 static void
 witness_print_witnesses(const witness_list_t *witnesses) {
 	witness_t *w, *last = NULL;
-	unsigned n = 0;
-	ql_foreach(w, witnesses, link) {
+	unsigned   n = 0;
+	ql_foreach (w, witnesses, link) {
 		if (last != NULL && w->rank > last->rank) {
 			assert(w->name != last->name);
 			witness_print_witness(last, n);
@@ -45,8 +45,8 @@ witness_print_witnesses(const witness_list_t *witnesses) {
 }
 
 static void
-witness_lock_error_impl(const witness_list_t *witnesses,
-    const witness_t *witness) {
+witness_lock_error_impl(
+    const witness_list_t *witnesses, const witness_t *witness) {
 	malloc_printf("<jemalloc>: Lock rank order reversal:");
 	witness_print_witnesses(witnesses);
 	malloc_printf(" %s(%u)\n", witness->name, witness->rank);
@@ -56,8 +56,8 @@ witness_lock_error_t *JET_MUTABLE witness_lock_error = witness_lock_error_impl;
 
 static void
 witness_owner_error_impl(const witness_t *witness) {
-	malloc_printf("<jemalloc>: Should own %s(%u)\n", witness->name,
-	    witness->rank);
+	malloc_printf(
+	    "<jemalloc>: Should own %s(%u)\n", witness->name, witness->rank);
 	abort();
 }
 witness_owner_error_t *JET_MUTABLE witness_owner_error =
@@ -76,7 +76,7 @@ static void
 witness_depth_error_impl(const witness_list_t *witnesses,
     witness_rank_t rank_inclusive, unsigned depth) {
 	malloc_printf("<jemalloc>: Should own %u lock%s of rank >= %u:", depth,
-	    (depth != 1) ?  "s" : "", rank_inclusive);
+	    (depth != 1) ? "s" : "", rank_inclusive);
 	witness_print_witnesses(witnesses);
 	malloc_printf("\n");
 	abort();
diff --git a/src/zone.c b/src/zone.c
index 23dfdd04..e09de4b8 100644
--- a/src/zone.c
+++ b/src/zone.c
@@ -4,7 +4,7 @@
 #include "jemalloc/internal/assert.h"
 
 #ifndef JEMALLOC_ZONE
-#  error "This source file is for zones on Darwin (OS X)."
+#	error "This source file is for zones on Darwin (OS X)."
 #endif
 
 /* Definitions of the following structs in malloc/malloc.h might be too old
@@ -22,10 +22,11 @@ typedef struct _malloc_zone_t {
 	void *(*realloc)(struct _malloc_zone_t *, void *, size_t);
 	void (*destroy)(struct _malloc_zone_t *);
 	const char *zone_name;
-	unsigned (*batch_malloc)(struct _malloc_zone_t *, size_t, void **, unsigned);
+	unsigned (*batch_malloc)(
+	    struct _malloc_zone_t *, size_t, void **, unsigned);
 	void (*batch_free)(struct _malloc_zone_t *, void **, unsigned);
 	struct malloc_introspection_t *introspect;
-	unsigned version;
+	unsigned                       version;
 	void *(*memalign)(struct _malloc_zone_t *, size_t, size_t);
 	void (*free_definite_size)(struct _malloc_zone_t *, void *, size_t);
 	size_t (*pressure_relief)(struct _malloc_zone_t *, size_t);
@@ -33,22 +34,24 @@ typedef struct _malloc_zone_t {
 
 typedef struct {
 	vm_address_t address;
-	vm_size_t size;
+	vm_size_t    size;
 } vm_range_t;
 
 typedef struct malloc_statistics_t {
 	unsigned blocks_in_use;
-	size_t size_in_use;
-	size_t max_size_in_use;
-	size_t size_allocated;
+	size_t   size_in_use;
+	size_t   max_size_in_use;
+	size_t   size_allocated;
 } malloc_statistics_t;
 
 typedef kern_return_t memory_reader_t(task_t, vm_address_t, vm_size_t, void **);
 
-typedef void vm_range_recorder_t(task_t, void *, unsigned type, vm_range_t *, unsigned);
+typedef void vm_range_recorder_t(
+    task_t, void *, unsigned type, vm_range_t *, unsigned);
 
 typedef struct malloc_introspection_t {
-	kern_return_t (*enumerator)(task_t, void *, unsigned, vm_address_t, memory_reader_t, vm_range_recorder_t);
+	kern_return_t (*enumerator)(task_t, void *, unsigned, vm_address_t,
+	    memory_reader_t, vm_range_recorder_t);
 	size_t (*good_size)(malloc_zone_t *, size_t);
 	boolean_t (*check)(malloc_zone_t *);
 	void (*print)(malloc_zone_t *, boolean_t);
@@ -61,14 +64,16 @@ typedef struct malloc_introspection_t {
 	boolean_t (*disable_discharge_checking)(malloc_zone_t *);
 	void (*discharge)(malloc_zone_t *, void *);
 #ifdef __BLOCKS__
-	void (*enumerate_discharged_pointers)(malloc_zone_t *, void (^)(void *, void *));
+	void (*enumerate_discharged_pointers)(
+	    malloc_zone_t *, void (^)(void *, void *));
 #else
 	void *enumerate_unavailable_without_blocks;
 #endif
 	void (*reinit_lock)(malloc_zone_t *);
 } malloc_introspection_t;
 
-extern kern_return_t malloc_get_all_zones(task_t, memory_reader_t, vm_address_t **, unsigned *);
+extern kern_return_t malloc_get_all_zones(
+    task_t, memory_reader_t, vm_address_t **, unsigned *);
 
 extern malloc_zone_t *malloc_default_zone(void);
 
@@ -81,48 +86,46 @@ extern void malloc_zone_unregister(malloc_zone_t *zone);
  * We need to check whether it is present at runtime, thus the weak_import.
  */
 extern malloc_zone_t *malloc_default_purgeable_zone(void)
-JEMALLOC_ATTR(weak_import);
+    JEMALLOC_ATTR(weak_import);
 
 /******************************************************************************/
 /* Data. */
 
-static malloc_zone_t *default_zone, *purgeable_zone;
-static malloc_zone_t jemalloc_zone;
+static malloc_zone_t                *default_zone, *purgeable_zone;
+static malloc_zone_t                 jemalloc_zone;
 static struct malloc_introspection_t jemalloc_zone_introspect;
-static pid_t zone_force_lock_pid = -1;
+static pid_t                         zone_force_lock_pid = -1;
 
 /******************************************************************************/
 /* Function prototypes for non-inline static functions. */
 
-static size_t	zone_size(malloc_zone_t *zone, const void *ptr);
-static void	*zone_malloc(malloc_zone_t *zone, size_t size);
-static void	*zone_calloc(malloc_zone_t *zone, size_t num, size_t size);
-static void	*zone_valloc(malloc_zone_t *zone, size_t size);
-static void	zone_free(malloc_zone_t *zone, void *ptr);
-static void	*zone_realloc(malloc_zone_t *zone, void *ptr, size_t size);
-static void	*zone_memalign(malloc_zone_t *zone, size_t alignment,
-    size_t size);
-static void	zone_free_definite_size(malloc_zone_t *zone, void *ptr,
-    size_t size);
-static void	zone_destroy(malloc_zone_t *zone);
-static unsigned	zone_batch_malloc(struct _malloc_zone_t *zone, size_t size,
+static size_t zone_size(malloc_zone_t *zone, const void *ptr);
+static void  *zone_malloc(malloc_zone_t *zone, size_t size);
+static void  *zone_calloc(malloc_zone_t *zone, size_t num, size_t size);
+static void  *zone_valloc(malloc_zone_t *zone, size_t size);
+static void   zone_free(malloc_zone_t *zone, void *ptr);
+static void  *zone_realloc(malloc_zone_t *zone, void *ptr, size_t size);
+static void  *zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size);
+static void   zone_free_definite_size(
+      malloc_zone_t *zone, void *ptr, size_t size);
+static void     zone_destroy(malloc_zone_t *zone);
+static unsigned zone_batch_malloc(struct _malloc_zone_t *zone, size_t size,
     void **results, unsigned num_requested);
-static void	zone_batch_free(struct _malloc_zone_t *zone,
-    void **to_be_freed, unsigned num_to_be_freed);
-static size_t	zone_pressure_relief(struct _malloc_zone_t *zone, size_t goal);
-static size_t	zone_good_size(malloc_zone_t *zone, size_t size);
-static kern_return_t	zone_enumerator(task_t task, void *data, unsigned type_mask,
-    vm_address_t zone_address, memory_reader_t reader,
+static void     zone_batch_free(
+        struct _malloc_zone_t *zone, void **to_be_freed, unsigned num_to_be_freed);
+static size_t zone_pressure_relief(struct _malloc_zone_t *zone, size_t goal);
+static size_t zone_good_size(malloc_zone_t *zone, size_t size);
+static kern_return_t zone_enumerator(task_t task, void *data,
+    unsigned type_mask, vm_address_t zone_address, memory_reader_t reader,
     vm_range_recorder_t recorder);
-static boolean_t	zone_check(malloc_zone_t *zone);
-static void	zone_print(malloc_zone_t *zone, boolean_t verbose);
-static void	zone_log(malloc_zone_t *zone, void *address);
-static void	zone_force_lock(malloc_zone_t *zone);
-static void	zone_force_unlock(malloc_zone_t *zone);
-static void	zone_statistics(malloc_zone_t *zone,
-    malloc_statistics_t *stats);
-static boolean_t	zone_locked(malloc_zone_t *zone);
-static void	zone_reinit_lock(malloc_zone_t *zone);
+static boolean_t     zone_check(malloc_zone_t *zone);
+static void          zone_print(malloc_zone_t *zone, boolean_t verbose);
+static void          zone_log(malloc_zone_t *zone, void *address);
+static void          zone_force_lock(malloc_zone_t *zone);
+static void          zone_force_unlock(malloc_zone_t *zone);
+static void zone_statistics(malloc_zone_t *zone, malloc_statistics_t *stats);
+static boolean_t zone_locked(malloc_zone_t *zone);
+static void      zone_reinit_lock(malloc_zone_t *zone);
 
 /******************************************************************************/
 /*
@@ -225,8 +228,8 @@ zone_batch_malloc(struct _malloc_zone_t *zone, size_t size, void **results,
 }
 
 static void
-zone_batch_free(struct _malloc_zone_t *zone, void **to_be_freed,
-    unsigned num_to_be_freed) {
+zone_batch_free(
+    struct _malloc_zone_t *zone, void **to_be_freed, unsigned num_to_be_freed) {
 	unsigned i;
 
 	for (i = 0; i < num_to_be_freed; i++) {
@@ -261,12 +264,10 @@ zone_check(malloc_zone_t *zone) {
 }
 
 static void
-zone_print(malloc_zone_t *zone, boolean_t verbose) {
-}
+zone_print(malloc_zone_t *zone, boolean_t verbose) {}
 
 static void
-zone_log(malloc_zone_t *zone, void *address) {
-}
+zone_log(malloc_zone_t *zone, void *address) {}
 
 static void
 zone_force_lock(malloc_zone_t *zone) {
@@ -369,7 +370,7 @@ zone_init(void) {
 static malloc_zone_t *
 zone_default_get(void) {
 	malloc_zone_t **zones = NULL;
-	unsigned int num_zones = 0;
+	unsigned int    num_zones = 0;
 
 	/*
 	 * On OSX 10.12, malloc_default_zone returns a special zone that is not
@@ -380,8 +381,9 @@ zone_default_get(void) {
 	 * zone is the default.  So get the list of zones to get the first one,
 	 * instead of relying on malloc_default_zone.
 	 */
-	if (KERN_SUCCESS != malloc_get_all_zones(0, NULL,
-	    (vm_address_t**)&zones, &num_zones)) {
+	if (KERN_SUCCESS
+	    != malloc_get_all_zones(
+	        0, NULL, (vm_address_t **)&zones, &num_zones)) {
 		/*
 		 * Reset the value in case the failure happened after it was
 		 * set.
@@ -441,8 +443,8 @@ zone_register(void) {
 	 * register jemalloc's.
 	 */
 	default_zone = zone_default_get();
-	if (!default_zone->zone_name || strcmp(default_zone->zone_name,
-	    "DefaultMallocZone") != 0) {
+	if (!default_zone->zone_name
+	    || strcmp(default_zone->zone_name, "DefaultMallocZone") != 0) {
 		return;
 	}
 
@@ -457,8 +459,9 @@ zone_register(void) {
 	 * to check for the existence of malloc_default_purgeable_zone() at
 	 * run time.
 	 */
-	purgeable_zone = (malloc_default_purgeable_zone == NULL) ? NULL :
-	    malloc_default_purgeable_zone();
+	purgeable_zone = (malloc_default_purgeable_zone == NULL)
+	    ? NULL
+	    : malloc_default_purgeable_zone();
 
 	/* Register the custom zone.  At this point it won't be the default. */
 	zone_init();
diff --git a/test/analyze/prof_bias.c b/test/analyze/prof_bias.c
index a96ca942..e4bf7942 100644
--- a/test/analyze/prof_bias.c
+++ b/test/analyze/prof_bias.c
@@ -46,15 +46,15 @@ do_allocs(size_t sz, size_t cnt, bool do_frees) {
 int
 main(void) {
 	size_t lg_prof_sample_local = 19;
-	int err = mallctl("prof.reset", NULL, NULL,
-	    (void *)&lg_prof_sample_local, sizeof(lg_prof_sample_local));
+	int    err = mallctl("prof.reset", NULL, NULL,
+	       (void *)&lg_prof_sample_local, sizeof(lg_prof_sample_local));
 	assert(err == 0);
 
 	prof_backtrace_hook_set(mock_backtrace);
 	do_allocs(16, 32 * 1024 * 1024, /* do_frees */ true);
-	do_allocs(32 * 1024* 1024, 16, /* do_frees */ true);
+	do_allocs(32 * 1024 * 1024, 16, /* do_frees */ true);
 	do_allocs(16, 32 * 1024 * 1024, /* do_frees */ false);
-	do_allocs(32 * 1024* 1024, 16, /* do_frees */ false);
+	do_allocs(32 * 1024 * 1024, 16, /* do_frees */ false);
 
 	return 0;
 }
diff --git a/test/analyze/rand.c b/test/analyze/rand.c
index bb20b06e..4c7e18c7 100644
--- a/test/analyze/rand.c
+++ b/test/analyze/rand.c
@@ -72,13 +72,13 @@ print_buckets(const size_t buckets[], const size_t means[],
 		if (buckets[i] + stddevs[i] <= means[i]) {
 			malloc_write(" ");
 			for (size_t t = means[i] - buckets[i]; t >= stddevs[i];
-			    t -= stddevs[i]) {
+			     t -= stddevs[i]) {
 				malloc_write("-");
 			}
 		} else if (buckets[i] >= means[i] + stddevs[i]) {
 			malloc_write(" ");
 			for (size_t t = buckets[i] - means[i]; t >= stddevs[i];
-			    t -= stddevs[i]) {
+			     t -= stddevs[i]) {
 				malloc_write("+");
 			}
 		}
@@ -93,8 +93,8 @@ bucket_analysis(uint64_t (*gen)(void *), void *opaque, size_t buckets[],
 	for (size_t i = 1; i <= 3; ++i) {
 		malloc_printf("round %zu\n", i);
 		fill(buckets, n_bucket, 0);
-		collect_buckets(gen, opaque, buckets, n_bucket,
-		    lg_bucket_width, n_iter);
+		collect_buckets(
+		    gen, opaque, buckets, n_bucket, lg_bucket_width, n_iter);
 		print_buckets(buckets, means, stddevs, n_bucket);
 	}
 }
@@ -108,7 +108,7 @@ bucket_analysis(uint64_t (*gen)(void *), void *opaque, size_t buckets[],
 
 typedef struct uniform_gen_arg_s uniform_gen_arg_t;
 struct uniform_gen_arg_s {
-	uint64_t state;
+	uint64_t       state;
 	const unsigned lg_range;
 };
 
@@ -131,8 +131,10 @@ TEST_BEGIN(test_uniform) {
 	 * integers, and that the minimal bucket mean is at least
 	 * MIN_BUCKET_MEAN.
 	 */
-	const size_t q = 1 << QUOTIENT_CEIL(LG_CEIL(QUOTIENT_CEIL(
-	    MIN_BUCKET_MEAN, N_BUCKET * (N_BUCKET - 1))), 2);
+	const size_t q = 1 << QUOTIENT_CEIL(
+	                     LG_CEIL(QUOTIENT_CEIL(
+	                         MIN_BUCKET_MEAN, N_BUCKET * (N_BUCKET - 1))),
+	                     2);
 	const size_t stddev = (N_BUCKET - 1) * q;
 	const size_t mean = N_BUCKET * stddev * q;
 	const size_t n_iter = N_BUCKET * mean;
@@ -142,14 +144,14 @@ TEST_BEGIN(test_uniform) {
 	size_t stddevs[N_BUCKET];
 	fill(stddevs, N_BUCKET, stddev);
 
-	uniform_gen_arg_t arg = {(uint64_t)(uintptr_t)&lg_range_test,
-	    lg_range_test};
+	uniform_gen_arg_t arg = {
+	    (uint64_t)(uintptr_t)&lg_range_test, lg_range_test};
 	size_t buckets[N_BUCKET];
 	assert_zu_ge(lg_range_test, LG_N_BUCKET, "");
 	const size_t lg_bucket_width = lg_range_test - LG_N_BUCKET;
 
-	bucket_analysis(uniform_gen, &arg, buckets, means, stddevs,
-	    N_BUCKET, lg_bucket_width, n_iter);
+	bucket_analysis(uniform_gen, &arg, buckets, means, stddevs, N_BUCKET,
+	    lg_bucket_width, n_iter);
 
 #undef LG_N_BUCKET
 #undef N_BUCKET
@@ -168,8 +170,8 @@ TEST_END
  * comments in test_prof_sample for explanations for n_divide.
  */
 static double
-fill_geometric_proportions(double proportions[], const size_t n_bucket,
-    const size_t n_divide) {
+fill_geometric_proportions(
+    double proportions[], const size_t n_bucket, const size_t n_divide) {
 	assert(n_bucket > 0);
 	assert(n_divide > 0);
 	double x = 1.;
@@ -220,12 +222,12 @@ TEST_BEGIN(test_prof_sample) {
 #ifdef JEMALLOC_PROF
 
 /* Number of divisions within [0, mean). */
-#define LG_N_DIVIDE 3
-#define N_DIVIDE (1 << LG_N_DIVIDE)
+#	define LG_N_DIVIDE 3
+#	define N_DIVIDE (1 << LG_N_DIVIDE)
 
 /* Coverage of buckets in terms of multiples of mean. */
-#define LG_N_MULTIPLY 2
-#define N_GEO_BUCKET (N_DIVIDE << LG_N_MULTIPLY)
+#	define LG_N_MULTIPLY 2
+#	define N_GEO_BUCKET (N_DIVIDE << LG_N_MULTIPLY)
 
 	test_skip_if(!opt_prof);
 
@@ -233,14 +235,15 @@ TEST_BEGIN(test_prof_sample) {
 
 	size_t lg_prof_sample_orig = lg_prof_sample;
 	assert_d_eq(mallctl("prof.reset", NULL, NULL, &lg_prof_sample_test,
-	    sizeof(size_t)), 0, "");
+	                sizeof(size_t)),
+	    0, "");
 	malloc_printf("lg_prof_sample = %zu\n", lg_prof_sample_test);
 
-	double proportions[N_GEO_BUCKET + 1];
-	const double min_proportion = fill_geometric_proportions(proportions,
-	    N_GEO_BUCKET + 1, N_DIVIDE);
-	const size_t n_iter = round_to_nearest(MIN_BUCKET_MEAN /
-	    min_proportion);
+	double       proportions[N_GEO_BUCKET + 1];
+	const double min_proportion = fill_geometric_proportions(
+	    proportions, N_GEO_BUCKET + 1, N_DIVIDE);
+	const size_t n_iter = round_to_nearest(
+	    MIN_BUCKET_MEAN / min_proportion);
 	size_t means[N_GEO_BUCKET + 1];
 	size_t stddevs[N_GEO_BUCKET + 1];
 	fill_references(means, stddevs, proportions, N_GEO_BUCKET + 1, n_iter);
@@ -255,12 +258,13 @@ TEST_BEGIN(test_prof_sample) {
 	    N_GEO_BUCKET + 1, lg_bucket_width, n_iter);
 
 	assert_d_eq(mallctl("prof.reset", NULL, NULL, &lg_prof_sample_orig,
-	    sizeof(size_t)), 0, "");
+	                sizeof(size_t)),
+	    0, "");
 
-#undef LG_N_DIVIDE
-#undef N_DIVIDE
-#undef LG_N_MULTIPLY
-#undef N_GEO_BUCKET
+#	undef LG_N_DIVIDE
+#	undef N_DIVIDE
+#	undef LG_N_MULTIPLY
+#	undef N_GEO_BUCKET
 
 #endif /* JEMALLOC_PROF */
 }
@@ -270,7 +274,5 @@ TEST_END
 
 int
 main(void) {
-	return test_no_reentrancy(
-	    test_uniform,
-	    test_prof_sample);
+	return test_no_reentrancy(test_uniform, test_prof_sample);
 }
diff --git a/test/analyze/sizes.c b/test/analyze/sizes.c
index 44c9de5e..b8d10629 100644
--- a/test/analyze/sizes.c
+++ b/test/analyze/sizes.c
@@ -11,9 +11,9 @@
 
 static void
 do_print(const char *name, size_t sz_bytes) {
-	const char *sizes[] = {"bytes", "KB", "MB", "GB", "TB", "PB", "EB",
-		"ZB"};
-	size_t sizes_max = sizeof(sizes)/sizeof(sizes[0]);
+	const char *sizes[] = {
+	    "bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB"};
+	size_t sizes_max = sizeof(sizes) / sizeof(sizes[0]);
 
 	size_t ind = 0;
 	double sz = sz_bytes;
@@ -29,9 +29,8 @@ do_print(const char *name, size_t sz_bytes) {
 }
 
 int
-main() {
-#define P(type)								\
-	do_print(#type, sizeof(type))
+main(void) {
+#define P(type) do_print(#type, sizeof(type))
 	P(arena_t);
 	P(arena_stats_t);
 	P(base_t);
diff --git a/test/include/test/SFMT-alti.h b/test/include/test/SFMT-alti.h
index a1885dbf..d6a85ad1 100644
--- a/test/include/test/SFMT-alti.h
+++ b/test/include/test/SFMT-alti.h
@@ -61,58 +61,59 @@
  * @return output
  */
 JEMALLOC_ALWAYS_INLINE
-vector unsigned int vec_recursion(vector unsigned int a,
-						vector unsigned int b,
-						vector unsigned int c,
-						vector unsigned int d) {
-
-    const vector unsigned int sl1 = ALTI_SL1;
-    const vector unsigned int sr1 = ALTI_SR1;
+vector unsigned int
+vec_recursion(vector unsigned int a, vector unsigned int b,
+    vector unsigned int c, vector unsigned int d) {
+	const vector unsigned int sl1 = ALTI_SL1;
+	const vector unsigned int sr1 = ALTI_SR1;
 #ifdef ONLY64
-    const vector unsigned int mask = ALTI_MSK64;
-    const vector unsigned char perm_sl = ALTI_SL2_PERM64;
-    const vector unsigned char perm_sr = ALTI_SR2_PERM64;
+	const vector unsigned int  mask = ALTI_MSK64;
+	const vector unsigned char perm_sl = ALTI_SL2_PERM64;
+	const vector unsigned char perm_sr = ALTI_SR2_PERM64;
 #else
-    const vector unsigned int mask = ALTI_MSK;
-    const vector unsigned char perm_sl = ALTI_SL2_PERM;
-    const vector unsigned char perm_sr = ALTI_SR2_PERM;
+	const vector unsigned int  mask = ALTI_MSK;
+	const vector unsigned char perm_sl = ALTI_SL2_PERM;
+	const vector unsigned char perm_sr = ALTI_SR2_PERM;
 #endif
-    vector unsigned int v, w, x, y, z;
-    x = vec_perm(a, (vector unsigned int)perm_sl, perm_sl);
-    v = a;
-    y = vec_sr(b, sr1);
-    z = vec_perm(c, (vector unsigned int)perm_sr, perm_sr);
-    w = vec_sl(d, sl1);
-    z = vec_xor(z, w);
-    y = vec_and(y, mask);
-    v = vec_xor(v, x);
-    z = vec_xor(z, y);
-    z = vec_xor(z, v);
-    return z;
+	vector unsigned int v, w, x, y, z;
+	x = vec_perm(a, (vector unsigned int)perm_sl, perm_sl);
+	v = a;
+	y = vec_sr(b, sr1);
+	z = vec_perm(c, (vector unsigned int)perm_sr, perm_sr);
+	w = vec_sl(d, sl1);
+	z = vec_xor(z, w);
+	y = vec_and(y, mask);
+	v = vec_xor(v, x);
+	z = vec_xor(z, y);
+	z = vec_xor(z, v);
+	return z;
 }
 
 /**
  * This function fills the internal state array with pseudorandom
  * integers.
  */
-static inline void gen_rand_all(sfmt_t *ctx) {
-    int i;
-    vector unsigned int r, r1, r2;
+static inline void
+gen_rand_all(sfmt_t *ctx) {
+	int                 i;
+	vector unsigned int r, r1, r2;
 
-    r1 = ctx->sfmt[N - 2].s;
-    r2 = ctx->sfmt[N - 1].s;
-    for (i = 0; i < N - POS1; i++) {
-	r = vec_recursion(ctx->sfmt[i].s, ctx->sfmt[i + POS1].s, r1, r2);
-	ctx->sfmt[i].s = r;
-	r1 = r2;
-	r2 = r;
-    }
-    for (; i < N; i++) {
-	r = vec_recursion(ctx->sfmt[i].s, ctx->sfmt[i + POS1 - N].s, r1, r2);
-	ctx->sfmt[i].s = r;
-	r1 = r2;
-	r2 = r;
-    }
+	r1 = ctx->sfmt[N - 2].s;
+	r2 = ctx->sfmt[N - 1].s;
+	for (i = 0; i < N - POS1; i++) {
+		r = vec_recursion(
+		    ctx->sfmt[i].s, ctx->sfmt[i + POS1].s, r1, r2);
+		ctx->sfmt[i].s = r;
+		r1 = r2;
+		r2 = r;
+	}
+	for (; i < N; i++) {
+		r = vec_recursion(
+		    ctx->sfmt[i].s, ctx->sfmt[i + POS1 - N].s, r1, r2);
+		ctx->sfmt[i].s = r;
+		r1 = r2;
+		r2 = r;
+	}
 }
 
 /**
@@ -122,50 +123,57 @@ static inline void gen_rand_all(sfmt_t *ctx) {
  * @param array an 128-bit array to be filled by pseudorandom numbers.
  * @param size number of 128-bit pesudorandom numbers to be generated.
  */
-static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) {
-    int i, j;
-    vector unsigned int r, r1, r2;
+static inline void
+gen_rand_array(sfmt_t *ctx, w128_t *array, int size) {
+	int                 i, j;
+	vector unsigned int r, r1, r2;
 
-    r1 = ctx->sfmt[N - 2].s;
-    r2 = ctx->sfmt[N - 1].s;
-    for (i = 0; i < N - POS1; i++) {
-	r = vec_recursion(ctx->sfmt[i].s, ctx->sfmt[i + POS1].s, r1, r2);
-	array[i].s = r;
-	r1 = r2;
-	r2 = r;
-    }
-    for (; i < N; i++) {
-	r = vec_recursion(ctx->sfmt[i].s, array[i + POS1 - N].s, r1, r2);
-	array[i].s = r;
-	r1 = r2;
-	r2 = r;
-    }
-    /* main loop */
-    for (; i < size - N; i++) {
-	r = vec_recursion(array[i - N].s, array[i + POS1 - N].s, r1, r2);
-	array[i].s = r;
-	r1 = r2;
-	r2 = r;
-    }
-    for (j = 0; j < 2 * N - size; j++) {
-	ctx->sfmt[j].s = array[j + size - N].s;
-    }
-    for (; i < size; i++) {
-	r = vec_recursion(array[i - N].s, array[i + POS1 - N].s, r1, r2);
-	array[i].s = r;
-	ctx->sfmt[j++].s = r;
-	r1 = r2;
-	r2 = r;
-    }
+	r1 = ctx->sfmt[N - 2].s;
+	r2 = ctx->sfmt[N - 1].s;
+	for (i = 0; i < N - POS1; i++) {
+		r = vec_recursion(
+		    ctx->sfmt[i].s, ctx->sfmt[i + POS1].s, r1, r2);
+		array[i].s = r;
+		r1 = r2;
+		r2 = r;
+	}
+	for (; i < N; i++) {
+		r = vec_recursion(
+		    ctx->sfmt[i].s, array[i + POS1 - N].s, r1, r2);
+		array[i].s = r;
+		r1 = r2;
+		r2 = r;
+	}
+	/* main loop */
+	for (; i < size - N; i++) {
+		r = vec_recursion(
+		    array[i - N].s, array[i + POS1 - N].s, r1, r2);
+		array[i].s = r;
+		r1 = r2;
+		r2 = r;
+	}
+	for (j = 0; j < 2 * N - size; j++) {
+		ctx->sfmt[j].s = array[j + size - N].s;
+	}
+	for (; i < size; i++) {
+		r = vec_recursion(
+		    array[i - N].s, array[i + POS1 - N].s, r1, r2);
+		array[i].s = r;
+		ctx->sfmt[j++].s = r;
+		r1 = r2;
+		r2 = r;
+	}
 }
 
 #ifndef ONLY64
-#if defined(__APPLE__)
-#define ALTI_SWAP (vector unsigned char) \
-	(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11)
-#else
-#define ALTI_SWAP {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11}
-#endif
+#	if defined(__APPLE__)
+#		define ALTI_SWAP                                              \
+			(vector unsigned char)(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, \
+			    14, 15, 8, 9, 10, 11)
+#	else
+#		define ALTI_SWAP                                              \
+			{ 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 }
+#	endif
 /**
  * This function swaps high and low 32-bit of 64-bit integers in user
  * specified array.
@@ -173,13 +181,15 @@ static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) {
  * @param array an 128-bit array to be swaped.
  * @param size size of 128-bit array.
  */
-static inline void swap(w128_t *array, int size) {
-    int i;
-    const vector unsigned char perm = ALTI_SWAP;
+static inline void
+swap(w128_t *array, int size) {
+	int                        i;
+	const vector unsigned char perm = ALTI_SWAP;
 
-    for (i = 0; i < size; i++) {
-	array[i].s = vec_perm(array[i].s, (vector unsigned int)perm, perm);
-    }
+	for (i = 0; i < size; i++) {
+		array[i].s = vec_perm(
+		    array[i].s, (vector unsigned int)perm, perm);
+	}
 }
 #endif
 
diff --git a/test/include/test/SFMT-params.h b/test/include/test/SFMT-params.h
index ade66222..4ff4316f 100644
--- a/test/include/test/SFMT-params.h
+++ b/test/include/test/SFMT-params.h
@@ -37,15 +37,15 @@
 #define SFMT_PARAMS_H
 
 #if !defined(MEXP)
-#ifdef __GNUC__
-  #warning "MEXP is not defined. I assume MEXP is 19937."
-#endif
-  #define MEXP 19937
+#	ifdef __GNUC__
+#		warning "MEXP is not defined. I assume MEXP is 19937."
+#	endif
+#	define MEXP 19937
 #endif
 /*-----------------
   BASIC DEFINITIONS
   -----------------*/
-/** Mersenne Exponent. The period of the sequence 
+/** Mersenne Exponent. The period of the sequence
  *  is a multiple of 2^MEXP-1.
  * #define MEXP 19937 */
 /** SFMT generator has an internal state array of 128-bit integers,
@@ -63,25 +63,25 @@
   following definitions are in paramsXXXX.h file.
   ----------------------*/
 /** the pick up position of the array.
-#define POS1 122 
+#define POS1 122
 */
 
 /** the parameter of shift left as four 32-bit registers.
 #define SL1 18
  */
 
-/** the parameter of shift left as one 128-bit register. 
- * The 128-bit integer is shifted by (SL2 * 8) bits. 
-#define SL2 1 
+/** the parameter of shift left as one 128-bit register.
+ * The 128-bit integer is shifted by (SL2 * 8) bits.
+#define SL2 1
 */
 
 /** the parameter of shift right as four 32-bit registers.
 #define SR1 11
 */
 
-/** the parameter of shift right as one 128-bit register. 
- * The 128-bit integer is shifted by (SL2 * 8) bits. 
-#define SR2 1 
+/** the parameter of shift right as one 128-bit register.
+ * The 128-bit integer is shifted by (SL2 * 8) bits.
+#define SR2 1
 */
 
 /** A bitmask, used in the recursion.  These parameters are introduced
@@ -89,7 +89,7 @@
 #define MSK1 0xdfffffefU
 #define MSK2 0xddfecb7fU
 #define MSK3 0xbffaffffU
-#define MSK4 0xbffffff6U 
+#define MSK4 0xbffffff6U
 */
 
 /** These definitions are part of a 128-bit period certification vector.
@@ -100,32 +100,32 @@
 */
 
 #if MEXP == 607
-  #include "test/SFMT-params607.h"
+#	include "test/SFMT-params607.h"
 #elif MEXP == 1279
-  #include "test/SFMT-params1279.h"
+#	include "test/SFMT-params1279.h"
 #elif MEXP == 2281
-  #include "test/SFMT-params2281.h"
+#	include "test/SFMT-params2281.h"
 #elif MEXP == 4253
-  #include "test/SFMT-params4253.h"
+#	include "test/SFMT-params4253.h"
 #elif MEXP == 11213
-  #include "test/SFMT-params11213.h"
+#	include "test/SFMT-params11213.h"
 #elif MEXP == 19937
-  #include "test/SFMT-params19937.h"
+#	include "test/SFMT-params19937.h"
 #elif MEXP == 44497
-  #include "test/SFMT-params44497.h"
+#	include "test/SFMT-params44497.h"
 #elif MEXP == 86243
-  #include "test/SFMT-params86243.h"
+#	include "test/SFMT-params86243.h"
 #elif MEXP == 132049
-  #include "test/SFMT-params132049.h"
+#	include "test/SFMT-params132049.h"
 #elif MEXP == 216091
-  #include "test/SFMT-params216091.h"
+#	include "test/SFMT-params216091.h"
 #else
-#ifdef __GNUC__
-  #error "MEXP is not valid."
-  #undef MEXP
-#else
-  #undef MEXP
-#endif
+#	ifdef __GNUC__
+#		error "MEXP is not valid."
+#		undef MEXP
+#	else
+#		undef MEXP
+#	endif
 
 #endif
 
diff --git a/test/include/test/SFMT-params11213.h b/test/include/test/SFMT-params11213.h
index 2994bd21..d2ab5b7c 100644
--- a/test/include/test/SFMT-params11213.h
+++ b/test/include/test/SFMT-params11213.h
@@ -36,46 +36,56 @@
 #ifndef SFMT_PARAMS11213_H
 #define SFMT_PARAMS11213_H
 
-#define POS1	68
-#define SL1	14
-#define SL2	3
-#define SR1	7
-#define SR2	3
-#define MSK1	0xeffff7fbU
-#define MSK2	0xffffffefU
-#define MSK3	0xdfdfbfffU
-#define MSK4	0x7fffdbfdU
-#define PARITY1	0x00000001U
-#define PARITY2	0x00000000U
-#define PARITY3	0xe8148000U
-#define PARITY4	0xd0c7afa3U
-
+#define POS1 68
+#define SL1 14
+#define SL2 3
+#define SR1 7
+#define SR2 3
+#define MSK1 0xeffff7fbU
+#define MSK2 0xffffffefU
+#define MSK3 0xdfdfbfffU
+#define MSK4 0x7fffdbfdU
+#define PARITY1 0x00000001U
+#define PARITY2 0x00000000U
+#define PARITY3 0xe8148000U
+#define PARITY4 0xd0c7afa3U
 
 /* PARAMETERS FOR ALTIVEC */
-#if defined(__APPLE__)	/* For OSX */
-    #define ALTI_SL1	(vector unsigned int)(SL1, SL1, SL1, SL1)
-    #define ALTI_SR1	(vector unsigned int)(SR1, SR1, SR1, SR1)
-    #define ALTI_MSK	(vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
-    #define ALTI_MSK64 \
-	(vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
-    #define ALTI_SL2_PERM \
-	(vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10)
-    #define ALTI_SL2_PERM64 \
-	(vector unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2)
-    #define ALTI_SR2_PERM \
-	(vector unsigned char)(5,6,7,0,9,10,11,4,13,14,15,8,19,19,19,12)
-    #define ALTI_SR2_PERM64 \
-	(vector unsigned char)(13,14,15,0,1,2,3,4,19,19,19,8,9,10,11,12)
-#else	/* For OTHER OSs(Linux?) */
-    #define ALTI_SL1	{SL1, SL1, SL1, SL1}
-    #define ALTI_SR1	{SR1, SR1, SR1, SR1}
-    #define ALTI_MSK	{MSK1, MSK2, MSK3, MSK4}
-    #define ALTI_MSK64	{MSK2, MSK1, MSK4, MSK3}
-    #define ALTI_SL2_PERM	{3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10}
-    #define ALTI_SL2_PERM64	{3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2}
-    #define ALTI_SR2_PERM	{5,6,7,0,9,10,11,4,13,14,15,8,19,19,19,12}
-    #define ALTI_SR2_PERM64	{13,14,15,0,1,2,3,4,19,19,19,8,9,10,11,12}
-#endif	/* For OSX */
-#define IDSTR	"SFMT-11213:68-14-3-7-3:effff7fb-ffffffef-dfdfbfff-7fffdbfd"
+#if defined(__APPLE__) /* For OSX */
+#	define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1)
+#	define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1)
+#	define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
+#	define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
+#	define ALTI_SL2_PERM                                                  \
+		(vector unsigned char)(3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, \
+		    15, 8, 9, 10)
+#	define ALTI_SL2_PERM64                                                \
+		(vector unsigned char)(3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13,  \
+		    14, 15, 0, 1, 2)
+#	define ALTI_SR2_PERM                                                  \
+		(vector unsigned char)(5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15,   \
+		    8, 19, 19, 19, 12)
+#	define ALTI_SR2_PERM64                                                \
+		(vector unsigned char)(13, 14, 15, 0, 1, 2, 3, 4, 19, 19, 19,  \
+		    8, 9, 10, 11, 12)
+#else /* For OTHER OSs(Linux?) */
+#	define ALTI_SL1                                                       \
+		{ SL1, SL1, SL1, SL1 }
+#	define ALTI_SR1                                                       \
+		{ SR1, SR1, SR1, SR1 }
+#	define ALTI_MSK                                                       \
+		{ MSK1, MSK2, MSK3, MSK4 }
+#	define ALTI_MSK64                                                     \
+		{ MSK2, MSK1, MSK4, MSK3 }
+#	define ALTI_SL2_PERM                                                  \
+		{ 3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10 }
+#	define ALTI_SL2_PERM64                                                \
+		{ 3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13, 14, 15, 0, 1, 2 }
+#	define ALTI_SR2_PERM                                                  \
+		{ 5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, 8, 19, 19, 19, 12 }
+#	define ALTI_SR2_PERM64                                                \
+		{ 13, 14, 15, 0, 1, 2, 3, 4, 19, 19, 19, 8, 9, 10, 11, 12 }
+#endif /* For OSX */
+#define IDSTR "SFMT-11213:68-14-3-7-3:effff7fb-ffffffef-dfdfbfff-7fffdbfd"
 
 #endif /* SFMT_PARAMS11213_H */
diff --git a/test/include/test/SFMT-params1279.h b/test/include/test/SFMT-params1279.h
index d7959f98..1be5c01d 100644
--- a/test/include/test/SFMT-params1279.h
+++ b/test/include/test/SFMT-params1279.h
@@ -36,46 +36,56 @@
 #ifndef SFMT_PARAMS1279_H
 #define SFMT_PARAMS1279_H
 
-#define POS1	7
-#define SL1	14
-#define SL2	3
-#define SR1	5
-#define SR2	1
-#define MSK1	0xf7fefffdU
-#define MSK2	0x7fefcfffU
-#define MSK3	0xaff3ef3fU
-#define MSK4	0xb5ffff7fU
-#define PARITY1	0x00000001U
-#define PARITY2	0x00000000U
-#define PARITY3	0x00000000U
-#define PARITY4	0x20000000U
-
+#define POS1 7
+#define SL1 14
+#define SL2 3
+#define SR1 5
+#define SR2 1
+#define MSK1 0xf7fefffdU
+#define MSK2 0x7fefcfffU
+#define MSK3 0xaff3ef3fU
+#define MSK4 0xb5ffff7fU
+#define PARITY1 0x00000001U
+#define PARITY2 0x00000000U
+#define PARITY3 0x00000000U
+#define PARITY4 0x20000000U
 
 /* PARAMETERS FOR ALTIVEC */
-#if defined(__APPLE__)	/* For OSX */
-    #define ALTI_SL1	(vector unsigned int)(SL1, SL1, SL1, SL1)
-    #define ALTI_SR1	(vector unsigned int)(SR1, SR1, SR1, SR1)
-    #define ALTI_MSK	(vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
-    #define ALTI_MSK64 \
-	(vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
-    #define ALTI_SL2_PERM \
-	(vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10)
-    #define ALTI_SL2_PERM64 \
-	(vector unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2)
-    #define ALTI_SR2_PERM \
-	(vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14)
-    #define ALTI_SR2_PERM64 \
-	(vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14)
-#else	/* For OTHER OSs(Linux?) */
-    #define ALTI_SL1	{SL1, SL1, SL1, SL1}
-    #define ALTI_SR1	{SR1, SR1, SR1, SR1}
-    #define ALTI_MSK	{MSK1, MSK2, MSK3, MSK4}
-    #define ALTI_MSK64	{MSK2, MSK1, MSK4, MSK3}
-    #define ALTI_SL2_PERM	{3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10}
-    #define ALTI_SL2_PERM64	{3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2}
-    #define ALTI_SR2_PERM	{7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14}
-    #define ALTI_SR2_PERM64	{15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14}
-#endif	/* For OSX */
-#define IDSTR	"SFMT-1279:7-14-3-5-1:f7fefffd-7fefcfff-aff3ef3f-b5ffff7f"
+#if defined(__APPLE__) /* For OSX */
+#	define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1)
+#	define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1)
+#	define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
+#	define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
+#	define ALTI_SL2_PERM                                                  \
+		(vector unsigned char)(3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, \
+		    15, 8, 9, 10)
+#	define ALTI_SL2_PERM64                                                \
+		(vector unsigned char)(3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13,  \
+		    14, 15, 0, 1, 2)
+#	define ALTI_SR2_PERM                                                  \
+		(vector unsigned char)(7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10,  \
+		    17, 12, 13, 14)
+#	define ALTI_SR2_PERM64                                                \
+		(vector unsigned char)(15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10,  \
+		    11, 12, 13, 14)
+#else /* For OTHER OSs(Linux?) */
+#	define ALTI_SL1                                                       \
+		{ SL1, SL1, SL1, SL1 }
+#	define ALTI_SR1                                                       \
+		{ SR1, SR1, SR1, SR1 }
+#	define ALTI_MSK                                                       \
+		{ MSK1, MSK2, MSK3, MSK4 }
+#	define ALTI_MSK64                                                     \
+		{ MSK2, MSK1, MSK4, MSK3 }
+#	define ALTI_SL2_PERM                                                  \
+		{ 3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10 }
+#	define ALTI_SL2_PERM64                                                \
+		{ 3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13, 14, 15, 0, 1, 2 }
+#	define ALTI_SR2_PERM                                                  \
+		{ 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, 17, 12, 13, 14 }
+#	define ALTI_SR2_PERM64                                                \
+		{ 15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14 }
+#endif /* For OSX */
+#define IDSTR "SFMT-1279:7-14-3-5-1:f7fefffd-7fefcfff-aff3ef3f-b5ffff7f"
 
 #endif /* SFMT_PARAMS1279_H */
diff --git a/test/include/test/SFMT-params132049.h b/test/include/test/SFMT-params132049.h
index a1dcec39..1002614b 100644
--- a/test/include/test/SFMT-params132049.h
+++ b/test/include/test/SFMT-params132049.h
@@ -36,46 +36,56 @@
 #ifndef SFMT_PARAMS132049_H
 #define SFMT_PARAMS132049_H
 
-#define POS1	110
-#define SL1	19
-#define SL2	1
-#define SR1	21
-#define SR2	1
-#define MSK1	0xffffbb5fU
-#define MSK2	0xfb6ebf95U
-#define MSK3	0xfffefffaU
-#define MSK4	0xcff77fffU
-#define PARITY1	0x00000001U
-#define PARITY2	0x00000000U
-#define PARITY3	0xcb520000U
-#define PARITY4	0xc7e91c7dU
-
+#define POS1 110
+#define SL1 19
+#define SL2 1
+#define SR1 21
+#define SR2 1
+#define MSK1 0xffffbb5fU
+#define MSK2 0xfb6ebf95U
+#define MSK3 0xfffefffaU
+#define MSK4 0xcff77fffU
+#define PARITY1 0x00000001U
+#define PARITY2 0x00000000U
+#define PARITY3 0xcb520000U
+#define PARITY4 0xc7e91c7dU
 
 /* PARAMETERS FOR ALTIVEC */
-#if defined(__APPLE__)	/* For OSX */
-    #define ALTI_SL1	(vector unsigned int)(SL1, SL1, SL1, SL1)
-    #define ALTI_SR1	(vector unsigned int)(SR1, SR1, SR1, SR1)
-    #define ALTI_MSK	(vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
-    #define ALTI_MSK64 \
-	(vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
-    #define ALTI_SL2_PERM \
-	(vector unsigned char)(1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8)
-    #define ALTI_SL2_PERM64 \
-	(vector unsigned char)(1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0)
-    #define ALTI_SR2_PERM \
-	(vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14)
-    #define ALTI_SR2_PERM64 \
-	(vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14)
-#else	/* For OTHER OSs(Linux?) */
-    #define ALTI_SL1	{SL1, SL1, SL1, SL1}
-    #define ALTI_SR1	{SR1, SR1, SR1, SR1}
-    #define ALTI_MSK	{MSK1, MSK2, MSK3, MSK4}
-    #define ALTI_MSK64	{MSK2, MSK1, MSK4, MSK3}
-    #define ALTI_SL2_PERM	{1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8}
-    #define ALTI_SL2_PERM64	{1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0}
-    #define ALTI_SR2_PERM	{7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14}
-    #define ALTI_SR2_PERM64	{15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14}
-#endif	/* For OSX */
-#define IDSTR	"SFMT-132049:110-19-1-21-1:ffffbb5f-fb6ebf95-fffefffa-cff77fff"
+#if defined(__APPLE__) /* For OSX */
+#	define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1)
+#	define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1)
+#	define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
+#	define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
+#	define ALTI_SL2_PERM                                                  \
+		(vector unsigned char)(1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4,  \
+		    13, 14, 15, 8)
+#	define ALTI_SL2_PERM64                                                \
+		(vector unsigned char)(1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, \
+		    13, 14, 15, 0)
+#	define ALTI_SR2_PERM                                                  \
+		(vector unsigned char)(7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10,  \
+		    17, 12, 13, 14)
+#	define ALTI_SR2_PERM64                                                \
+		(vector unsigned char)(15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10,  \
+		    11, 12, 13, 14)
+#else /* For OTHER OSs(Linux?) */
+#	define ALTI_SL1                                                       \
+		{ SL1, SL1, SL1, SL1 }
+#	define ALTI_SR1                                                       \
+		{ SR1, SR1, SR1, SR1 }
+#	define ALTI_MSK                                                       \
+		{ MSK1, MSK2, MSK3, MSK4 }
+#	define ALTI_MSK64                                                     \
+		{ MSK2, MSK1, MSK4, MSK3 }
+#	define ALTI_SL2_PERM                                                  \
+		{ 1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, 8 }
+#	define ALTI_SL2_PERM64                                                \
+		{ 1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, 13, 14, 15, 0 }
+#	define ALTI_SR2_PERM                                                  \
+		{ 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, 17, 12, 13, 14 }
+#	define ALTI_SR2_PERM64                                                \
+		{ 15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14 }
+#endif /* For OSX */
+#define IDSTR "SFMT-132049:110-19-1-21-1:ffffbb5f-fb6ebf95-fffefffa-cff77fff"
 
 #endif /* SFMT_PARAMS132049_H */
diff --git a/test/include/test/SFMT-params19937.h b/test/include/test/SFMT-params19937.h
index fb92b4c9..71df2713 100644
--- a/test/include/test/SFMT-params19937.h
+++ b/test/include/test/SFMT-params19937.h
@@ -36,46 +36,56 @@
 #ifndef SFMT_PARAMS19937_H
 #define SFMT_PARAMS19937_H
 
-#define POS1	122
-#define SL1	18
-#define SL2	1
-#define SR1	11
-#define SR2	1
-#define MSK1	0xdfffffefU
-#define MSK2	0xddfecb7fU
-#define MSK3	0xbffaffffU
-#define MSK4	0xbffffff6U
-#define PARITY1	0x00000001U
-#define PARITY2	0x00000000U
-#define PARITY3	0x00000000U
-#define PARITY4	0x13c9e684U
-
+#define POS1 122
+#define SL1 18
+#define SL2 1
+#define SR1 11
+#define SR2 1
+#define MSK1 0xdfffffefU
+#define MSK2 0xddfecb7fU
+#define MSK3 0xbffaffffU
+#define MSK4 0xbffffff6U
+#define PARITY1 0x00000001U
+#define PARITY2 0x00000000U
+#define PARITY3 0x00000000U
+#define PARITY4 0x13c9e684U
 
 /* PARAMETERS FOR ALTIVEC */
-#if defined(__APPLE__)	/* For OSX */
-    #define ALTI_SL1	(vector unsigned int)(SL1, SL1, SL1, SL1)
-    #define ALTI_SR1	(vector unsigned int)(SR1, SR1, SR1, SR1)
-    #define ALTI_MSK	(vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
-    #define ALTI_MSK64 \
-	(vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
-    #define ALTI_SL2_PERM \
-	(vector unsigned char)(1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8)
-    #define ALTI_SL2_PERM64 \
-	(vector unsigned char)(1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0)
-    #define ALTI_SR2_PERM \
-	(vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14)
-    #define ALTI_SR2_PERM64 \
-	(vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14)
-#else	/* For OTHER OSs(Linux?) */
-    #define ALTI_SL1	{SL1, SL1, SL1, SL1}
-    #define ALTI_SR1	{SR1, SR1, SR1, SR1}
-    #define ALTI_MSK	{MSK1, MSK2, MSK3, MSK4}
-    #define ALTI_MSK64	{MSK2, MSK1, MSK4, MSK3}
-    #define ALTI_SL2_PERM	{1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8}
-    #define ALTI_SL2_PERM64	{1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0}
-    #define ALTI_SR2_PERM	{7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14}
-    #define ALTI_SR2_PERM64	{15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14}
-#endif	/* For OSX */
-#define IDSTR	"SFMT-19937:122-18-1-11-1:dfffffef-ddfecb7f-bffaffff-bffffff6"
+#if defined(__APPLE__) /* For OSX */
+#	define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1)
+#	define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1)
+#	define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
+#	define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
+#	define ALTI_SL2_PERM                                                  \
+		(vector unsigned char)(1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4,  \
+		    13, 14, 15, 8)
+#	define ALTI_SL2_PERM64                                                \
+		(vector unsigned char)(1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, \
+		    13, 14, 15, 0)
+#	define ALTI_SR2_PERM                                                  \
+		(vector unsigned char)(7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10,  \
+		    17, 12, 13, 14)
+#	define ALTI_SR2_PERM64                                                \
+		(vector unsigned char)(15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10,  \
+		    11, 12, 13, 14)
+#else /* For OTHER OSs(Linux?) */
+#	define ALTI_SL1                                                       \
+		{ SL1, SL1, SL1, SL1 }
+#	define ALTI_SR1                                                       \
+		{ SR1, SR1, SR1, SR1 }
+#	define ALTI_MSK                                                       \
+		{ MSK1, MSK2, MSK3, MSK4 }
+#	define ALTI_MSK64                                                     \
+		{ MSK2, MSK1, MSK4, MSK3 }
+#	define ALTI_SL2_PERM                                                  \
+		{ 1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, 8 }
+#	define ALTI_SL2_PERM64                                                \
+		{ 1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, 13, 14, 15, 0 }
+#	define ALTI_SR2_PERM                                                  \
+		{ 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, 17, 12, 13, 14 }
+#	define ALTI_SR2_PERM64                                                \
+		{ 15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14 }
+#endif /* For OSX */
+#define IDSTR "SFMT-19937:122-18-1-11-1:dfffffef-ddfecb7f-bffaffff-bffffff6"
 
 #endif /* SFMT_PARAMS19937_H */
diff --git a/test/include/test/SFMT-params216091.h b/test/include/test/SFMT-params216091.h
index 125ce282..d2d240e2 100644
--- a/test/include/test/SFMT-params216091.h
+++ b/test/include/test/SFMT-params216091.h
@@ -36,46 +36,56 @@
 #ifndef SFMT_PARAMS216091_H
 #define SFMT_PARAMS216091_H
 
-#define POS1	627
-#define SL1	11
-#define SL2	3
-#define SR1	10
-#define SR2	1
-#define MSK1	0xbff7bff7U
-#define MSK2	0xbfffffffU
-#define MSK3	0xbffffa7fU
-#define MSK4	0xffddfbfbU
-#define PARITY1	0xf8000001U
-#define PARITY2	0x89e80709U
-#define PARITY3	0x3bd2b64bU
-#define PARITY4	0x0c64b1e4U
-
+#define POS1 627
+#define SL1 11
+#define SL2 3
+#define SR1 10
+#define SR2 1
+#define MSK1 0xbff7bff7U
+#define MSK2 0xbfffffffU
+#define MSK3 0xbffffa7fU
+#define MSK4 0xffddfbfbU
+#define PARITY1 0xf8000001U
+#define PARITY2 0x89e80709U
+#define PARITY3 0x3bd2b64bU
+#define PARITY4 0x0c64b1e4U
 
 /* PARAMETERS FOR ALTIVEC */
-#if defined(__APPLE__)	/* For OSX */
-    #define ALTI_SL1	(vector unsigned int)(SL1, SL1, SL1, SL1)
-    #define ALTI_SR1	(vector unsigned int)(SR1, SR1, SR1, SR1)
-    #define ALTI_MSK	(vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
-    #define ALTI_MSK64 \
-	(vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
-    #define ALTI_SL2_PERM \
-	(vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10)
-    #define ALTI_SL2_PERM64 \
-	(vector unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2)
-    #define ALTI_SR2_PERM \
-	(vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14)
-    #define ALTI_SR2_PERM64 \
-	(vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14)
-#else	/* For OTHER OSs(Linux?) */
-    #define ALTI_SL1	{SL1, SL1, SL1, SL1}
-    #define ALTI_SR1	{SR1, SR1, SR1, SR1}
-    #define ALTI_MSK	{MSK1, MSK2, MSK3, MSK4}
-    #define ALTI_MSK64	{MSK2, MSK1, MSK4, MSK3}
-    #define ALTI_SL2_PERM	{3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10}
-    #define ALTI_SL2_PERM64	{3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2}
-    #define ALTI_SR2_PERM	{7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14}
-    #define ALTI_SR2_PERM64	{15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14}
-#endif	/* For OSX */
-#define IDSTR	"SFMT-216091:627-11-3-10-1:bff7bff7-bfffffff-bffffa7f-ffddfbfb"
+#if defined(__APPLE__) /* For OSX */
+#	define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1)
+#	define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1)
+#	define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
+#	define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
+#	define ALTI_SL2_PERM                                                  \
+		(vector unsigned char)(3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, \
+		    15, 8, 9, 10)
+#	define ALTI_SL2_PERM64                                                \
+		(vector unsigned char)(3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13,  \
+		    14, 15, 0, 1, 2)
+#	define ALTI_SR2_PERM                                                  \
+		(vector unsigned char)(7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10,  \
+		    17, 12, 13, 14)
+#	define ALTI_SR2_PERM64                                                \
+		(vector unsigned char)(15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10,  \
+		    11, 12, 13, 14)
+#else /* For OTHER OSs(Linux?) */
+#	define ALTI_SL1                                                       \
+		{ SL1, SL1, SL1, SL1 }
+#	define ALTI_SR1                                                       \
+		{ SR1, SR1, SR1, SR1 }
+#	define ALTI_MSK                                                       \
+		{ MSK1, MSK2, MSK3, MSK4 }
+#	define ALTI_MSK64                                                     \
+		{ MSK2, MSK1, MSK4, MSK3 }
+#	define ALTI_SL2_PERM                                                  \
+		{ 3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10 }
+#	define ALTI_SL2_PERM64                                                \
+		{ 3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13, 14, 15, 0, 1, 2 }
+#	define ALTI_SR2_PERM                                                  \
+		{ 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, 17, 12, 13, 14 }
+#	define ALTI_SR2_PERM64                                                \
+		{ 15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14 }
+#endif /* For OSX */
+#define IDSTR "SFMT-216091:627-11-3-10-1:bff7bff7-bfffffff-bffffa7f-ffddfbfb"
 
 #endif /* SFMT_PARAMS216091_H */
diff --git a/test/include/test/SFMT-params2281.h b/test/include/test/SFMT-params2281.h
index 0ef85c40..97b8de68 100644
--- a/test/include/test/SFMT-params2281.h
+++ b/test/include/test/SFMT-params2281.h
@@ -36,46 +36,56 @@
 #ifndef SFMT_PARAMS2281_H
 #define SFMT_PARAMS2281_H
 
-#define POS1	12
-#define SL1	19
-#define SL2	1
-#define SR1	5
-#define SR2	1
-#define MSK1	0xbff7ffbfU
-#define MSK2	0xfdfffffeU
-#define MSK3	0xf7ffef7fU
-#define MSK4	0xf2f7cbbfU
-#define PARITY1	0x00000001U
-#define PARITY2	0x00000000U
-#define PARITY3	0x00000000U
-#define PARITY4	0x41dfa600U
-
+#define POS1 12
+#define SL1 19
+#define SL2 1
+#define SR1 5
+#define SR2 1
+#define MSK1 0xbff7ffbfU
+#define MSK2 0xfdfffffeU
+#define MSK3 0xf7ffef7fU
+#define MSK4 0xf2f7cbbfU
+#define PARITY1 0x00000001U
+#define PARITY2 0x00000000U
+#define PARITY3 0x00000000U
+#define PARITY4 0x41dfa600U
 
 /* PARAMETERS FOR ALTIVEC */
-#if defined(__APPLE__)	/* For OSX */
-    #define ALTI_SL1	(vector unsigned int)(SL1, SL1, SL1, SL1)
-    #define ALTI_SR1	(vector unsigned int)(SR1, SR1, SR1, SR1)
-    #define ALTI_MSK	(vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
-    #define ALTI_MSK64 \
-	(vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
-    #define ALTI_SL2_PERM \
-	(vector unsigned char)(1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8)
-    #define ALTI_SL2_PERM64 \
-	(vector unsigned char)(1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0)
-    #define ALTI_SR2_PERM \
-	(vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14)
-    #define ALTI_SR2_PERM64 \
-	(vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14)
-#else	/* For OTHER OSs(Linux?) */
-    #define ALTI_SL1	{SL1, SL1, SL1, SL1}
-    #define ALTI_SR1	{SR1, SR1, SR1, SR1}
-    #define ALTI_MSK	{MSK1, MSK2, MSK3, MSK4}
-    #define ALTI_MSK64	{MSK2, MSK1, MSK4, MSK3}
-    #define ALTI_SL2_PERM	{1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8}
-    #define ALTI_SL2_PERM64	{1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0}
-    #define ALTI_SR2_PERM	{7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14}
-    #define ALTI_SR2_PERM64	{15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14}
-#endif	/* For OSX */
-#define IDSTR	"SFMT-2281:12-19-1-5-1:bff7ffbf-fdfffffe-f7ffef7f-f2f7cbbf"
+#if defined(__APPLE__) /* For OSX */
+#	define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1)
+#	define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1)
+#	define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
+#	define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
+#	define ALTI_SL2_PERM                                                  \
+		(vector unsigned char)(1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4,  \
+		    13, 14, 15, 8)
+#	define ALTI_SL2_PERM64                                                \
+		(vector unsigned char)(1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, \
+		    13, 14, 15, 0)
+#	define ALTI_SR2_PERM                                                  \
+		(vector unsigned char)(7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10,  \
+		    17, 12, 13, 14)
+#	define ALTI_SR2_PERM64                                                \
+		(vector unsigned char)(15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10,  \
+		    11, 12, 13, 14)
+#else /* For OTHER OSs(Linux?) */
+#	define ALTI_SL1                                                       \
+		{ SL1, SL1, SL1, SL1 }
+#	define ALTI_SR1                                                       \
+		{ SR1, SR1, SR1, SR1 }
+#	define ALTI_MSK                                                       \
+		{ MSK1, MSK2, MSK3, MSK4 }
+#	define ALTI_MSK64                                                     \
+		{ MSK2, MSK1, MSK4, MSK3 }
+#	define ALTI_SL2_PERM                                                  \
+		{ 1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, 8 }
+#	define ALTI_SL2_PERM64                                                \
+		{ 1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, 13, 14, 15, 0 }
+#	define ALTI_SR2_PERM                                                  \
+		{ 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, 17, 12, 13, 14 }
+#	define ALTI_SR2_PERM64                                                \
+		{ 15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14 }
+#endif /* For OSX */
+#define IDSTR "SFMT-2281:12-19-1-5-1:bff7ffbf-fdfffffe-f7ffef7f-f2f7cbbf"
 
 #endif /* SFMT_PARAMS2281_H */
diff --git a/test/include/test/SFMT-params4253.h b/test/include/test/SFMT-params4253.h
index 9f07bc67..7e51edd8 100644
--- a/test/include/test/SFMT-params4253.h
+++ b/test/include/test/SFMT-params4253.h
@@ -36,46 +36,56 @@
 #ifndef SFMT_PARAMS4253_H
 #define SFMT_PARAMS4253_H
 
-#define POS1	17
-#define SL1	20
-#define SL2	1
-#define SR1	7
-#define SR2	1
-#define MSK1	0x9f7bffffU
-#define MSK2	0x9fffff5fU
-#define MSK3	0x3efffffbU
-#define MSK4	0xfffff7bbU
-#define PARITY1	0xa8000001U
-#define PARITY2	0xaf5390a3U
-#define PARITY3	0xb740b3f8U
-#define PARITY4	0x6c11486dU
-
+#define POS1 17
+#define SL1 20
+#define SL2 1
+#define SR1 7
+#define SR2 1
+#define MSK1 0x9f7bffffU
+#define MSK2 0x9fffff5fU
+#define MSK3 0x3efffffbU
+#define MSK4 0xfffff7bbU
+#define PARITY1 0xa8000001U
+#define PARITY2 0xaf5390a3U
+#define PARITY3 0xb740b3f8U
+#define PARITY4 0x6c11486dU
 
 /* PARAMETERS FOR ALTIVEC */
-#if defined(__APPLE__)	/* For OSX */
-    #define ALTI_SL1	(vector unsigned int)(SL1, SL1, SL1, SL1)
-    #define ALTI_SR1	(vector unsigned int)(SR1, SR1, SR1, SR1)
-    #define ALTI_MSK	(vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
-    #define ALTI_MSK64 \
-	(vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
-    #define ALTI_SL2_PERM \
-	(vector unsigned char)(1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8)
-    #define ALTI_SL2_PERM64 \
-	(vector unsigned char)(1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0)
-    #define ALTI_SR2_PERM \
-	(vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14)
-    #define ALTI_SR2_PERM64 \
-	(vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14)
-#else	/* For OTHER OSs(Linux?) */
-    #define ALTI_SL1	{SL1, SL1, SL1, SL1}
-    #define ALTI_SR1	{SR1, SR1, SR1, SR1}
-    #define ALTI_MSK	{MSK1, MSK2, MSK3, MSK4}
-    #define ALTI_MSK64	{MSK2, MSK1, MSK4, MSK3}
-    #define ALTI_SL2_PERM	{1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8}
-    #define ALTI_SL2_PERM64	{1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0}
-    #define ALTI_SR2_PERM	{7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14}
-    #define ALTI_SR2_PERM64	{15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14}
-#endif	/* For OSX */
-#define IDSTR	"SFMT-4253:17-20-1-7-1:9f7bffff-9fffff5f-3efffffb-fffff7bb"
+#if defined(__APPLE__) /* For OSX */
+#	define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1)
+#	define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1)
+#	define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
+#	define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
+#	define ALTI_SL2_PERM                                                  \
+		(vector unsigned char)(1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4,  \
+		    13, 14, 15, 8)
+#	define ALTI_SL2_PERM64                                                \
+		(vector unsigned char)(1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, \
+		    13, 14, 15, 0)
+#	define ALTI_SR2_PERM                                                  \
+		(vector unsigned char)(7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10,  \
+		    17, 12, 13, 14)
+#	define ALTI_SR2_PERM64                                                \
+		(vector unsigned char)(15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10,  \
+		    11, 12, 13, 14)
+#else /* For OTHER OSs(Linux?) */
+#	define ALTI_SL1                                                       \
+		{ SL1, SL1, SL1, SL1 }
+#	define ALTI_SR1                                                       \
+		{ SR1, SR1, SR1, SR1 }
+#	define ALTI_MSK                                                       \
+		{ MSK1, MSK2, MSK3, MSK4 }
+#	define ALTI_MSK64                                                     \
+		{ MSK2, MSK1, MSK4, MSK3 }
+#	define ALTI_SL2_PERM                                                  \
+		{ 1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, 8 }
+#	define ALTI_SL2_PERM64                                                \
+		{ 1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, 13, 14, 15, 0 }
+#	define ALTI_SR2_PERM                                                  \
+		{ 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, 17, 12, 13, 14 }
+#	define ALTI_SR2_PERM64                                                \
+		{ 15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14 }
+#endif /* For OSX */
+#define IDSTR "SFMT-4253:17-20-1-7-1:9f7bffff-9fffff5f-3efffffb-fffff7bb"
 
 #endif /* SFMT_PARAMS4253_H */
diff --git a/test/include/test/SFMT-params44497.h b/test/include/test/SFMT-params44497.h
index 85598fed..8f6fee7b 100644
--- a/test/include/test/SFMT-params44497.h
+++ b/test/include/test/SFMT-params44497.h
@@ -36,46 +36,56 @@
 #ifndef SFMT_PARAMS44497_H
 #define SFMT_PARAMS44497_H
 
-#define POS1	330
-#define SL1	5
-#define SL2	3
-#define SR1	9
-#define SR2	3
-#define MSK1	0xeffffffbU
-#define MSK2	0xdfbebfffU
-#define MSK3	0xbfbf7befU
-#define MSK4	0x9ffd7bffU
-#define PARITY1	0x00000001U
-#define PARITY2	0x00000000U
-#define PARITY3	0xa3ac4000U
-#define PARITY4	0xecc1327aU
-
+#define POS1 330
+#define SL1 5
+#define SL2 3
+#define SR1 9
+#define SR2 3
+#define MSK1 0xeffffffbU
+#define MSK2 0xdfbebfffU
+#define MSK3 0xbfbf7befU
+#define MSK4 0x9ffd7bffU
+#define PARITY1 0x00000001U
+#define PARITY2 0x00000000U
+#define PARITY3 0xa3ac4000U
+#define PARITY4 0xecc1327aU
 
 /* PARAMETERS FOR ALTIVEC */
-#if defined(__APPLE__)	/* For OSX */
-    #define ALTI_SL1	(vector unsigned int)(SL1, SL1, SL1, SL1)
-    #define ALTI_SR1	(vector unsigned int)(SR1, SR1, SR1, SR1)
-    #define ALTI_MSK	(vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
-    #define ALTI_MSK64 \
-	(vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
-    #define ALTI_SL2_PERM \
-	(vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10)
-    #define ALTI_SL2_PERM64 \
-	(vector unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2)
-    #define ALTI_SR2_PERM \
-	(vector unsigned char)(5,6,7,0,9,10,11,4,13,14,15,8,19,19,19,12)
-    #define ALTI_SR2_PERM64 \
-	(vector unsigned char)(13,14,15,0,1,2,3,4,19,19,19,8,9,10,11,12)
-#else	/* For OTHER OSs(Linux?) */
-    #define ALTI_SL1	{SL1, SL1, SL1, SL1}
-    #define ALTI_SR1	{SR1, SR1, SR1, SR1}
-    #define ALTI_MSK	{MSK1, MSK2, MSK3, MSK4}
-    #define ALTI_MSK64	{MSK2, MSK1, MSK4, MSK3}
-    #define ALTI_SL2_PERM	{3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10}
-    #define ALTI_SL2_PERM64	{3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2}
-    #define ALTI_SR2_PERM	{5,6,7,0,9,10,11,4,13,14,15,8,19,19,19,12}
-    #define ALTI_SR2_PERM64	{13,14,15,0,1,2,3,4,19,19,19,8,9,10,11,12}
-#endif	/* For OSX */
-#define IDSTR	"SFMT-44497:330-5-3-9-3:effffffb-dfbebfff-bfbf7bef-9ffd7bff"
+#if defined(__APPLE__) /* For OSX */
+#	define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1)
+#	define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1)
+#	define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
+#	define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
+#	define ALTI_SL2_PERM                                                  \
+		(vector unsigned char)(3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, \
+		    15, 8, 9, 10)
+#	define ALTI_SL2_PERM64                                                \
+		(vector unsigned char)(3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13,  \
+		    14, 15, 0, 1, 2)
+#	define ALTI_SR2_PERM                                                  \
+		(vector unsigned char)(5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15,   \
+		    8, 19, 19, 19, 12)
+#	define ALTI_SR2_PERM64                                                \
+		(vector unsigned char)(13, 14, 15, 0, 1, 2, 3, 4, 19, 19, 19,  \
+		    8, 9, 10, 11, 12)
+#else /* For OTHER OSs(Linux?) */
+#	define ALTI_SL1                                                       \
+		{ SL1, SL1, SL1, SL1 }
+#	define ALTI_SR1                                                       \
+		{ SR1, SR1, SR1, SR1 }
+#	define ALTI_MSK                                                       \
+		{ MSK1, MSK2, MSK3, MSK4 }
+#	define ALTI_MSK64                                                     \
+		{ MSK2, MSK1, MSK4, MSK3 }
+#	define ALTI_SL2_PERM                                                  \
+		{ 3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10 }
+#	define ALTI_SL2_PERM64                                                \
+		{ 3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13, 14, 15, 0, 1, 2 }
+#	define ALTI_SR2_PERM                                                  \
+		{ 5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, 8, 19, 19, 19, 12 }
+#	define ALTI_SR2_PERM64                                                \
+		{ 13, 14, 15, 0, 1, 2, 3, 4, 19, 19, 19, 8, 9, 10, 11, 12 }
+#endif /* For OSX */
+#define IDSTR "SFMT-44497:330-5-3-9-3:effffffb-dfbebfff-bfbf7bef-9ffd7bff"
 
 #endif /* SFMT_PARAMS44497_H */
diff --git a/test/include/test/SFMT-params607.h b/test/include/test/SFMT-params607.h
index bc76485f..29fb3913 100644
--- a/test/include/test/SFMT-params607.h
+++ b/test/include/test/SFMT-params607.h
@@ -36,46 +36,56 @@
 #ifndef SFMT_PARAMS607_H
 #define SFMT_PARAMS607_H
 
-#define POS1	2
-#define SL1	15
-#define SL2	3
-#define SR1	13
-#define SR2	3
-#define MSK1	0xfdff37ffU
-#define MSK2	0xef7f3f7dU
-#define MSK3	0xff777b7dU
-#define MSK4	0x7ff7fb2fU
-#define PARITY1	0x00000001U
-#define PARITY2	0x00000000U
-#define PARITY3	0x00000000U
-#define PARITY4	0x5986f054U
-
+#define POS1 2
+#define SL1 15
+#define SL2 3
+#define SR1 13
+#define SR2 3
+#define MSK1 0xfdff37ffU
+#define MSK2 0xef7f3f7dU
+#define MSK3 0xff777b7dU
+#define MSK4 0x7ff7fb2fU
+#define PARITY1 0x00000001U
+#define PARITY2 0x00000000U
+#define PARITY3 0x00000000U
+#define PARITY4 0x5986f054U
 
 /* PARAMETERS FOR ALTIVEC */
-#if defined(__APPLE__)	/* For OSX */
-    #define ALTI_SL1	(vector unsigned int)(SL1, SL1, SL1, SL1)
-    #define ALTI_SR1	(vector unsigned int)(SR1, SR1, SR1, SR1)
-    #define ALTI_MSK	(vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
-    #define ALTI_MSK64 \
-	(vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
-    #define ALTI_SL2_PERM \
-	(vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10)
-    #define ALTI_SL2_PERM64 \
-	(vector unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2)
-    #define ALTI_SR2_PERM \
-	(vector unsigned char)(5,6,7,0,9,10,11,4,13,14,15,8,19,19,19,12)
-    #define ALTI_SR2_PERM64 \
-	(vector unsigned char)(13,14,15,0,1,2,3,4,19,19,19,8,9,10,11,12)
-#else	/* For OTHER OSs(Linux?) */
-    #define ALTI_SL1	{SL1, SL1, SL1, SL1}
-    #define ALTI_SR1	{SR1, SR1, SR1, SR1}
-    #define ALTI_MSK	{MSK1, MSK2, MSK3, MSK4}
-    #define ALTI_MSK64	{MSK2, MSK1, MSK4, MSK3}
-    #define ALTI_SL2_PERM	{3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10}
-    #define ALTI_SL2_PERM64	{3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2}
-    #define ALTI_SR2_PERM	{5,6,7,0,9,10,11,4,13,14,15,8,19,19,19,12}
-    #define ALTI_SR2_PERM64	{13,14,15,0,1,2,3,4,19,19,19,8,9,10,11,12}
-#endif	/* For OSX */
-#define IDSTR	"SFMT-607:2-15-3-13-3:fdff37ff-ef7f3f7d-ff777b7d-7ff7fb2f"
+#if defined(__APPLE__) /* For OSX */
+#	define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1)
+#	define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1)
+#	define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
+#	define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
+#	define ALTI_SL2_PERM                                                  \
+		(vector unsigned char)(3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, \
+		    15, 8, 9, 10)
+#	define ALTI_SL2_PERM64                                                \
+		(vector unsigned char)(3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13,  \
+		    14, 15, 0, 1, 2)
+#	define ALTI_SR2_PERM                                                  \
+		(vector unsigned char)(5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15,   \
+		    8, 19, 19, 19, 12)
+#	define ALTI_SR2_PERM64                                                \
+		(vector unsigned char)(13, 14, 15, 0, 1, 2, 3, 4, 19, 19, 19,  \
+		    8, 9, 10, 11, 12)
+#else /* For OTHER OSs(Linux?) */
+#	define ALTI_SL1                                                       \
+		{ SL1, SL1, SL1, SL1 }
+#	define ALTI_SR1                                                       \
+		{ SR1, SR1, SR1, SR1 }
+#	define ALTI_MSK                                                       \
+		{ MSK1, MSK2, MSK3, MSK4 }
+#	define ALTI_MSK64                                                     \
+		{ MSK2, MSK1, MSK4, MSK3 }
+#	define ALTI_SL2_PERM                                                  \
+		{ 3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10 }
+#	define ALTI_SL2_PERM64                                                \
+		{ 3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13, 14, 15, 0, 1, 2 }
+#	define ALTI_SR2_PERM                                                  \
+		{ 5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, 8, 19, 19, 19, 12 }
+#	define ALTI_SR2_PERM64                                                \
+		{ 13, 14, 15, 0, 1, 2, 3, 4, 19, 19, 19, 8, 9, 10, 11, 12 }
+#endif /* For OSX */
+#define IDSTR "SFMT-607:2-15-3-13-3:fdff37ff-ef7f3f7d-ff777b7d-7ff7fb2f"
 
 #endif /* SFMT_PARAMS607_H */
diff --git a/test/include/test/SFMT-params86243.h b/test/include/test/SFMT-params86243.h
index 5e4d783c..5e3747e9 100644
--- a/test/include/test/SFMT-params86243.h
+++ b/test/include/test/SFMT-params86243.h
@@ -36,46 +36,56 @@
 #ifndef SFMT_PARAMS86243_H
 #define SFMT_PARAMS86243_H
 
-#define POS1	366
-#define SL1	6
-#define SL2	7
-#define SR1	19
-#define SR2	1
-#define MSK1	0xfdbffbffU
-#define MSK2	0xbff7ff3fU
-#define MSK3	0xfd77efffU
-#define MSK4	0xbf9ff3ffU
-#define PARITY1	0x00000001U
-#define PARITY2	0x00000000U
-#define PARITY3	0x00000000U
-#define PARITY4	0xe9528d85U
-
+#define POS1 366
+#define SL1 6
+#define SL2 7
+#define SR1 19
+#define SR2 1
+#define MSK1 0xfdbffbffU
+#define MSK2 0xbff7ff3fU
+#define MSK3 0xfd77efffU
+#define MSK4 0xbf9ff3ffU
+#define PARITY1 0x00000001U
+#define PARITY2 0x00000000U
+#define PARITY3 0x00000000U
+#define PARITY4 0xe9528d85U
 
 /* PARAMETERS FOR ALTIVEC */
-#if defined(__APPLE__)	/* For OSX */
-    #define ALTI_SL1	(vector unsigned int)(SL1, SL1, SL1, SL1)
-    #define ALTI_SR1	(vector unsigned int)(SR1, SR1, SR1, SR1)
-    #define ALTI_MSK	(vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
-    #define ALTI_MSK64 \
-	(vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
-    #define ALTI_SL2_PERM \
-	(vector unsigned char)(25,25,25,25,3,25,25,25,7,0,1,2,11,4,5,6)
-    #define ALTI_SL2_PERM64 \
-	(vector unsigned char)(7,25,25,25,25,25,25,25,15,0,1,2,3,4,5,6)
-    #define ALTI_SR2_PERM \
-	(vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14)
-    #define ALTI_SR2_PERM64 \
-	(vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14)
-#else	/* For OTHER OSs(Linux?) */
-    #define ALTI_SL1	{SL1, SL1, SL1, SL1}
-    #define ALTI_SR1	{SR1, SR1, SR1, SR1}
-    #define ALTI_MSK	{MSK1, MSK2, MSK3, MSK4}
-    #define ALTI_MSK64	{MSK2, MSK1, MSK4, MSK3}
-    #define ALTI_SL2_PERM	{25,25,25,25,3,25,25,25,7,0,1,2,11,4,5,6}
-    #define ALTI_SL2_PERM64	{7,25,25,25,25,25,25,25,15,0,1,2,3,4,5,6}
-    #define ALTI_SR2_PERM	{7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14}
-    #define ALTI_SR2_PERM64	{15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14}
-#endif	/* For OSX */
-#define IDSTR	"SFMT-86243:366-6-7-19-1:fdbffbff-bff7ff3f-fd77efff-bf9ff3ff"
+#if defined(__APPLE__) /* For OSX */
+#	define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1)
+#	define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1)
+#	define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
+#	define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
+#	define ALTI_SL2_PERM                                                  \
+		(vector unsigned char)(25, 25, 25, 25, 3, 25, 25, 25, 7, 0, 1, \
+		    2, 11, 4, 5, 6)
+#	define ALTI_SL2_PERM64                                                \
+		(vector unsigned char)(7, 25, 25, 25, 25, 25, 25, 25, 15, 0,   \
+		    1, 2, 3, 4, 5, 6)
+#	define ALTI_SR2_PERM                                                  \
+		(vector unsigned char)(7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10,  \
+		    17, 12, 13, 14)
+#	define ALTI_SR2_PERM64                                                \
+		(vector unsigned char)(15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10,  \
+		    11, 12, 13, 14)
+#else /* For OTHER OSs(Linux?) */
+#	define ALTI_SL1                                                       \
+		{ SL1, SL1, SL1, SL1 }
+#	define ALTI_SR1                                                       \
+		{ SR1, SR1, SR1, SR1 }
+#	define ALTI_MSK                                                       \
+		{ MSK1, MSK2, MSK3, MSK4 }
+#	define ALTI_MSK64                                                     \
+		{ MSK2, MSK1, MSK4, MSK3 }
+#	define ALTI_SL2_PERM                                                  \
+		{ 25, 25, 25, 25, 3, 25, 25, 25, 7, 0, 1, 2, 11, 4, 5, 6 }
+#	define ALTI_SL2_PERM64                                                \
+		{ 7, 25, 25, 25, 25, 25, 25, 25, 15, 0, 1, 2, 3, 4, 5, 6 }
+#	define ALTI_SR2_PERM                                                  \
+		{ 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, 17, 12, 13, 14 }
+#	define ALTI_SR2_PERM64                                                \
+		{ 15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14 }
+#endif /* For OSX */
+#define IDSTR "SFMT-86243:366-6-7-19-1:fdbffbff-bff7ff3f-fd77efff-bf9ff3ff"
 
 #endif /* SFMT_PARAMS86243_H */
diff --git a/test/include/test/SFMT-sse2.h b/test/include/test/SFMT-sse2.h
index 169ad558..83b35b43 100644
--- a/test/include/test/SFMT-sse2.h
+++ b/test/include/test/SFMT-sse2.h
@@ -60,48 +60,49 @@
  * @param mask 128-bit mask
  * @return output
  */
-JEMALLOC_ALWAYS_INLINE __m128i mm_recursion(__m128i *a, __m128i *b,
-				   __m128i c, __m128i d, __m128i mask) {
-    __m128i v, x, y, z;
+JEMALLOC_ALWAYS_INLINE __m128i
+mm_recursion(__m128i *a, __m128i *b, __m128i c, __m128i d, __m128i mask) {
+	__m128i v, x, y, z;
 
-    x = _mm_load_si128(a);
-    y = _mm_srli_epi32(*b, SR1);
-    z = _mm_srli_si128(c, SR2);
-    v = _mm_slli_epi32(d, SL1);
-    z = _mm_xor_si128(z, x);
-    z = _mm_xor_si128(z, v);
-    x = _mm_slli_si128(x, SL2);
-    y = _mm_and_si128(y, mask);
-    z = _mm_xor_si128(z, x);
-    z = _mm_xor_si128(z, y);
-    return z;
+	x = _mm_load_si128(a);
+	y = _mm_srli_epi32(*b, SR1);
+	z = _mm_srli_si128(c, SR2);
+	v = _mm_slli_epi32(d, SL1);
+	z = _mm_xor_si128(z, x);
+	z = _mm_xor_si128(z, v);
+	x = _mm_slli_si128(x, SL2);
+	y = _mm_and_si128(y, mask);
+	z = _mm_xor_si128(z, x);
+	z = _mm_xor_si128(z, y);
+	return z;
 }
 
 /**
  * This function fills the internal state array with pseudorandom
  * integers.
  */
-static inline void gen_rand_all(sfmt_t *ctx) {
-    int i;
-    __m128i r, r1, r2, mask;
-    mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1);
+static inline void
+gen_rand_all(sfmt_t *ctx) {
+	int     i;
+	__m128i r, r1, r2, mask;
+	mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1);
 
-    r1 = _mm_load_si128(&ctx->sfmt[N - 2].si);
-    r2 = _mm_load_si128(&ctx->sfmt[N - 1].si);
-    for (i = 0; i < N - POS1; i++) {
-	r = mm_recursion(&ctx->sfmt[i].si, &ctx->sfmt[i + POS1].si, r1, r2,
-	  mask);
-	_mm_store_si128(&ctx->sfmt[i].si, r);
-	r1 = r2;
-	r2 = r;
-    }
-    for (; i < N; i++) {
-	r = mm_recursion(&ctx->sfmt[i].si, &ctx->sfmt[i + POS1 - N].si, r1, r2,
-	  mask);
-	_mm_store_si128(&ctx->sfmt[i].si, r);
-	r1 = r2;
-	r2 = r;
-    }
+	r1 = _mm_load_si128(&ctx->sfmt[N - 2].si);
+	r2 = _mm_load_si128(&ctx->sfmt[N - 1].si);
+	for (i = 0; i < N - POS1; i++) {
+		r = mm_recursion(
+		    &ctx->sfmt[i].si, &ctx->sfmt[i + POS1].si, r1, r2, mask);
+		_mm_store_si128(&ctx->sfmt[i].si, r);
+		r1 = r2;
+		r2 = r;
+	}
+	for (; i < N; i++) {
+		r = mm_recursion(&ctx->sfmt[i].si, &ctx->sfmt[i + POS1 - N].si,
+		    r1, r2, mask);
+		_mm_store_si128(&ctx->sfmt[i].si, r);
+		r1 = r2;
+		r2 = r;
+	}
 }
 
 /**
@@ -111,47 +112,48 @@ static inline void gen_rand_all(sfmt_t *ctx) {
  * @param array an 128-bit array to be filled by pseudorandom numbers.
  * @param size number of 128-bit pesudorandom numbers to be generated.
  */
-static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) {
-    int i, j;
-    __m128i r, r1, r2, mask;
-    mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1);
+static inline void
+gen_rand_array(sfmt_t *ctx, w128_t *array, int size) {
+	int     i, j;
+	__m128i r, r1, r2, mask;
+	mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1);
 
-    r1 = _mm_load_si128(&ctx->sfmt[N - 2].si);
-    r2 = _mm_load_si128(&ctx->sfmt[N - 1].si);
-    for (i = 0; i < N - POS1; i++) {
-	r = mm_recursion(&ctx->sfmt[i].si, &ctx->sfmt[i + POS1].si, r1, r2,
-	  mask);
-	_mm_store_si128(&array[i].si, r);
-	r1 = r2;
-	r2 = r;
-    }
-    for (; i < N; i++) {
-	r = mm_recursion(&ctx->sfmt[i].si, &array[i + POS1 - N].si, r1, r2,
-	  mask);
-	_mm_store_si128(&array[i].si, r);
-	r1 = r2;
-	r2 = r;
-    }
-    /* main loop */
-    for (; i < size - N; i++) {
-	r = mm_recursion(&array[i - N].si, &array[i + POS1 - N].si, r1, r2,
-			 mask);
-	_mm_store_si128(&array[i].si, r);
-	r1 = r2;
-	r2 = r;
-    }
-    for (j = 0; j < 2 * N - size; j++) {
-	r = _mm_load_si128(&array[j + size - N].si);
-	_mm_store_si128(&ctx->sfmt[j].si, r);
-    }
-    for (; i < size; i++) {
-	r = mm_recursion(&array[i - N].si, &array[i + POS1 - N].si, r1, r2,
-			 mask);
-	_mm_store_si128(&array[i].si, r);
-	_mm_store_si128(&ctx->sfmt[j++].si, r);
-	r1 = r2;
-	r2 = r;
-    }
+	r1 = _mm_load_si128(&ctx->sfmt[N - 2].si);
+	r2 = _mm_load_si128(&ctx->sfmt[N - 1].si);
+	for (i = 0; i < N - POS1; i++) {
+		r = mm_recursion(
+		    &ctx->sfmt[i].si, &ctx->sfmt[i + POS1].si, r1, r2, mask);
+		_mm_store_si128(&array[i].si, r);
+		r1 = r2;
+		r2 = r;
+	}
+	for (; i < N; i++) {
+		r = mm_recursion(
+		    &ctx->sfmt[i].si, &array[i + POS1 - N].si, r1, r2, mask);
+		_mm_store_si128(&array[i].si, r);
+		r1 = r2;
+		r2 = r;
+	}
+	/* main loop */
+	for (; i < size - N; i++) {
+		r = mm_recursion(
+		    &array[i - N].si, &array[i + POS1 - N].si, r1, r2, mask);
+		_mm_store_si128(&array[i].si, r);
+		r1 = r2;
+		r2 = r;
+	}
+	for (j = 0; j < 2 * N - size; j++) {
+		r = _mm_load_si128(&array[j + size - N].si);
+		_mm_store_si128(&ctx->sfmt[j].si, r);
+	}
+	for (; i < size; i++) {
+		r = mm_recursion(
+		    &array[i - N].si, &array[i + POS1 - N].si, r1, r2, mask);
+		_mm_store_si128(&array[i].si, r);
+		_mm_store_si128(&ctx->sfmt[j++].si, r);
+		r1 = r2;
+		r2 = r;
+	}
 }
 
 #endif
diff --git a/test/include/test/SFMT.h b/test/include/test/SFMT.h
index 863fc55e..0082c026 100644
--- a/test/include/test/SFMT.h
+++ b/test/include/test/SFMT.h
@@ -33,8 +33,8 @@
  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
-/** 
- * @file SFMT.h 
+/**
+ * @file SFMT.h
  *
  * @brief SIMD oriented Fast Mersenne Twister(SFMT) pseudorandom
  * number generator
@@ -53,7 +53,7 @@
  * and you have to define PRIu64 and PRIx64 in this file as follows:
  * @verbatim
  typedef unsigned int uint32_t
- typedef unsigned long long uint64_t  
+ typedef unsigned long long uint64_t
  #define PRIu64 "llu"
  #define PRIx64 "llx"
 @endverbatim
@@ -68,79 +68,89 @@
 
 typedef struct sfmt_s sfmt_t;
 
-uint32_t gen_rand32(sfmt_t *ctx);
-uint32_t gen_rand32_range(sfmt_t *ctx, uint32_t limit);
-uint64_t gen_rand64(sfmt_t *ctx);
-uint64_t gen_rand64_range(sfmt_t *ctx, uint64_t limit);
-void fill_array32(sfmt_t *ctx, uint32_t *array, int size);
-void fill_array64(sfmt_t *ctx, uint64_t *array, int size);
-sfmt_t *init_gen_rand(uint32_t seed);
-sfmt_t *init_by_array(uint32_t *init_key, int key_length);
-void fini_gen_rand(sfmt_t *ctx);
+uint32_t    gen_rand32(sfmt_t *ctx);
+uint32_t    gen_rand32_range(sfmt_t *ctx, uint32_t limit);
+uint64_t    gen_rand64(sfmt_t *ctx);
+uint64_t    gen_rand64_range(sfmt_t *ctx, uint64_t limit);
+void        fill_array32(sfmt_t *ctx, uint32_t *array, int size);
+void        fill_array64(sfmt_t *ctx, uint64_t *array, int size);
+sfmt_t     *init_gen_rand(uint32_t seed);
+sfmt_t     *init_by_array(uint32_t *init_key, int key_length);
+void        fini_gen_rand(sfmt_t *ctx);
 const char *get_idstring(void);
-int get_min_array_size32(void);
-int get_min_array_size64(void);
+int         get_min_array_size32(void);
+int         get_min_array_size64(void);
 
 /* These real versions are due to Isaku Wada */
 /** generates a random number on [0,1]-real-interval */
-static inline double to_real1(uint32_t v) {
-    return v * (1.0/4294967295.0); 
-    /* divided by 2^32-1 */ 
+static inline double
+to_real1(uint32_t v) {
+	return v * (1.0 / 4294967295.0);
+	/* divided by 2^32-1 */
 }
 
 /** generates a random number on [0,1]-real-interval */
-static inline double genrand_real1(sfmt_t *ctx) {
-    return to_real1(gen_rand32(ctx));
+static inline double
+genrand_real1(sfmt_t *ctx) {
+	return to_real1(gen_rand32(ctx));
 }
 
 /** generates a random number on [0,1)-real-interval */
-static inline double to_real2(uint32_t v) {
-    return v * (1.0/4294967296.0); 
-    /* divided by 2^32 */
+static inline double
+to_real2(uint32_t v) {
+	return v * (1.0 / 4294967296.0);
+	/* divided by 2^32 */
 }
 
 /** generates a random number on [0,1)-real-interval */
-static inline double genrand_real2(sfmt_t *ctx) {
-    return to_real2(gen_rand32(ctx));
+static inline double
+genrand_real2(sfmt_t *ctx) {
+	return to_real2(gen_rand32(ctx));
 }
 
 /** generates a random number on (0,1)-real-interval */
-static inline double to_real3(uint32_t v) {
-    return (((double)v) + 0.5)*(1.0/4294967296.0); 
-    /* divided by 2^32 */
+static inline double
+to_real3(uint32_t v) {
+	return (((double)v) + 0.5) * (1.0 / 4294967296.0);
+	/* divided by 2^32 */
 }
 
 /** generates a random number on (0,1)-real-interval */
-static inline double genrand_real3(sfmt_t *ctx) {
-    return to_real3(gen_rand32(ctx));
+static inline double
+genrand_real3(sfmt_t *ctx) {
+	return to_real3(gen_rand32(ctx));
 }
 /** These real versions are due to Isaku Wada */
 
 /** generates a random number on [0,1) with 53-bit resolution*/
-static inline double to_res53(uint64_t v) {
-    return v * (1.0/18446744073709551616.0L);
+static inline double
+to_res53(uint64_t v) {
+	return v * (1.0 / 18446744073709551616.0L);
 }
 
 /** generates a random number on [0,1) with 53-bit resolution from two
  * 32 bit integers */
-static inline double to_res53_mix(uint32_t x, uint32_t y) {
-    return to_res53(x | ((uint64_t)y << 32));
+static inline double
+to_res53_mix(uint32_t x, uint32_t y) {
+	return to_res53(x | ((uint64_t)y << 32));
 }
 
 /** generates a random number on [0,1) with 53-bit resolution
  */
-static inline double genrand_res53(sfmt_t *ctx) {
-    return to_res53(gen_rand64(ctx));
+static inline double
+genrand_res53(sfmt_t *ctx) {
+	return to_res53(gen_rand64(ctx));
 }
 
 /** generates a random number on [0,1) with 53-bit resolution
     using 32bit integer.
  */
-static inline double genrand_res53_mix(sfmt_t *ctx) {
-    uint32_t x, y;
+static inline double
+genrand_res53_mix(sfmt_t *ctx) {
+	uint32_t x, y;
 
-    x = gen_rand32(ctx);
-    y = gen_rand32(ctx);
-    return to_res53_mix(x, y);
+	x = gen_rand32(ctx);
+	y = gen_rand32(ctx);
+	return to_res53_mix(x, y);
 }
 #endif
diff --git a/test/include/test/arena_util.h b/test/include/test/arena_util.h
index 9a41dacb..431fdfae 100644
--- a/test/include/test/arena_util.h
+++ b/test/include/test/arena_util.h
@@ -1,39 +1,39 @@
 static inline unsigned
 do_arena_create(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) {
 	unsigned arena_ind;
-	size_t sz = sizeof(unsigned);
+	size_t   sz = sizeof(unsigned);
 	expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0),
 	    0, "Unexpected mallctl() failure");
 	size_t mib[3];
-	size_t miblen = sizeof(mib)/sizeof(size_t);
+	size_t miblen = sizeof(mib) / sizeof(size_t);
 
-	expect_d_eq(mallctlnametomib("arena.0.dirty_decay_ms", mib, &miblen),
-	    0, "Unexpected mallctlnametomib() failure");
+	expect_d_eq(mallctlnametomib("arena.0.dirty_decay_ms", mib, &miblen), 0,
+	    "Unexpected mallctlnametomib() failure");
 	mib[1] = (size_t)arena_ind;
 	expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL,
-	    (void *)&dirty_decay_ms, sizeof(dirty_decay_ms)), 0,
-	    "Unexpected mallctlbymib() failure");
+	                (void *)&dirty_decay_ms, sizeof(dirty_decay_ms)),
+	    0, "Unexpected mallctlbymib() failure");
 
-	expect_d_eq(mallctlnametomib("arena.0.muzzy_decay_ms", mib, &miblen),
-	    0, "Unexpected mallctlnametomib() failure");
+	expect_d_eq(mallctlnametomib("arena.0.muzzy_decay_ms", mib, &miblen), 0,
+	    "Unexpected mallctlnametomib() failure");
 	mib[1] = (size_t)arena_ind;
 	expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL,
-	    (void *)&muzzy_decay_ms, sizeof(muzzy_decay_ms)), 0,
-	    "Unexpected mallctlbymib() failure");
+	                (void *)&muzzy_decay_ms, sizeof(muzzy_decay_ms)),
+	    0, "Unexpected mallctlbymib() failure");
 
 	return arena_ind;
 }
 
 static inline void
 do_arena_destroy(unsigned arena_ind) {
-	/* 
+	/*
 	 * For convenience, flush tcache in case there are cached items.
 	 * However not assert success since the tcache may be disabled.
 	 */
 	mallctl("thread.tcache.flush", NULL, NULL, NULL, 0);
 
 	size_t mib[3];
-	size_t miblen = sizeof(mib)/sizeof(size_t);
+	size_t miblen = sizeof(mib) / sizeof(size_t);
 	expect_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0,
 	    "Unexpected mallctlnametomib() failure");
 	mib[1] = (size_t)arena_ind;
@@ -51,7 +51,7 @@ do_epoch(void) {
 static inline void
 do_purge(unsigned arena_ind) {
 	size_t mib[3];
-	size_t miblen = sizeof(mib)/sizeof(size_t);
+	size_t miblen = sizeof(mib) / sizeof(size_t);
 	expect_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0,
 	    "Unexpected mallctlnametomib() failure");
 	mib[1] = (size_t)arena_ind;
@@ -62,7 +62,7 @@ do_purge(unsigned arena_ind) {
 static inline void
 do_decay(unsigned arena_ind) {
 	size_t mib[3];
-	size_t miblen = sizeof(mib)/sizeof(size_t);
+	size_t miblen = sizeof(mib) / sizeof(size_t);
 	expect_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0,
 	    "Unexpected mallctlnametomib() failure");
 	mib[1] = (size_t)arena_ind;
@@ -73,12 +73,12 @@ do_decay(unsigned arena_ind) {
 static inline uint64_t
 get_arena_npurge_impl(const char *mibname, unsigned arena_ind) {
 	size_t mib[4];
-	size_t miblen = sizeof(mib)/sizeof(size_t);
+	size_t miblen = sizeof(mib) / sizeof(size_t);
 	expect_d_eq(mallctlnametomib(mibname, mib, &miblen), 0,
 	    "Unexpected mallctlnametomib() failure");
 	mib[2] = (size_t)arena_ind;
 	uint64_t npurge = 0;
-	size_t sz = sizeof(npurge);
+	size_t   sz = sizeof(npurge);
 	expect_d_eq(mallctlbymib(mib, miblen, (void *)&npurge, &sz, NULL, 0),
 	    config_stats ? 0 : ENOENT, "Unexpected mallctlbymib() failure");
 	return npurge;
@@ -105,15 +105,15 @@ get_arena_muzzy_npurge(unsigned arena_ind) {
 static inline uint64_t
 get_arena_npurge(unsigned arena_ind) {
 	do_epoch();
-	return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind) +
-	    get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind);
+	return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind)
+	    + get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind);
 }
 
 static inline size_t
 get_arena_pdirty(unsigned arena_ind) {
 	do_epoch();
 	size_t mib[4];
-	size_t miblen = sizeof(mib)/sizeof(size_t);
+	size_t miblen = sizeof(mib) / sizeof(size_t);
 	expect_d_eq(mallctlnametomib("stats.arenas.0.pdirty", mib, &miblen), 0,
 	    "Unexpected mallctlnametomib() failure");
 	mib[2] = (size_t)arena_ind;
@@ -128,7 +128,7 @@ static inline size_t
 get_arena_pmuzzy(unsigned arena_ind) {
 	do_epoch();
 	size_t mib[4];
-	size_t miblen = sizeof(mib)/sizeof(size_t);
+	size_t miblen = sizeof(mib) / sizeof(size_t);
 	expect_d_eq(mallctlnametomib("stats.arenas.0.pmuzzy", mib, &miblen), 0,
 	    "Unexpected mallctlnametomib() failure");
 	mib[2] = (size_t)arena_ind;
@@ -148,8 +148,7 @@ do_mallocx(size_t size, int flags) {
 
 static inline void
 generate_dirty(unsigned arena_ind, size_t size) {
-	int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE;
+	int   flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE;
 	void *p = do_mallocx(size, flags);
 	dallocx(p, flags);
 }
-
diff --git a/test/include/test/bench.h b/test/include/test/bench.h
index 0397c948..faebfd77 100644
--- a/test/include/test/bench.h
+++ b/test/include/test/bench.h
@@ -1,6 +1,6 @@
 static inline void
-time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t niter,
-    void (*func)(void)) {
+time_func(
+    timedelta_t *timer, uint64_t nwarmup, uint64_t niter, void (*func)(void)) {
 	uint64_t i;
 
 	for (i = 0; i < nwarmup; i++) {
@@ -23,16 +23,16 @@ fmt_nsecs(uint64_t usec, uint64_t iters, char *buf) {
 	uint64_t nsecs_per_iter1000 = nsec1000 / iters;
 	uint64_t intpart = nsecs_per_iter1000 / 1000;
 	uint64_t fracpart = nsecs_per_iter1000 % 1000;
-	malloc_snprintf(buf, FMT_NSECS_BUF_SIZE, "%"FMTu64".%03"FMTu64, intpart,
-	    fracpart);
+	malloc_snprintf(buf, FMT_NSECS_BUF_SIZE, "%" FMTu64 ".%03" FMTu64,
+	    intpart, fracpart);
 }
 
 static inline void
 compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a,
-    void (*func_a), const char *name_b, void (*func_b)) {
+    void(*func_a), const char *name_b, void(*func_b)) {
 	timedelta_t timer_a, timer_b;
-	char ratio_buf[6];
-	void *p;
+	char        ratio_buf[6];
+	void       *p;
 
 	p = mallocx(1, 0);
 	if (p == NULL) {
@@ -40,21 +40,34 @@ compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a,
 		return;
 	}
 
-	time_func(&timer_a, nwarmup, niter, func_a);
-	time_func(&timer_b, nwarmup, niter, func_b);
+	time_func(&timer_a, nwarmup, niter, (void (*)(void))func_a);
+	time_func(&timer_b, nwarmup, niter, (void (*)(void))func_b);
 
 	uint64_t usec_a = timer_usec(&timer_a);
-	char buf_a[FMT_NSECS_BUF_SIZE];
+	char     buf_a[FMT_NSECS_BUF_SIZE];
 	fmt_nsecs(usec_a, niter, buf_a);
 
 	uint64_t usec_b = timer_usec(&timer_b);
-	char buf_b[FMT_NSECS_BUF_SIZE];
+	char     buf_b[FMT_NSECS_BUF_SIZE];
 	fmt_nsecs(usec_b, niter, buf_b);
 
 	timer_ratio(&timer_a, &timer_b, ratio_buf, sizeof(ratio_buf));
-	malloc_printf("%"FMTu64" iterations, %s=%"FMTu64"us (%s ns/iter), "
-	    "%s=%"FMTu64"us (%s ns/iter), ratio=1:%s\n",
+	malloc_printf("%" FMTu64 " iterations, %s=%" FMTu64
+	              "us (%s ns/iter), "
+	              "%s=%" FMTu64
+	              "us (%s ns/iter), time consumption ratio=%s:1\n",
 	    niter, name_a, usec_a, buf_a, name_b, usec_b, buf_b, ratio_buf);
 
 	dallocx(p, 0);
 }
+
+static inline void *
+no_opt_ptr(void *ptr) {
+#ifdef JEMALLOC_HAVE_ASM_VOLATILE
+	asm volatile("" : "+r"(ptr));
+#else
+	void *volatile dup = ptr;
+	ptr = dup;
+#endif
+	return ptr;
+}
diff --git a/test/include/test/bgthd.h b/test/include/test/bgthd.h
index 4fa2395e..0a7e789b 100644
--- a/test/include/test/bgthd.h
+++ b/test/include/test/bgthd.h
@@ -5,9 +5,9 @@
 
 static inline bool
 is_background_thread_enabled(void) {
-	bool enabled;
+	bool   enabled;
 	size_t sz = sizeof(bool);
-	int ret = mallctl("background_thread", (void *)&enabled, &sz, NULL,0);
+	int ret = mallctl("background_thread", (void *)&enabled, &sz, NULL, 0);
 	if (ret == ENOENT) {
 		return false;
 	}
diff --git a/test/include/test/btalloc.h b/test/include/test/btalloc.h
index 8f345993..04a336d5 100644
--- a/test/include/test/btalloc.h
+++ b/test/include/test/btalloc.h
@@ -1,30 +1,28 @@
 /* btalloc() provides a mechanism for allocating via permuted backtraces. */
-void	*btalloc(size_t size, unsigned bits);
+void *btalloc(size_t size, unsigned bits);
 
-#define btalloc_n_proto(n)						\
-void	*btalloc_##n(size_t size, unsigned bits);
-btalloc_n_proto(0)
-btalloc_n_proto(1)
+#define btalloc_n_proto(n) void *btalloc_##n(size_t size, unsigned bits);
+btalloc_n_proto(0) btalloc_n_proto(1)
 
-#define btalloc_n_gen(n)						\
-void *									\
-btalloc_##n(size_t size, unsigned bits) {				\
-	void *p;							\
-									\
-	if (bits == 0) {						\
-		p = mallocx(size, 0);					\
-	} else {							\
-		switch (bits & 0x1U) {					\
-		case 0:							\
-			p = (btalloc_0(size, bits >> 1));		\
-			break;						\
-		case 1:							\
-			p = (btalloc_1(size, bits >> 1));		\
-			break;						\
-		default: not_reached();					\
-		}							\
-	}								\
-	/* Intentionally sabotage tail call optimization. */		\
-	expect_ptr_not_null(p, "Unexpected mallocx() failure");		\
-	return p;							\
-}
+#define btalloc_n_gen(n)                                                       \
+	void *btalloc_##n(size_t size, unsigned bits) {                        \
+		void *p;                                                       \
+                                                                               \
+		if (bits == 0) {                                               \
+			p = mallocx(size, 0);                                  \
+		} else {                                                       \
+			switch (bits & 0x1U) {                                 \
+			case 0:                                                \
+				p = (btalloc_0(size, bits >> 1));              \
+				break;                                         \
+			case 1:                                                \
+				p = (btalloc_1(size, bits >> 1));              \
+				break;                                         \
+			default:                                               \
+				not_reached();                                 \
+			}                                                      \
+		}                                                              \
+		/* Intentionally sabotage tail call optimization. */           \
+		expect_ptr_not_null(p, "Unexpected mallocx() failure");        \
+		return p;                                                      \
+	}
diff --git a/test/include/test/extent_hooks.h b/test/include/test/extent_hooks.h
index aad0a46c..33bb8593 100644
--- a/test/include/test/extent_hooks.h
+++ b/test/include/test/extent_hooks.h
@@ -3,40 +3,33 @@
  * passthrough.
  */
 
-static void	*extent_alloc_hook(extent_hooks_t *extent_hooks, void *new_addr,
+static void *extent_alloc_hook(extent_hooks_t *extent_hooks, void *new_addr,
     size_t size, size_t alignment, bool *zero, bool *commit,
     unsigned arena_ind);
-static bool	extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr,
-    size_t size, bool committed, unsigned arena_ind);
-static void	extent_destroy_hook(extent_hooks_t *extent_hooks, void *addr,
-    size_t size, bool committed, unsigned arena_ind);
-static bool	extent_commit_hook(extent_hooks_t *extent_hooks, void *addr,
-    size_t size, size_t offset, size_t length, unsigned arena_ind);
-static bool	extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr,
-    size_t size, size_t offset, size_t length, unsigned arena_ind);
-static bool	extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr,
-    size_t size, size_t offset, size_t length, unsigned arena_ind);
-static bool	extent_purge_forced_hook(extent_hooks_t *extent_hooks,
-    void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind);
-static bool	extent_split_hook(extent_hooks_t *extent_hooks, void *addr,
-    size_t size, size_t size_a, size_t size_b, bool committed,
-    unsigned arena_ind);
-static bool	extent_merge_hook(extent_hooks_t *extent_hooks, void *addr_a,
-    size_t size_a, void *addr_b, size_t size_b, bool committed,
-    unsigned arena_ind);
+static bool  extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr,
+     size_t size, bool committed, unsigned arena_ind);
+static void  extent_destroy_hook(extent_hooks_t *extent_hooks, void *addr,
+     size_t size, bool committed, unsigned arena_ind);
+static bool  extent_commit_hook(extent_hooks_t *extent_hooks, void *addr,
+     size_t size, size_t offset, size_t length, unsigned arena_ind);
+static bool  extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr,
+     size_t size, size_t offset, size_t length, unsigned arena_ind);
+static bool  extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr,
+     size_t size, size_t offset, size_t length, unsigned arena_ind);
+static bool  extent_purge_forced_hook(extent_hooks_t *extent_hooks, void *addr,
+     size_t size, size_t offset, size_t length, unsigned arena_ind);
+static bool  extent_split_hook(extent_hooks_t *extent_hooks, void *addr,
+     size_t size, size_t size_a, size_t size_b, bool committed,
+     unsigned arena_ind);
+static bool  extent_merge_hook(extent_hooks_t *extent_hooks, void *addr_a,
+     size_t size_a, void *addr_b, size_t size_b, bool committed,
+     unsigned arena_ind);
 
 static extent_hooks_t *default_hooks;
-static extent_hooks_t hooks = {
-	extent_alloc_hook,
-	extent_dalloc_hook,
-	extent_destroy_hook,
-	extent_commit_hook,
-	extent_decommit_hook,
-	extent_purge_lazy_hook,
-	extent_purge_forced_hook,
-	extent_split_hook,
-	extent_merge_hook
-};
+static extent_hooks_t  hooks = {extent_alloc_hook, extent_dalloc_hook,
+     extent_destroy_hook, extent_commit_hook, extent_decommit_hook,
+     extent_purge_lazy_hook, extent_purge_forced_hook, extent_split_hook,
+     extent_merge_hook};
 
 /* Control whether hook functions pass calls through to default hooks. */
 static bool try_alloc = true;
@@ -72,9 +65,9 @@ static bool did_split;
 static bool did_merge;
 
 #if 0
-#  define TRACE_HOOK(fmt, ...) malloc_printf(fmt, __VA_ARGS__)
+#	define TRACE_HOOK(fmt, ...) malloc_printf(fmt, __VA_ARGS__)
 #else
-#  define TRACE_HOOK(fmt, ...)
+#	define TRACE_HOOK(fmt, ...)
 #endif
 
 static void *
@@ -82,20 +75,21 @@ extent_alloc_hook(extent_hooks_t *extent_hooks, void *new_addr, size_t size,
     size_t alignment, bool *zero, bool *commit, unsigned arena_ind) {
 	void *ret;
 
-	TRACE_HOOK("%s(extent_hooks=%p, new_addr=%p, size=%zu, alignment=%zu, "
-	    "*zero=%s, *commit=%s, arena_ind=%u)\n", __func__, extent_hooks,
-	    new_addr, size, alignment, *zero ?  "true" : "false", *commit ?
-	    "true" : "false", arena_ind);
+	TRACE_HOOK(
+	    "%s(extent_hooks=%p, new_addr=%p, size=%zu, alignment=%zu, "
+	    "*zero=%s, *commit=%s, arena_ind=%u)\n",
+	    __func__, extent_hooks, new_addr, size, alignment,
+	    *zero ? "true" : "false", *commit ? "true" : "false", arena_ind);
 	expect_ptr_eq(extent_hooks, &hooks,
 	    "extent_hooks should be same as pointer used to set hooks");
-	expect_ptr_eq(extent_hooks->alloc, extent_alloc_hook,
-	    "Wrong hook function");
+	expect_ptr_eq(
+	    extent_hooks->alloc, extent_alloc_hook, "Wrong hook function");
 	called_alloc = true;
 	if (!try_alloc) {
 		return NULL;
 	}
-	ret = default_hooks->alloc(default_hooks, new_addr, size, alignment,
-	    zero, commit, 0);
+	ret = default_hooks->alloc(
+	    default_hooks, new_addr, size, alignment, zero, commit, 0);
 	did_alloc = (ret != NULL);
 	return ret;
 }
@@ -105,13 +99,15 @@ extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr, size_t size,
     bool committed, unsigned arena_ind) {
 	bool err;
 
-	TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, "
-	    "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ?
-	    "true" : "false", arena_ind);
+	TRACE_HOOK(
+	    "%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, "
+	    "arena_ind=%u)\n",
+	    __func__, extent_hooks, addr, size, committed ? "true" : "false",
+	    arena_ind);
 	expect_ptr_eq(extent_hooks, &hooks,
 	    "extent_hooks should be same as pointer used to set hooks");
-	expect_ptr_eq(extent_hooks->dalloc, extent_dalloc_hook,
-	    "Wrong hook function");
+	expect_ptr_eq(
+	    extent_hooks->dalloc, extent_dalloc_hook, "Wrong hook function");
 	called_dalloc = true;
 	if (!try_dalloc) {
 		return true;
@@ -124,13 +120,15 @@ extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr, size_t size,
 static void
 extent_destroy_hook(extent_hooks_t *extent_hooks, void *addr, size_t size,
     bool committed, unsigned arena_ind) {
-	TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, "
-	    "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ?
-	    "true" : "false", arena_ind);
+	TRACE_HOOK(
+	    "%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, "
+	    "arena_ind=%u)\n",
+	    __func__, extent_hooks, addr, size, committed ? "true" : "false",
+	    arena_ind);
 	expect_ptr_eq(extent_hooks, &hooks,
 	    "extent_hooks should be same as pointer used to set hooks");
-	expect_ptr_eq(extent_hooks->destroy, extent_destroy_hook,
-	    "Wrong hook function");
+	expect_ptr_eq(
+	    extent_hooks->destroy, extent_destroy_hook, "Wrong hook function");
 	called_destroy = true;
 	if (!try_destroy) {
 		return;
@@ -144,19 +142,20 @@ extent_commit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size,
     size_t offset, size_t length, unsigned arena_ind) {
 	bool err;
 
-	TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, "
-	    "length=%zu, arena_ind=%u)\n", __func__, extent_hooks, addr, size,
-	    offset, length, arena_ind);
+	TRACE_HOOK(
+	    "%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, "
+	    "length=%zu, arena_ind=%u)\n",
+	    __func__, extent_hooks, addr, size, offset, length, arena_ind);
 	expect_ptr_eq(extent_hooks, &hooks,
 	    "extent_hooks should be same as pointer used to set hooks");
-	expect_ptr_eq(extent_hooks->commit, extent_commit_hook,
-	    "Wrong hook function");
+	expect_ptr_eq(
+	    extent_hooks->commit, extent_commit_hook, "Wrong hook function");
 	called_commit = true;
 	if (!try_commit) {
 		return true;
 	}
-	err = default_hooks->commit(default_hooks, addr, size, offset, length,
-	    0);
+	err = default_hooks->commit(
+	    default_hooks, addr, size, offset, length, 0);
 	did_commit = !err;
 	return err;
 }
@@ -166,9 +165,10 @@ extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size,
     size_t offset, size_t length, unsigned arena_ind) {
 	bool err;
 
-	TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, "
-	    "length=%zu, arena_ind=%u)\n", __func__, extent_hooks, addr, size,
-	    offset, length, arena_ind);
+	TRACE_HOOK(
+	    "%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, "
+	    "length=%zu, arena_ind=%u)\n",
+	    __func__, extent_hooks, addr, size, offset, length, arena_ind);
 	expect_ptr_eq(extent_hooks, &hooks,
 	    "extent_hooks should be same as pointer used to set hooks");
 	expect_ptr_eq(extent_hooks->decommit, extent_decommit_hook,
@@ -177,8 +177,8 @@ extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size,
 	if (!try_decommit) {
 		return true;
 	}
-	err = default_hooks->decommit(default_hooks, addr, size, offset, length,
-	    0);
+	err = default_hooks->decommit(
+	    default_hooks, addr, size, offset, length, 0);
 	did_decommit = !err;
 	return err;
 }
@@ -188,9 +188,10 @@ extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr, size_t size,
     size_t offset, size_t length, unsigned arena_ind) {
 	bool err;
 
-	TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, "
-	    "length=%zu arena_ind=%u)\n", __func__, extent_hooks, addr, size,
-	    offset, length, arena_ind);
+	TRACE_HOOK(
+	    "%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, "
+	    "length=%zu arena_ind=%u)\n",
+	    __func__, extent_hooks, addr, size, offset, length, arena_ind);
 	expect_ptr_eq(extent_hooks, &hooks,
 	    "extent_hooks should be same as pointer used to set hooks");
 	expect_ptr_eq(extent_hooks->purge_lazy, extent_purge_lazy_hook,
@@ -199,9 +200,9 @@ extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr, size_t size,
 	if (!try_purge_lazy) {
 		return true;
 	}
-	err = default_hooks->purge_lazy == NULL ||
-	    default_hooks->purge_lazy(default_hooks, addr, size, offset, length,
-	    0);
+	err = default_hooks->purge_lazy == NULL
+	    || default_hooks->purge_lazy(
+	        default_hooks, addr, size, offset, length, 0);
 	did_purge_lazy = !err;
 	return err;
 }
@@ -211,9 +212,10 @@ extent_purge_forced_hook(extent_hooks_t *extent_hooks, void *addr, size_t size,
     size_t offset, size_t length, unsigned arena_ind) {
 	bool err;
 
-	TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, "
-	    "length=%zu arena_ind=%u)\n", __func__, extent_hooks, addr, size,
-	    offset, length, arena_ind);
+	TRACE_HOOK(
+	    "%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, "
+	    "length=%zu arena_ind=%u)\n",
+	    __func__, extent_hooks, addr, size, offset, length, arena_ind);
 	expect_ptr_eq(extent_hooks, &hooks,
 	    "extent_hooks should be same as pointer used to set hooks");
 	expect_ptr_eq(extent_hooks->purge_forced, extent_purge_forced_hook,
@@ -222,9 +224,9 @@ extent_purge_forced_hook(extent_hooks_t *extent_hooks, void *addr, size_t size,
 	if (!try_purge_forced) {
 		return true;
 	}
-	err = default_hooks->purge_forced == NULL ||
-	    default_hooks->purge_forced(default_hooks, addr, size, offset,
-	    length, 0);
+	err = default_hooks->purge_forced == NULL
+	    || default_hooks->purge_forced(
+	        default_hooks, addr, size, offset, length, 0);
 	did_purge_forced = !err;
 	return err;
 }
@@ -234,21 +236,22 @@ extent_split_hook(extent_hooks_t *extent_hooks, void *addr, size_t size,
     size_t size_a, size_t size_b, bool committed, unsigned arena_ind) {
 	bool err;
 
-	TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, size_a=%zu, "
-	    "size_b=%zu, committed=%s, arena_ind=%u)\n", __func__, extent_hooks,
-	    addr, size, size_a, size_b, committed ? "true" : "false",
-	    arena_ind);
+	TRACE_HOOK(
+	    "%s(extent_hooks=%p, addr=%p, size=%zu, size_a=%zu, "
+	    "size_b=%zu, committed=%s, arena_ind=%u)\n",
+	    __func__, extent_hooks, addr, size, size_a, size_b,
+	    committed ? "true" : "false", arena_ind);
 	expect_ptr_eq(extent_hooks, &hooks,
 	    "extent_hooks should be same as pointer used to set hooks");
-	expect_ptr_eq(extent_hooks->split, extent_split_hook,
-	    "Wrong hook function");
+	expect_ptr_eq(
+	    extent_hooks->split, extent_split_hook, "Wrong hook function");
 	called_split = true;
 	if (!try_split) {
 		return true;
 	}
-	err = (default_hooks->split == NULL ||
-	    default_hooks->split(default_hooks, addr, size, size_a, size_b,
-	    committed, 0));
+	err = (default_hooks->split == NULL
+	    || default_hooks->split(
+	        default_hooks, addr, size, size_a, size_b, committed, 0));
 	did_split = !err;
 	return err;
 }
@@ -258,23 +261,24 @@ extent_merge_hook(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a,
     void *addr_b, size_t size_b, bool committed, unsigned arena_ind) {
 	bool err;
 
-	TRACE_HOOK("%s(extent_hooks=%p, addr_a=%p, size_a=%zu, addr_b=%p "
-	    "size_b=%zu, committed=%s, arena_ind=%u)\n", __func__, extent_hooks,
-	    addr_a, size_a, addr_b, size_b, committed ? "true" : "false",
-	    arena_ind);
+	TRACE_HOOK(
+	    "%s(extent_hooks=%p, addr_a=%p, size_a=%zu, addr_b=%p "
+	    "size_b=%zu, committed=%s, arena_ind=%u)\n",
+	    __func__, extent_hooks, addr_a, size_a, addr_b, size_b,
+	    committed ? "true" : "false", arena_ind);
 	expect_ptr_eq(extent_hooks, &hooks,
 	    "extent_hooks should be same as pointer used to set hooks");
-	expect_ptr_eq(extent_hooks->merge, extent_merge_hook,
-	    "Wrong hook function");
+	expect_ptr_eq(
+	    extent_hooks->merge, extent_merge_hook, "Wrong hook function");
 	expect_ptr_eq((void *)((uintptr_t)addr_a + size_a), addr_b,
 	    "Extents not mergeable");
 	called_merge = true;
 	if (!try_merge) {
 		return true;
 	}
-	err = (default_hooks->merge == NULL ||
-	    default_hooks->merge(default_hooks, addr_a, size_a, addr_b, size_b,
-	    committed, 0));
+	err = (default_hooks->merge == NULL
+	    || default_hooks->merge(
+	        default_hooks, addr_a, size_a, addr_b, size_b, committed, 0));
 	did_merge = !err;
 	return err;
 }
@@ -285,5 +289,6 @@ extent_hooks_prep(void) {
 
 	sz = sizeof(default_hooks);
 	expect_d_eq(mallctl("arena.0.extent_hooks", (void *)&default_hooks, &sz,
-	    NULL, 0), 0, "Unexpected mallctl() error");
+	                NULL, 0),
+	    0, "Unexpected mallctl() error");
 }
diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in
index 3f8c0da7..8b139db1 100644
--- a/test/include/test/jemalloc_test.h.in
+++ b/test/include/test/jemalloc_test.h.in
@@ -1,3 +1,6 @@
+#ifndef JEMALLOC_TEST_H
+#define JEMALLOC_TEST_H
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -58,14 +61,7 @@ extern "C" {
 #  include "jemalloc/jemalloc@install_suffix@.h"
 #  include "jemalloc/internal/jemalloc_internal_defs.h"
 #  include "jemalloc/internal/jemalloc_internal_macros.h"
-
-static const bool config_debug =
-#ifdef JEMALLOC_DEBUG
-    true
-#else
-    false
-#endif
-    ;
+#  include "jemalloc/internal/jemalloc_preamble.h"
 
 #  define JEMALLOC_N(n) @private_namespace@##n
 #  include "jemalloc/internal/private_namespace.h"
@@ -88,7 +84,8 @@ static const bool config_debug =
  * public jemalloc interfaces with jet_ prefixes, so that stress tests can use
  * a separate allocator for their internal data structures.
  */
-#elif defined(JEMALLOC_STRESS_TEST)
+#elif defined(JEMALLOC_STRESS_TEST) || \
+    defined(JEMALLOC_STRESS_CPP_TEST)
 #  include "jemalloc/jemalloc@install_suffix@.h"
 
 #  include "jemalloc/jemalloc_protos_jet.h"
@@ -178,3 +175,5 @@ static const bool config_debug =
 #ifdef __cplusplus
 }
 #endif
+
+#endif
diff --git a/test/include/test/math.h b/test/include/test/math.h
index efba086d..c9b32e91 100644
--- a/test/include/test/math.h
+++ b/test/include/test/math.h
@@ -27,9 +27,12 @@ ln_gamma(double x) {
 
 	z = 1.0 / (x * x);
 
-	return f + (x-0.5) * log(x) - x + 0.918938533204673 +
-	    (((-0.000595238095238 * z + 0.000793650793651) * z -
-	    0.002777777777778) * z + 0.083333333333333) / x;
+	return f + (x - 0.5) * log(x) - x + 0.918938533204673
+	    + (((-0.000595238095238 * z + 0.000793650793651) * z
+	           - 0.002777777777778)
+	              * z
+	          + 0.083333333333333)
+	    / x;
 }
 
 /*
@@ -43,8 +46,8 @@ ln_gamma(double x) {
  */
 static inline double
 i_gamma(double x, double p, double ln_gamma_p) {
-	double acu, factor, oflo, gin, term, rn, a, b, an, dif;
-	double pn[6];
+	double   acu, factor, oflo, gin, term, rn, a, b, an, dif;
+	double   pn[6];
 	unsigned i;
 
 	assert(p > 0.0);
@@ -91,7 +94,7 @@ i_gamma(double x, double p, double ln_gamma_p) {
 			term += 1.0;
 			an = a * term;
 			for (i = 0; i < 2; i++) {
-				pn[i+4] = b * pn[i+2] - an * pn[i];
+				pn[i + 4] = b * pn[i + 2] - an * pn[i];
 			}
 			if (pn[5] != 0.0) {
 				rn = pn[4] / pn[5];
@@ -103,7 +106,7 @@ i_gamma(double x, double p, double ln_gamma_p) {
 				gin = rn;
 			}
 			for (i = 0; i < 4; i++) {
-				pn[i] = pn[i+2];
+				pn[i] = pn[i + 2];
 			}
 
 			if (fabs(pn[4]) >= oflo) {
@@ -135,16 +138,35 @@ pt_norm(double p) {
 	if (fabs(q) <= 0.425) {
 		/* p close to 1/2. */
 		r = 0.180625 - q * q;
-		return q * (((((((2.5090809287301226727e3 * r +
-		    3.3430575583588128105e4) * r + 6.7265770927008700853e4) * r
-		    + 4.5921953931549871457e4) * r + 1.3731693765509461125e4) *
-		    r + 1.9715909503065514427e3) * r + 1.3314166789178437745e2)
-		    * r + 3.3871328727963666080e0) /
-		    (((((((5.2264952788528545610e3 * r +
-		    2.8729085735721942674e4) * r + 3.9307895800092710610e4) * r
-		    + 2.1213794301586595867e4) * r + 5.3941960214247511077e3) *
-		    r + 6.8718700749205790830e2) * r + 4.2313330701600911252e1)
-		    * r + 1.0);
+		return q
+		    * (((((((2.5090809287301226727e3 * r
+		                + 3.3430575583588128105e4)
+		                   * r
+		               + 6.7265770927008700853e4)
+		                  * r
+		              + 4.5921953931549871457e4)
+		                 * r
+		             + 1.3731693765509461125e4)
+		                * r
+		            + 1.9715909503065514427e3)
+		               * r
+		           + 1.3314166789178437745e2)
+		            * r
+		        + 3.3871328727963666080e0)
+		    / (((((((5.2264952788528545610e3 * r
+		                + 2.8729085735721942674e4)
+		                   * r
+		               + 3.9307895800092710610e4)
+		                  * r
+		              + 2.1213794301586595867e4)
+		                 * r
+		             + 5.3941960214247511077e3)
+		                * r
+		            + 6.8718700749205790830e2)
+		               * r
+		           + 4.2313330701600911252e1)
+		            * r
+		        + 1.0);
 	} else {
 		if (q < 0.0) {
 			r = p;
@@ -157,40 +179,65 @@ pt_norm(double p) {
 		if (r <= 5.0) {
 			/* p neither close to 1/2 nor 0 or 1. */
 			r -= 1.6;
-			ret = ((((((((7.74545014278341407640e-4 * r +
-			    2.27238449892691845833e-2) * r +
-			    2.41780725177450611770e-1) * r +
-			    1.27045825245236838258e0) * r +
-			    3.64784832476320460504e0) * r +
-			    5.76949722146069140550e0) * r +
-			    4.63033784615654529590e0) * r +
-			    1.42343711074968357734e0) /
-			    (((((((1.05075007164441684324e-9 * r +
-			    5.47593808499534494600e-4) * r +
-			    1.51986665636164571966e-2)
-			    * r + 1.48103976427480074590e-1) * r +
-			    6.89767334985100004550e-1) * r +
-			    1.67638483018380384940e0) * r +
-			    2.05319162663775882187e0) * r + 1.0));
+			ret = ((((((((7.74545014278341407640e-4 * r
+			                 + 2.27238449892691845833e-2)
+			                    * r
+			                + 2.41780725177450611770e-1)
+			                   * r
+			               + 1.27045825245236838258e0)
+			                  * r
+			              + 3.64784832476320460504e0)
+			                 * r
+			             + 5.76949722146069140550e0)
+			                * r
+			            + 4.63033784615654529590e0)
+			               * r
+			           + 1.42343711074968357734e0)
+			    / (((((((1.05075007164441684324e-9 * r
+			                + 5.47593808499534494600e-4)
+			                   * r
+			               + 1.51986665636164571966e-2)
+			                  * r
+			              + 1.48103976427480074590e-1)
+			                 * r
+			             + 6.89767334985100004550e-1)
+			                * r
+			            + 1.67638483018380384940e0)
+			               * r
+			           + 2.05319162663775882187e0)
+			            * r
+			        + 1.0));
 		} else {
 			/* p near 0 or 1. */
 			r -= 5.0;
-			ret = ((((((((2.01033439929228813265e-7 * r +
-			    2.71155556874348757815e-5) * r +
-			    1.24266094738807843860e-3) * r +
-			    2.65321895265761230930e-2) * r +
-			    2.96560571828504891230e-1) * r +
-			    1.78482653991729133580e0) * r +
-			    5.46378491116411436990e0) * r +
-			    6.65790464350110377720e0) /
-			    (((((((2.04426310338993978564e-15 * r +
-			    1.42151175831644588870e-7) * r +
-			    1.84631831751005468180e-5) * r +
-			    7.86869131145613259100e-4) * r +
-			    1.48753612908506148525e-2) * r +
-			    1.36929880922735805310e-1) * r +
-			    5.99832206555887937690e-1)
-			    * r + 1.0));
+			ret = ((((((((2.01033439929228813265e-7 * r
+			                 + 2.71155556874348757815e-5)
+			                    * r
+			                + 1.24266094738807843860e-3)
+			                   * r
+			               + 2.65321895265761230930e-2)
+			                  * r
+			              + 2.96560571828504891230e-1)
+			                 * r
+			             + 1.78482653991729133580e0)
+			                * r
+			            + 5.46378491116411436990e0)
+			               * r
+			           + 6.65790464350110377720e0)
+			    / (((((((2.04426310338993978564e-15 * r
+			                + 1.42151175831644588870e-7)
+			                   * r
+			               + 1.84631831751005468180e-5)
+			                  * r
+			              + 7.86869131145613259100e-4)
+			                 * r
+			             + 1.48753612908506148525e-2)
+			                * r
+			            + 1.36929880922735805310e-1)
+			               * r
+			           + 5.99832206555887937690e-1)
+			            * r
+			        + 1.0));
 		}
 		if (q < 0.0) {
 			ret = -ret;
@@ -244,8 +291,9 @@ pt_chi2(double p, double df, double ln_gamma_df_2) {
 			ch = df * pow(x * sqrt(p1) + 1.0 - p1, 3.0);
 			/* Starting approximation for p tending to 1. */
 			if (ch > 2.2 * df + 6.0) {
-				ch = -2.0 * (log(1.0 - p) - c * log(0.5 * ch) +
-				    ln_gamma_df_2);
+				ch = -2.0
+				    * (log(1.0 - p) - c * log(0.5 * ch)
+				        + ln_gamma_df_2);
 			}
 		} else {
 			ch = 0.4;
@@ -254,10 +302,13 @@ pt_chi2(double p, double df, double ln_gamma_df_2) {
 				q = ch;
 				p1 = 1.0 + ch * (4.67 + ch);
 				p2 = ch * (6.73 + ch * (6.66 + ch));
-				t = -0.5 + (4.67 + 2.0 * ch) / p1 - (6.73 + ch
-				    * (13.32 + 3.0 * ch)) / p2;
-				ch -= (1.0 - exp(a + ln_gamma_df_2 + 0.5 * ch +
-				    c * aa) * p2 / p1) / t;
+				t = -0.5 + (4.67 + 2.0 * ch) / p1
+				    - (6.73 + ch * (13.32 + 3.0 * ch)) / p2;
+				ch -= (1.0
+				          - exp(a + ln_gamma_df_2 + 0.5 * ch
+				                + c * aa)
+				              * p2 / p1)
+				    / t;
 				if (fabs(q / ch - 1.0) - 0.01 <= 0.0) {
 					break;
 				}
@@ -276,17 +327,36 @@ pt_chi2(double p, double df, double ln_gamma_df_2) {
 		t = p2 * exp(xx * aa + ln_gamma_df_2 + p1 - c * log(ch));
 		b = t / ch;
 		a = 0.5 * t - b * c;
-		s1 = (210.0 + a * (140.0 + a * (105.0 + a * (84.0 + a * (70.0 +
-		    60.0 * a))))) / 420.0;
-		s2 = (420.0 + a * (735.0 + a * (966.0 + a * (1141.0 + 1278.0 *
-		    a)))) / 2520.0;
+		s1 = (210.0
+		         + a
+		             * (140.0
+		                 + a
+		                     * (105.0
+		                         + a * (84.0 + a * (70.0 + 60.0 * a)))))
+		    / 420.0;
+		s2 =
+		    (420.0
+		        + a * (735.0 + a * (966.0 + a * (1141.0 + 1278.0 * a))))
+		    / 2520.0;
 		s3 = (210.0 + a * (462.0 + a * (707.0 + 932.0 * a))) / 2520.0;
-		s4 = (252.0 + a * (672.0 + 1182.0 * a) + c * (294.0 + a *
-		    (889.0 + 1740.0 * a))) / 5040.0;
+		s4 = (252.0 + a * (672.0 + 1182.0 * a)
+		         + c * (294.0 + a * (889.0 + 1740.0 * a)))
+		    / 5040.0;
 		s5 = (84.0 + 264.0 * a + c * (175.0 + 606.0 * a)) / 2520.0;
 		s6 = (120.0 + c * (346.0 + 127.0 * c)) / 5040.0;
-		ch += t * (1.0 + 0.5 * t * s1 - b * c * (s1 - b * (s2 - b * (s3
-		    - b * (s4 - b * (s5 - b * s6))))));
+		ch += t
+		    * (1.0 + 0.5 * t * s1
+		        - b * c
+		            * (s1
+		                - b
+		                    * (s2
+		                        - b
+		                            * (s3
+		                                - b
+		                                    * (s4
+		                                        - b
+		                                            * (s5
+		                                                - b * s6))))));
 		if (fabs(q / ch - 1.0) <= e) {
 			break;
 		}
diff --git a/test/include/test/mq.h b/test/include/test/mq.h
index 5dc6486c..4a68d709 100644
--- a/test/include/test/mq.h
+++ b/test/include/test/mq.h
@@ -26,82 +26,74 @@
  * does not perform any cleanup of messages, since it knows nothing of their
  * payloads.
  */
-#define mq_msg(a_mq_msg_type)	ql_elm(a_mq_msg_type)
+#define mq_msg(a_mq_msg_type) ql_elm(a_mq_msg_type)
 
-#define mq_gen(a_attr, a_prefix, a_mq_type, a_mq_msg_type, a_field)	\
-typedef struct {							\
-	mtx_t			lock;					\
-	ql_head(a_mq_msg_type)	msgs;					\
-	unsigned		count;					\
-} a_mq_type;								\
-a_attr bool								\
-a_prefix##init(a_mq_type *mq) {						\
-									\
-	if (mtx_init(&mq->lock)) {					\
-		return true;						\
-	}								\
-	ql_new(&mq->msgs);						\
-	mq->count = 0;							\
-	return false;							\
-}									\
-a_attr void								\
-a_prefix##fini(a_mq_type *mq) {						\
-	mtx_fini(&mq->lock);						\
-}									\
-a_attr unsigned								\
-a_prefix##count(a_mq_type *mq) {					\
-	unsigned count;							\
-									\
-	mtx_lock(&mq->lock);						\
-	count = mq->count;						\
-	mtx_unlock(&mq->lock);						\
-	return count;							\
-}									\
-a_attr a_mq_msg_type *							\
-a_prefix##tryget(a_mq_type *mq) {					\
-	a_mq_msg_type *msg;						\
-									\
-	mtx_lock(&mq->lock);						\
-	msg = ql_first(&mq->msgs);					\
-	if (msg != NULL) {						\
-		ql_head_remove(&mq->msgs, a_mq_msg_type, a_field);	\
-		mq->count--;						\
-	}								\
-	mtx_unlock(&mq->lock);						\
-	return msg;							\
-}									\
-a_attr a_mq_msg_type *							\
-a_prefix##get(a_mq_type *mq) {						\
-	a_mq_msg_type *msg;						\
-	unsigned ns;							\
-									\
-	msg = a_prefix##tryget(mq);					\
-	if (msg != NULL) {						\
-		return msg;						\
-	}								\
-									\
-	ns = 1;								\
-	while (true) {							\
-		sleep_ns(ns);						\
-		msg = a_prefix##tryget(mq);				\
-		if (msg != NULL) {					\
-			return msg;					\
-		}							\
-		if (ns < 1000*1000*1000) {				\
-			/* Double sleep time, up to max 1 second. */	\
-			ns <<= 1;					\
-			if (ns > 1000*1000*1000) {			\
-				ns = 1000*1000*1000;			\
-			}						\
-		}							\
-	}								\
-}									\
-a_attr void								\
-a_prefix##put(a_mq_type *mq, a_mq_msg_type *msg) {			\
-									\
-	mtx_lock(&mq->lock);						\
-	ql_elm_new(msg, a_field);					\
-	ql_tail_insert(&mq->msgs, msg, a_field);			\
-	mq->count++;							\
-	mtx_unlock(&mq->lock);						\
-}
+#define mq_gen(a_attr, a_prefix, a_mq_type, a_mq_msg_type, a_field)            \
+	typedef struct {                                                       \
+		mtx_t lock;                                                    \
+		ql_head(a_mq_msg_type) msgs;                                   \
+		unsigned count;                                                \
+	} a_mq_type;                                                           \
+	a_attr bool a_prefix##init(a_mq_type *mq) {                            \
+		if (mtx_init(&mq->lock)) {                                     \
+			return true;                                           \
+		}                                                              \
+		ql_new(&mq->msgs);                                             \
+		mq->count = 0;                                                 \
+		return false;                                                  \
+	}                                                                      \
+	a_attr void a_prefix##fini(a_mq_type *mq) {                            \
+		mtx_fini(&mq->lock);                                           \
+	}                                                                      \
+	a_attr unsigned a_prefix##count(a_mq_type *mq) {                       \
+		unsigned count;                                                \
+                                                                               \
+		mtx_lock(&mq->lock);                                           \
+		count = mq->count;                                             \
+		mtx_unlock(&mq->lock);                                         \
+		return count;                                                  \
+	}                                                                      \
+	a_attr a_mq_msg_type *a_prefix##tryget(a_mq_type *mq) {                \
+		a_mq_msg_type *msg;                                            \
+                                                                               \
+		mtx_lock(&mq->lock);                                           \
+		msg = ql_first(&mq->msgs);                                     \
+		if (msg != NULL) {                                             \
+			ql_head_remove(&mq->msgs, a_mq_msg_type, a_field);     \
+			mq->count--;                                           \
+		}                                                              \
+		mtx_unlock(&mq->lock);                                         \
+		return msg;                                                    \
+	}                                                                      \
+	a_attr a_mq_msg_type *a_prefix##get(a_mq_type *mq) {                   \
+		a_mq_msg_type *msg;                                            \
+		unsigned       ns;                                             \
+                                                                               \
+		msg = a_prefix##tryget(mq);                                    \
+		if (msg != NULL) {                                             \
+			return msg;                                            \
+		}                                                              \
+                                                                               \
+		ns = 1;                                                        \
+		while (true) {                                                 \
+			sleep_ns(ns);                                          \
+			msg = a_prefix##tryget(mq);                            \
+			if (msg != NULL) {                                     \
+				return msg;                                    \
+			}                                                      \
+			if (ns < 1000 * 1000 * 1000) {                         \
+				/* Double sleep time, up to max 1 second. */   \
+				ns <<= 1;                                      \
+				if (ns > 1000 * 1000 * 1000) {                 \
+					ns = 1000 * 1000 * 1000;               \
+				}                                              \
+			}                                                      \
+		}                                                              \
+	}                                                                      \
+	a_attr void a_prefix##put(a_mq_type *mq, a_mq_msg_type *msg) {         \
+		mtx_lock(&mq->lock);                                           \
+		ql_elm_new(msg, a_field);                                      \
+		ql_tail_insert(&mq->msgs, msg, a_field);                       \
+		mq->count++;                                                   \
+		mtx_unlock(&mq->lock);                                         \
+	}
diff --git a/test/include/test/mtx.h b/test/include/test/mtx.h
index 066a2137..c771ca3a 100644
--- a/test/include/test/mtx.h
+++ b/test/include/test/mtx.h
@@ -7,15 +7,15 @@
 
 typedef struct {
 #ifdef _WIN32
-	CRITICAL_SECTION	lock;
+	CRITICAL_SECTION lock;
 #elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
-	os_unfair_lock		lock;
+	os_unfair_lock lock;
 #else
-	pthread_mutex_t		lock;
+	pthread_mutex_t lock;
 #endif
 } mtx_t;
 
-bool	mtx_init(mtx_t *mtx);
-void	mtx_fini(mtx_t *mtx);
-void	mtx_lock(mtx_t *mtx);
-void	mtx_unlock(mtx_t *mtx);
+bool mtx_init(mtx_t *mtx);
+void mtx_fini(mtx_t *mtx);
+void mtx_lock(mtx_t *mtx);
+void mtx_unlock(mtx_t *mtx);
diff --git a/test/include/test/nbits.h b/test/include/test/nbits.h
index c06cf1b4..2c30a61c 100644
--- a/test/include/test/nbits.h
+++ b/test/include/test/nbits.h
@@ -3,109 +3,109 @@
 
 /* Interesting bitmap counts to test. */
 
-#define NBITS_TAB \
-    NB( 1) \
-    NB( 2) \
-    NB( 3) \
-    NB( 4) \
-    NB( 5) \
-    NB( 6) \
-    NB( 7) \
-    NB( 8) \
-    NB( 9) \
-    NB(10) \
-    NB(11) \
-    NB(12) \
-    NB(13) \
-    NB(14) \
-    NB(15) \
-    NB(16) \
-    NB(17) \
-    NB(18) \
-    NB(19) \
-    NB(20) \
-    NB(21) \
-    NB(22) \
-    NB(23) \
-    NB(24) \
-    NB(25) \
-    NB(26) \
-    NB(27) \
-    NB(28) \
-    NB(29) \
-    NB(30) \
-    NB(31) \
-    NB(32) \
-    \
-    NB(33) \
-    NB(34) \
-    NB(35) \
-    NB(36) \
-    NB(37) \
-    NB(38) \
-    NB(39) \
-    NB(40) \
-    NB(41) \
-    NB(42) \
-    NB(43) \
-    NB(44) \
-    NB(45) \
-    NB(46) \
-    NB(47) \
-    NB(48) \
-    NB(49) \
-    NB(50) \
-    NB(51) \
-    NB(52) \
-    NB(53) \
-    NB(54) \
-    NB(55) \
-    NB(56) \
-    NB(57) \
-    NB(58) \
-    NB(59) \
-    NB(60) \
-    NB(61) \
-    NB(62) \
-    NB(63) \
-    NB(64) \
-    NB(65) \
-    NB(66) \
-    NB(67) \
-    \
-    NB(126) \
-    NB(127) \
-    NB(128) \
-    NB(129) \
-    NB(130) \
-    \
-    NB(254) \
-    NB(255) \
-    NB(256) \
-    NB(257) \
-    NB(258) \
-    \
-    NB(510) \
-    NB(511) \
-    NB(512) \
-    NB(513) \
-    NB(514) \
-    \
-    NB(1022) \
-    NB(1023) \
-    NB(1024) \
-    NB(1025) \
-    NB(1026) \
-    \
-    NB(2048) \
-    \
-    NB(4094) \
-    NB(4095) \
-    NB(4096) \
-    NB(4097) \
-    NB(4098) \
-    \
-    NB(8192) \
-    NB(16384)
+#define NBITS_TAB                                                              \
+	NB(1)                                                                  \
+	NB(2)                                                                  \
+	NB(3)                                                                  \
+	NB(4)                                                                  \
+	NB(5)                                                                  \
+	NB(6)                                                                  \
+	NB(7)                                                                  \
+	NB(8)                                                                  \
+	NB(9)                                                                  \
+	NB(10)                                                                 \
+	NB(11)                                                                 \
+	NB(12)                                                                 \
+	NB(13)                                                                 \
+	NB(14)                                                                 \
+	NB(15)                                                                 \
+	NB(16)                                                                 \
+	NB(17)                                                                 \
+	NB(18)                                                                 \
+	NB(19)                                                                 \
+	NB(20)                                                                 \
+	NB(21)                                                                 \
+	NB(22)                                                                 \
+	NB(23)                                                                 \
+	NB(24)                                                                 \
+	NB(25)                                                                 \
+	NB(26)                                                                 \
+	NB(27)                                                                 \
+	NB(28)                                                                 \
+	NB(29)                                                                 \
+	NB(30)                                                                 \
+	NB(31)                                                                 \
+	NB(32)                                                                 \
+                                                                               \
+	NB(33)                                                                 \
+	NB(34)                                                                 \
+	NB(35)                                                                 \
+	NB(36)                                                                 \
+	NB(37)                                                                 \
+	NB(38)                                                                 \
+	NB(39)                                                                 \
+	NB(40)                                                                 \
+	NB(41)                                                                 \
+	NB(42)                                                                 \
+	NB(43)                                                                 \
+	NB(44)                                                                 \
+	NB(45)                                                                 \
+	NB(46)                                                                 \
+	NB(47)                                                                 \
+	NB(48)                                                                 \
+	NB(49)                                                                 \
+	NB(50)                                                                 \
+	NB(51)                                                                 \
+	NB(52)                                                                 \
+	NB(53)                                                                 \
+	NB(54)                                                                 \
+	NB(55)                                                                 \
+	NB(56)                                                                 \
+	NB(57)                                                                 \
+	NB(58)                                                                 \
+	NB(59)                                                                 \
+	NB(60)                                                                 \
+	NB(61)                                                                 \
+	NB(62)                                                                 \
+	NB(63)                                                                 \
+	NB(64)                                                                 \
+	NB(65)                                                                 \
+	NB(66)                                                                 \
+	NB(67)                                                                 \
+                                                                               \
+	NB(126)                                                                \
+	NB(127)                                                                \
+	NB(128)                                                                \
+	NB(129)                                                                \
+	NB(130)                                                                \
+                                                                               \
+	NB(254)                                                                \
+	NB(255)                                                                \
+	NB(256)                                                                \
+	NB(257)                                                                \
+	NB(258)                                                                \
+                                                                               \
+	NB(510)                                                                \
+	NB(511)                                                                \
+	NB(512)                                                                \
+	NB(513)                                                                \
+	NB(514)                                                                \
+                                                                               \
+	NB(1022)                                                               \
+	NB(1023)                                                               \
+	NB(1024)                                                               \
+	NB(1025)                                                               \
+	NB(1026)                                                               \
+                                                                               \
+	NB(2048)                                                               \
+                                                                               \
+	NB(4094)                                                               \
+	NB(4095)                                                               \
+	NB(4096)                                                               \
+	NB(4097)                                                               \
+	NB(4098)                                                               \
+                                                                               \
+	NB(8192)                                                               \
+	NB(16384)
 
 #endif /* TEST_NBITS_H */
diff --git a/test/include/test/san.h b/test/include/test/san.h
index da07865c..65a235e9 100644
--- a/test/include/test/san.h
+++ b/test/include/test/san.h
@@ -1,9 +1,9 @@
 #if defined(JEMALLOC_UAF_DETECTION) || defined(JEMALLOC_DEBUG)
-#  define TEST_SAN_UAF_ALIGN_ENABLE "lg_san_uaf_align:12"
-#  define TEST_SAN_UAF_ALIGN_DISABLE "lg_san_uaf_align:-1"
+#	define TEST_SAN_UAF_ALIGN_ENABLE "lg_san_uaf_align:12"
+#	define TEST_SAN_UAF_ALIGN_DISABLE "lg_san_uaf_align:-1"
 #else
-#  define TEST_SAN_UAF_ALIGN_ENABLE ""
-#  define TEST_SAN_UAF_ALIGN_DISABLE ""
+#	define TEST_SAN_UAF_ALIGN_ENABLE ""
+#	define TEST_SAN_UAF_ALIGN_DISABLE ""
 #endif
 
 static inline bool
@@ -11,4 +11,3 @@ extent_is_guarded(tsdn_t *tsdn, void *ptr) {
 	edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr);
 	return edata_guarded_get(edata);
 }
-
diff --git a/test/include/test/test.h b/test/include/test/test.h
index d4b65912..79f47e98 100644
--- a/test/include/test/test.h
+++ b/test/include/test/test.h
@@ -1,526 +1,503 @@
-#define ASSERT_BUFSIZE	256
+#define ASSERT_BUFSIZE 256
 
-#define verify_cmp(may_abort, t, a, b, cmp, neg_cmp, pri, ...) do {	\
-	const t a_ = (a);						\
-	const t b_ = (b);						\
-	if (!(a_ cmp b_)) {						\
-		char prefix[ASSERT_BUFSIZE];				\
-		char message[ASSERT_BUFSIZE];				\
-		malloc_snprintf(prefix, sizeof(prefix),			\
-		    "%s:%s:%d: Failed assertion: "			\
-		    "(%s) " #cmp " (%s) --> "				\
-		    "%" pri " " #neg_cmp " %" pri ": ",			\
-		    __func__, __FILE__, __LINE__,			\
-		    #a, #b, a_, b_);					\
-		malloc_snprintf(message, sizeof(message), __VA_ARGS__);	\
-		if (may_abort) {					\
-			abort();					\
-		} else {						\
-			p_test_fail(prefix, message);			\
-		}							\
-	}								\
-} while (0)
+#define verify_cmp(may_abort, t, a, b, cmp, neg_cmp, pri, ...)                 \
+	do {                                                                   \
+		const t a_ = (a);                                              \
+		const t b_ = (b);                                              \
+		if (!(a_ cmp b_)) {                                            \
+			char prefix[ASSERT_BUFSIZE];                           \
+			char message[ASSERT_BUFSIZE];                          \
+			malloc_snprintf(prefix, sizeof(prefix),                \
+			    "%s:%s:%d: Failed assertion: "                     \
+			    "(%s) " #cmp                                       \
+			    " (%s) --> "                                       \
+			    "%" pri " " #neg_cmp " %" pri ": ",                \
+			    __func__, __FILE__, __LINE__, #a, #b, a_, b_);     \
+			malloc_snprintf(                                       \
+			    message, sizeof(message), __VA_ARGS__);            \
+			p_test_fail(may_abort, prefix, message);               \
+		}                                                              \
+	} while (0)
 
-#define expect_cmp(t, a, b, cmp, neg_cmp, pri, ...) verify_cmp(false,	\
-    t, a, b, cmp, neg_cmp, pri, __VA_ARGS__)
+#define expect_cmp(t, a, b, cmp, neg_cmp, pri, ...)                            \
+	verify_cmp(false, t, a, b, cmp, neg_cmp, pri, __VA_ARGS__)
 
-#define expect_ptr_eq(a, b, ...)	expect_cmp(void *, a, b, ==,	\
-    !=, "p", __VA_ARGS__)
-#define expect_ptr_ne(a, b, ...)	expect_cmp(void *, a, b, !=,	\
-    ==, "p", __VA_ARGS__)
-#define expect_ptr_null(a, ...)		expect_cmp(void *, a, NULL, ==,	\
-    !=, "p", __VA_ARGS__)
-#define expect_ptr_not_null(a, ...)	expect_cmp(void *, a, NULL, !=,	\
-    ==, "p", __VA_ARGS__)
+#define expect_ptr_eq(a, b, ...)                                               \
+	expect_cmp(void *, a, b, ==, !=, "p", __VA_ARGS__)
+#define expect_ptr_ne(a, b, ...)                                               \
+	expect_cmp(void *, a, b, !=, ==, "p", __VA_ARGS__)
+#define expect_ptr_null(a, ...)                                                \
+	expect_cmp(void *, a, NULL, ==, !=, "p", __VA_ARGS__)
+#define expect_ptr_not_null(a, ...)                                            \
+	expect_cmp(void *, a, NULL, !=, ==, "p", __VA_ARGS__)
 
-#define expect_c_eq(a, b, ...)	expect_cmp(char, a, b, ==, !=, "c", __VA_ARGS__)
-#define expect_c_ne(a, b, ...)	expect_cmp(char, a, b, !=, ==, "c", __VA_ARGS__)
-#define expect_c_lt(a, b, ...)	expect_cmp(char, a, b, <, >=, "c", __VA_ARGS__)
-#define expect_c_le(a, b, ...)	expect_cmp(char, a, b, <=, >, "c", __VA_ARGS__)
-#define expect_c_ge(a, b, ...)	expect_cmp(char, a, b, >=, <, "c", __VA_ARGS__)
-#define expect_c_gt(a, b, ...)	expect_cmp(char, a, b, >, <=, "c", __VA_ARGS__)
+#define expect_c_eq(a, b, ...) expect_cmp(char, a, b, ==, !=, "c", __VA_ARGS__)
+#define expect_c_ne(a, b, ...) expect_cmp(char, a, b, !=, ==, "c", __VA_ARGS__)
+#define expect_c_lt(a, b, ...) expect_cmp(char, a, b, <, >=, "c", __VA_ARGS__)
+#define expect_c_le(a, b, ...) expect_cmp(char, a, b, <=, >, "c", __VA_ARGS__)
+#define expect_c_ge(a, b, ...) expect_cmp(char, a, b, >=, <, "c", __VA_ARGS__)
+#define expect_c_gt(a, b, ...) expect_cmp(char, a, b, >, <=, "c", __VA_ARGS__)
 
-#define expect_x_eq(a, b, ...)	expect_cmp(int, a, b, ==, !=, "#x", __VA_ARGS__)
-#define expect_x_ne(a, b, ...)	expect_cmp(int, a, b, !=, ==, "#x", __VA_ARGS__)
-#define expect_x_lt(a, b, ...)	expect_cmp(int, a, b, <, >=, "#x", __VA_ARGS__)
-#define expect_x_le(a, b, ...)	expect_cmp(int, a, b, <=, >, "#x", __VA_ARGS__)
-#define expect_x_ge(a, b, ...)	expect_cmp(int, a, b, >=, <, "#x", __VA_ARGS__)
-#define expect_x_gt(a, b, ...)	expect_cmp(int, a, b, >, <=, "#x", __VA_ARGS__)
+#define expect_x_eq(a, b, ...) expect_cmp(int, a, b, ==, !=, "#x", __VA_ARGS__)
+#define expect_x_ne(a, b, ...) expect_cmp(int, a, b, !=, ==, "#x", __VA_ARGS__)
+#define expect_x_lt(a, b, ...) expect_cmp(int, a, b, <, >=, "#x", __VA_ARGS__)
+#define expect_x_le(a, b, ...) expect_cmp(int, a, b, <=, >, "#x", __VA_ARGS__)
+#define expect_x_ge(a, b, ...) expect_cmp(int, a, b, >=, <, "#x", __VA_ARGS__)
+#define expect_x_gt(a, b, ...) expect_cmp(int, a, b, >, <=, "#x", __VA_ARGS__)
 
-#define expect_d_eq(a, b, ...)	expect_cmp(int, a, b, ==, !=, "d", __VA_ARGS__)
-#define expect_d_ne(a, b, ...)	expect_cmp(int, a, b, !=, ==, "d", __VA_ARGS__)
-#define expect_d_lt(a, b, ...)	expect_cmp(int, a, b, <, >=, "d", __VA_ARGS__)
-#define expect_d_le(a, b, ...)	expect_cmp(int, a, b, <=, >, "d", __VA_ARGS__)
-#define expect_d_ge(a, b, ...)	expect_cmp(int, a, b, >=, <, "d", __VA_ARGS__)
-#define expect_d_gt(a, b, ...)	expect_cmp(int, a, b, >, <=, "d", __VA_ARGS__)
+#define expect_d_eq(a, b, ...) expect_cmp(int, a, b, ==, !=, "d", __VA_ARGS__)
+#define expect_d_ne(a, b, ...) expect_cmp(int, a, b, !=, ==, "d", __VA_ARGS__)
+#define expect_d_lt(a, b, ...) expect_cmp(int, a, b, <, >=, "d", __VA_ARGS__)
+#define expect_d_le(a, b, ...) expect_cmp(int, a, b, <=, >, "d", __VA_ARGS__)
+#define expect_d_ge(a, b, ...) expect_cmp(int, a, b, >=, <, "d", __VA_ARGS__)
+#define expect_d_gt(a, b, ...) expect_cmp(int, a, b, >, <=, "d", __VA_ARGS__)
 
-#define expect_u_eq(a, b, ...)	expect_cmp(int, a, b, ==, !=, "u", __VA_ARGS__)
-#define expect_u_ne(a, b, ...)	expect_cmp(int, a, b, !=, ==, "u", __VA_ARGS__)
-#define expect_u_lt(a, b, ...)	expect_cmp(int, a, b, <, >=, "u", __VA_ARGS__)
-#define expect_u_le(a, b, ...)	expect_cmp(int, a, b, <=, >, "u", __VA_ARGS__)
-#define expect_u_ge(a, b, ...)	expect_cmp(int, a, b, >=, <, "u", __VA_ARGS__)
-#define expect_u_gt(a, b, ...)	expect_cmp(int, a, b, >, <=, "u", __VA_ARGS__)
+#define expect_u_eq(a, b, ...) expect_cmp(int, a, b, ==, !=, "u", __VA_ARGS__)
+#define expect_u_ne(a, b, ...) expect_cmp(int, a, b, !=, ==, "u", __VA_ARGS__)
+#define expect_u_lt(a, b, ...) expect_cmp(int, a, b, <, >=, "u", __VA_ARGS__)
+#define expect_u_le(a, b, ...) expect_cmp(int, a, b, <=, >, "u", __VA_ARGS__)
+#define expect_u_ge(a, b, ...) expect_cmp(int, a, b, >=, <, "u", __VA_ARGS__)
+#define expect_u_gt(a, b, ...) expect_cmp(int, a, b, >, <=, "u", __VA_ARGS__)
 
-#define expect_ld_eq(a, b, ...)	expect_cmp(long, a, b, ==,	\
-    !=, "ld", __VA_ARGS__)
-#define expect_ld_ne(a, b, ...)	expect_cmp(long, a, b, !=,	\
-    ==, "ld", __VA_ARGS__)
-#define expect_ld_lt(a, b, ...)	expect_cmp(long, a, b, <,	\
-    >=, "ld", __VA_ARGS__)
-#define expect_ld_le(a, b, ...)	expect_cmp(long, a, b, <=,	\
-    >, "ld", __VA_ARGS__)
-#define expect_ld_ge(a, b, ...)	expect_cmp(long, a, b, >=,	\
-    <, "ld", __VA_ARGS__)
-#define expect_ld_gt(a, b, ...)	expect_cmp(long, a, b, >,	\
-    <=, "ld", __VA_ARGS__)
+#define expect_ld_eq(a, b, ...)                                                \
+	expect_cmp(long, a, b, ==, !=, "ld", __VA_ARGS__)
+#define expect_ld_ne(a, b, ...)                                                \
+	expect_cmp(long, a, b, !=, ==, "ld", __VA_ARGS__)
+#define expect_ld_lt(a, b, ...) expect_cmp(long, a, b, <, >=, "ld", __VA_ARGS__)
+#define expect_ld_le(a, b, ...) expect_cmp(long, a, b, <=, >, "ld", __VA_ARGS__)
+#define expect_ld_ge(a, b, ...) expect_cmp(long, a, b, >=, <, "ld", __VA_ARGS__)
+#define expect_ld_gt(a, b, ...) expect_cmp(long, a, b, >, <=, "ld", __VA_ARGS__)
 
-#define expect_lu_eq(a, b, ...)	expect_cmp(unsigned long,	\
-    a, b, ==, !=, "lu", __VA_ARGS__)
-#define expect_lu_ne(a, b, ...)	expect_cmp(unsigned long,	\
-    a, b, !=, ==, "lu", __VA_ARGS__)
-#define expect_lu_lt(a, b, ...)	expect_cmp(unsigned long,	\
-    a, b, <, >=, "lu", __VA_ARGS__)
-#define expect_lu_le(a, b, ...)	expect_cmp(unsigned long,	\
-    a, b, <=, >, "lu", __VA_ARGS__)
-#define expect_lu_ge(a, b, ...)	expect_cmp(unsigned long,	\
-    a, b, >=, <, "lu", __VA_ARGS__)
-#define expect_lu_gt(a, b, ...)	expect_cmp(unsigned long,	\
-    a, b, >, <=, "lu", __VA_ARGS__)
+#define expect_lu_eq(a, b, ...)                                                \
+	expect_cmp(unsigned long, a, b, ==, !=, "lu", __VA_ARGS__)
+#define expect_lu_ne(a, b, ...)                                                \
+	expect_cmp(unsigned long, a, b, !=, ==, "lu", __VA_ARGS__)
+#define expect_lu_lt(a, b, ...)                                                \
+	expect_cmp(unsigned long, a, b, <, >=, "lu", __VA_ARGS__)
+#define expect_lu_le(a, b, ...)                                                \
+	expect_cmp(unsigned long, a, b, <=, >, "lu", __VA_ARGS__)
+#define expect_lu_ge(a, b, ...)                                                \
+	expect_cmp(unsigned long, a, b, >=, <, "lu", __VA_ARGS__)
+#define expect_lu_gt(a, b, ...)                                                \
+	expect_cmp(unsigned long, a, b, >, <=, "lu", __VA_ARGS__)
 
-#define expect_qd_eq(a, b, ...)	expect_cmp(long long, a, b, ==,	\
-    !=, "qd", __VA_ARGS__)
-#define expect_qd_ne(a, b, ...)	expect_cmp(long long, a, b, !=,	\
-    ==, "qd", __VA_ARGS__)
-#define expect_qd_lt(a, b, ...)	expect_cmp(long long, a, b, <,	\
-    >=, "qd", __VA_ARGS__)
-#define expect_qd_le(a, b, ...)	expect_cmp(long long, a, b, <=,	\
-    >, "qd", __VA_ARGS__)
-#define expect_qd_ge(a, b, ...)	expect_cmp(long long, a, b, >=,	\
-    <, "qd", __VA_ARGS__)
-#define expect_qd_gt(a, b, ...)	expect_cmp(long long, a, b, >,	\
-    <=, "qd", __VA_ARGS__)
+#define expect_qd_eq(a, b, ...)                                                \
+	expect_cmp(long long, a, b, ==, !=, "qd", __VA_ARGS__)
+#define expect_qd_ne(a, b, ...)                                                \
+	expect_cmp(long long, a, b, !=, ==, "qd", __VA_ARGS__)
+#define expect_qd_lt(a, b, ...)                                                \
+	expect_cmp(long long, a, b, <, >=, "qd", __VA_ARGS__)
+#define expect_qd_le(a, b, ...)                                                \
+	expect_cmp(long long, a, b, <=, >, "qd", __VA_ARGS__)
+#define expect_qd_ge(a, b, ...)                                                \
+	expect_cmp(long long, a, b, >=, <, "qd", __VA_ARGS__)
+#define expect_qd_gt(a, b, ...)                                                \
+	expect_cmp(long long, a, b, >, <=, "qd", __VA_ARGS__)
 
-#define expect_qu_eq(a, b, ...)	expect_cmp(unsigned long long,	\
-    a, b, ==, !=, "qu", __VA_ARGS__)
-#define expect_qu_ne(a, b, ...)	expect_cmp(unsigned long long,	\
-    a, b, !=, ==, "qu", __VA_ARGS__)
-#define expect_qu_lt(a, b, ...)	expect_cmp(unsigned long long,	\
-    a, b, <, >=, "qu", __VA_ARGS__)
-#define expect_qu_le(a, b, ...)	expect_cmp(unsigned long long,	\
-    a, b, <=, >, "qu", __VA_ARGS__)
-#define expect_qu_ge(a, b, ...)	expect_cmp(unsigned long long,	\
-    a, b, >=, <, "qu", __VA_ARGS__)
-#define expect_qu_gt(a, b, ...)	expect_cmp(unsigned long long,	\
-    a, b, >, <=, "qu", __VA_ARGS__)
+#define expect_qu_eq(a, b, ...)                                                \
+	expect_cmp(unsigned long long, a, b, ==, !=, "qu", __VA_ARGS__)
+#define expect_qu_ne(a, b, ...)                                                \
+	expect_cmp(unsigned long long, a, b, !=, ==, "qu", __VA_ARGS__)
+#define expect_qu_lt(a, b, ...)                                                \
+	expect_cmp(unsigned long long, a, b, <, >=, "qu", __VA_ARGS__)
+#define expect_qu_le(a, b, ...)                                                \
+	expect_cmp(unsigned long long, a, b, <=, >, "qu", __VA_ARGS__)
+#define expect_qu_ge(a, b, ...)                                                \
+	expect_cmp(unsigned long long, a, b, >=, <, "qu", __VA_ARGS__)
+#define expect_qu_gt(a, b, ...)                                                \
+	expect_cmp(unsigned long long, a, b, >, <=, "qu", __VA_ARGS__)
 
-#define expect_jd_eq(a, b, ...)	expect_cmp(intmax_t, a, b, ==,	\
-    !=, "jd", __VA_ARGS__)
-#define expect_jd_ne(a, b, ...)	expect_cmp(intmax_t, a, b, !=,	\
-    ==, "jd", __VA_ARGS__)
-#define expect_jd_lt(a, b, ...)	expect_cmp(intmax_t, a, b, <,	\
-    >=, "jd", __VA_ARGS__)
-#define expect_jd_le(a, b, ...)	expect_cmp(intmax_t, a, b, <=,	\
-    >, "jd", __VA_ARGS__)
-#define expect_jd_ge(a, b, ...)	expect_cmp(intmax_t, a, b, >=,	\
-    <, "jd", __VA_ARGS__)
-#define expect_jd_gt(a, b, ...)	expect_cmp(intmax_t, a, b, >,	\
-    <=, "jd", __VA_ARGS__)
+#define expect_jd_eq(a, b, ...)                                                \
+	expect_cmp(intmax_t, a, b, ==, !=, "jd", __VA_ARGS__)
+#define expect_jd_ne(a, b, ...)                                                \
+	expect_cmp(intmax_t, a, b, !=, ==, "jd", __VA_ARGS__)
+#define expect_jd_lt(a, b, ...)                                                \
+	expect_cmp(intmax_t, a, b, <, >=, "jd", __VA_ARGS__)
+#define expect_jd_le(a, b, ...)                                                \
+	expect_cmp(intmax_t, a, b, <=, >, "jd", __VA_ARGS__)
+#define expect_jd_ge(a, b, ...)                                                \
+	expect_cmp(intmax_t, a, b, >=, <, "jd", __VA_ARGS__)
+#define expect_jd_gt(a, b, ...)                                                \
+	expect_cmp(intmax_t, a, b, >, <=, "jd", __VA_ARGS__)
 
-#define expect_ju_eq(a, b, ...)	expect_cmp(uintmax_t, a, b, ==,	\
-    !=, "ju", __VA_ARGS__)
-#define expect_ju_ne(a, b, ...)	expect_cmp(uintmax_t, a, b, !=,	\
-    ==, "ju", __VA_ARGS__)
-#define expect_ju_lt(a, b, ...)	expect_cmp(uintmax_t, a, b, <,	\
-    >=, "ju", __VA_ARGS__)
-#define expect_ju_le(a, b, ...)	expect_cmp(uintmax_t, a, b, <=,	\
-    >, "ju", __VA_ARGS__)
-#define expect_ju_ge(a, b, ...)	expect_cmp(uintmax_t, a, b, >=,	\
-    <, "ju", __VA_ARGS__)
-#define expect_ju_gt(a, b, ...)	expect_cmp(uintmax_t, a, b, >,	\
-    <=, "ju", __VA_ARGS__)
+#define expect_ju_eq(a, b, ...)                                                \
+	expect_cmp(uintmax_t, a, b, ==, !=, "ju", __VA_ARGS__)
+#define expect_ju_ne(a, b, ...)                                                \
+	expect_cmp(uintmax_t, a, b, !=, ==, "ju", __VA_ARGS__)
+#define expect_ju_lt(a, b, ...)                                                \
+	expect_cmp(uintmax_t, a, b, <, >=, "ju", __VA_ARGS__)
+#define expect_ju_le(a, b, ...)                                                \
+	expect_cmp(uintmax_t, a, b, <=, >, "ju", __VA_ARGS__)
+#define expect_ju_ge(a, b, ...)                                                \
+	expect_cmp(uintmax_t, a, b, >=, <, "ju", __VA_ARGS__)
+#define expect_ju_gt(a, b, ...)                                                \
+	expect_cmp(uintmax_t, a, b, >, <=, "ju", __VA_ARGS__)
 
-#define expect_zd_eq(a, b, ...)	expect_cmp(ssize_t, a, b, ==,	\
-    !=, "zd", __VA_ARGS__)
-#define expect_zd_ne(a, b, ...)	expect_cmp(ssize_t, a, b, !=,	\
-    ==, "zd", __VA_ARGS__)
-#define expect_zd_lt(a, b, ...)	expect_cmp(ssize_t, a, b, <,	\
-    >=, "zd", __VA_ARGS__)
-#define expect_zd_le(a, b, ...)	expect_cmp(ssize_t, a, b, <=,	\
-    >, "zd", __VA_ARGS__)
-#define expect_zd_ge(a, b, ...)	expect_cmp(ssize_t, a, b, >=,	\
-    <, "zd", __VA_ARGS__)
-#define expect_zd_gt(a, b, ...)	expect_cmp(ssize_t, a, b, >,	\
-    <=, "zd", __VA_ARGS__)
+#define expect_zd_eq(a, b, ...)                                                \
+	expect_cmp(ssize_t, a, b, ==, !=, "zd", __VA_ARGS__)
+#define expect_zd_ne(a, b, ...)                                                \
+	expect_cmp(ssize_t, a, b, !=, ==, "zd", __VA_ARGS__)
+#define expect_zd_lt(a, b, ...)                                                \
+	expect_cmp(ssize_t, a, b, <, >=, "zd", __VA_ARGS__)
+#define expect_zd_le(a, b, ...)                                                \
+	expect_cmp(ssize_t, a, b, <=, >, "zd", __VA_ARGS__)
+#define expect_zd_ge(a, b, ...)                                                \
+	expect_cmp(ssize_t, a, b, >=, <, "zd", __VA_ARGS__)
+#define expect_zd_gt(a, b, ...)                                                \
+	expect_cmp(ssize_t, a, b, >, <=, "zd", __VA_ARGS__)
 
-#define expect_zu_eq(a, b, ...)	expect_cmp(size_t, a, b, ==,	\
-    !=, "zu", __VA_ARGS__)
-#define expect_zu_ne(a, b, ...)	expect_cmp(size_t, a, b, !=,	\
-    ==, "zu", __VA_ARGS__)
-#define expect_zu_lt(a, b, ...)	expect_cmp(size_t, a, b, <,	\
-    >=, "zu", __VA_ARGS__)
-#define expect_zu_le(a, b, ...)	expect_cmp(size_t, a, b, <=,	\
-    >, "zu", __VA_ARGS__)
-#define expect_zu_ge(a, b, ...)	expect_cmp(size_t, a, b, >=,	\
-    <, "zu", __VA_ARGS__)
-#define expect_zu_gt(a, b, ...)	expect_cmp(size_t, a, b, >,	\
-    <=, "zu", __VA_ARGS__)
+#define expect_zu_eq(a, b, ...)                                                \
+	expect_cmp(size_t, a, b, ==, !=, "zu", __VA_ARGS__)
+#define expect_zu_ne(a, b, ...)                                                \
+	expect_cmp(size_t, a, b, !=, ==, "zu", __VA_ARGS__)
+#define expect_zu_lt(a, b, ...)                                                \
+	expect_cmp(size_t, a, b, <, >=, "zu", __VA_ARGS__)
+#define expect_zu_le(a, b, ...)                                                \
+	expect_cmp(size_t, a, b, <=, >, "zu", __VA_ARGS__)
+#define expect_zu_ge(a, b, ...)                                                \
+	expect_cmp(size_t, a, b, >=, <, "zu", __VA_ARGS__)
+#define expect_zu_gt(a, b, ...)                                                \
+	expect_cmp(size_t, a, b, >, <=, "zu", __VA_ARGS__)
 
-#define expect_d32_eq(a, b, ...)	expect_cmp(int32_t, a, b, ==,	\
-    !=, FMTd32, __VA_ARGS__)
-#define expect_d32_ne(a, b, ...)	expect_cmp(int32_t, a, b, !=,	\
-    ==, FMTd32, __VA_ARGS__)
-#define expect_d32_lt(a, b, ...)	expect_cmp(int32_t, a, b, <,	\
-    >=, FMTd32, __VA_ARGS__)
-#define expect_d32_le(a, b, ...)	expect_cmp(int32_t, a, b, <=,	\
-    >, FMTd32, __VA_ARGS__)
-#define expect_d32_ge(a, b, ...)	expect_cmp(int32_t, a, b, >=,	\
-    <, FMTd32, __VA_ARGS__)
-#define expect_d32_gt(a, b, ...)	expect_cmp(int32_t, a, b, >,	\
-    <=, FMTd32, __VA_ARGS__)
+#define expect_d32_eq(a, b, ...)                                               \
+	expect_cmp(int32_t, a, b, ==, !=, FMTd32, __VA_ARGS__)
+#define expect_d32_ne(a, b, ...)                                               \
+	expect_cmp(int32_t, a, b, !=, ==, FMTd32, __VA_ARGS__)
+#define expect_d32_lt(a, b, ...)                                               \
+	expect_cmp(int32_t, a, b, <, >=, FMTd32, __VA_ARGS__)
+#define expect_d32_le(a, b, ...)                                               \
+	expect_cmp(int32_t, a, b, <=, >, FMTd32, __VA_ARGS__)
+#define expect_d32_ge(a, b, ...)                                               \
+	expect_cmp(int32_t, a, b, >=, <, FMTd32, __VA_ARGS__)
+#define expect_d32_gt(a, b, ...)                                               \
+	expect_cmp(int32_t, a, b, >, <=, FMTd32, __VA_ARGS__)
 
-#define expect_u32_eq(a, b, ...)	expect_cmp(uint32_t, a, b, ==,	\
-    !=, FMTu32, __VA_ARGS__)
-#define expect_u32_ne(a, b, ...)	expect_cmp(uint32_t, a, b, !=,	\
-    ==, FMTu32, __VA_ARGS__)
-#define expect_u32_lt(a, b, ...)	expect_cmp(uint32_t, a, b, <,	\
-    >=, FMTu32, __VA_ARGS__)
-#define expect_u32_le(a, b, ...)	expect_cmp(uint32_t, a, b, <=,	\
-    >, FMTu32, __VA_ARGS__)
-#define expect_u32_ge(a, b, ...)	expect_cmp(uint32_t, a, b, >=,	\
-    <, FMTu32, __VA_ARGS__)
-#define expect_u32_gt(a, b, ...)	expect_cmp(uint32_t, a, b, >,	\
-    <=, FMTu32, __VA_ARGS__)
+#define expect_u32_eq(a, b, ...)                                               \
+	expect_cmp(uint32_t, a, b, ==, !=, FMTu32, __VA_ARGS__)
+#define expect_u32_ne(a, b, ...)                                               \
+	expect_cmp(uint32_t, a, b, !=, ==, FMTu32, __VA_ARGS__)
+#define expect_u32_lt(a, b, ...)                                               \
+	expect_cmp(uint32_t, a, b, <, >=, FMTu32, __VA_ARGS__)
+#define expect_u32_le(a, b, ...)                                               \
+	expect_cmp(uint32_t, a, b, <=, >, FMTu32, __VA_ARGS__)
+#define expect_u32_ge(a, b, ...)                                               \
+	expect_cmp(uint32_t, a, b, >=, <, FMTu32, __VA_ARGS__)
+#define expect_u32_gt(a, b, ...)                                               \
+	expect_cmp(uint32_t, a, b, >, <=, FMTu32, __VA_ARGS__)
 
-#define expect_d64_eq(a, b, ...)	expect_cmp(int64_t, a, b, ==,	\
-    !=, FMTd64, __VA_ARGS__)
-#define expect_d64_ne(a, b, ...)	expect_cmp(int64_t, a, b, !=,	\
-    ==, FMTd64, __VA_ARGS__)
-#define expect_d64_lt(a, b, ...)	expect_cmp(int64_t, a, b, <,	\
-    >=, FMTd64, __VA_ARGS__)
-#define expect_d64_le(a, b, ...)	expect_cmp(int64_t, a, b, <=,	\
-    >, FMTd64, __VA_ARGS__)
-#define expect_d64_ge(a, b, ...)	expect_cmp(int64_t, a, b, >=,	\
-    <, FMTd64, __VA_ARGS__)
-#define expect_d64_gt(a, b, ...)	expect_cmp(int64_t, a, b, >,	\
-    <=, FMTd64, __VA_ARGS__)
+#define expect_d64_eq(a, b, ...)                                               \
+	expect_cmp(int64_t, a, b, ==, !=, FMTd64, __VA_ARGS__)
+#define expect_d64_ne(a, b, ...)                                               \
+	expect_cmp(int64_t, a, b, !=, ==, FMTd64, __VA_ARGS__)
+#define expect_d64_lt(a, b, ...)                                               \
+	expect_cmp(int64_t, a, b, <, >=, FMTd64, __VA_ARGS__)
+#define expect_d64_le(a, b, ...)                                               \
+	expect_cmp(int64_t, a, b, <=, >, FMTd64, __VA_ARGS__)
+#define expect_d64_ge(a, b, ...)                                               \
+	expect_cmp(int64_t, a, b, >=, <, FMTd64, __VA_ARGS__)
+#define expect_d64_gt(a, b, ...)                                               \
+	expect_cmp(int64_t, a, b, >, <=, FMTd64, __VA_ARGS__)
 
-#define expect_u64_eq(a, b, ...)	expect_cmp(uint64_t, a, b, ==,	\
-    !=, FMTu64, __VA_ARGS__)
-#define expect_u64_ne(a, b, ...)	expect_cmp(uint64_t, a, b, !=,	\
-    ==, FMTu64, __VA_ARGS__)
-#define expect_u64_lt(a, b, ...)	expect_cmp(uint64_t, a, b, <,	\
-    >=, FMTu64, __VA_ARGS__)
-#define expect_u64_le(a, b, ...)	expect_cmp(uint64_t, a, b, <=,	\
-    >, FMTu64, __VA_ARGS__)
-#define expect_u64_ge(a, b, ...)	expect_cmp(uint64_t, a, b, >=,	\
-    <, FMTu64, __VA_ARGS__)
-#define expect_u64_gt(a, b, ...)	expect_cmp(uint64_t, a, b, >,	\
-    <=, FMTu64, __VA_ARGS__)
+#define expect_u64_eq(a, b, ...)                                               \
+	expect_cmp(uint64_t, a, b, ==, !=, FMTu64, __VA_ARGS__)
+#define expect_u64_ne(a, b, ...)                                               \
+	expect_cmp(uint64_t, a, b, !=, ==, FMTu64, __VA_ARGS__)
+#define expect_u64_lt(a, b, ...)                                               \
+	expect_cmp(uint64_t, a, b, <, >=, FMTu64, __VA_ARGS__)
+#define expect_u64_le(a, b, ...)                                               \
+	expect_cmp(uint64_t, a, b, <=, >, FMTu64, __VA_ARGS__)
+#define expect_u64_ge(a, b, ...)                                               \
+	expect_cmp(uint64_t, a, b, >=, <, FMTu64, __VA_ARGS__)
+#define expect_u64_gt(a, b, ...)                                               \
+	expect_cmp(uint64_t, a, b, >, <=, FMTu64, __VA_ARGS__)
 
-#define verify_b_eq(may_abort, a, b, ...) do {				\
-	bool a_ = (a);							\
-	bool b_ = (b);							\
-	if (!(a_ == b_)) {						\
-		char prefix[ASSERT_BUFSIZE];				\
-		char message[ASSERT_BUFSIZE];				\
-		malloc_snprintf(prefix, sizeof(prefix),			\
-		    "%s:%s:%d: Failed assertion: "			\
-		    "(%s) == (%s) --> %s != %s: ",			\
-		    __func__, __FILE__, __LINE__,			\
-		    #a, #b, a_ ? "true" : "false",			\
-		    b_ ? "true" : "false");				\
-		malloc_snprintf(message, sizeof(message), __VA_ARGS__);	\
-		if (may_abort) {					\
-			abort();					\
-		} else {						\
-			p_test_fail(prefix, message);			\
-		}							\
-	}								\
-} while (0)
+#define verify_b_eq(may_abort, a, b, ...)                                      \
+	do {                                                                   \
+		bool a_ = (a);                                                 \
+		bool b_ = (b);                                                 \
+		if (!(a_ == b_)) {                                             \
+			char prefix[ASSERT_BUFSIZE];                           \
+			char message[ASSERT_BUFSIZE];                          \
+			malloc_snprintf(prefix, sizeof(prefix),                \
+			    "%s:%s:%d: Failed assertion: "                     \
+			    "(%s) == (%s) --> %s != %s: ",                     \
+			    __func__, __FILE__, __LINE__, #a, #b,              \
+			    a_ ? "true" : "false", b_ ? "true" : "false");     \
+			malloc_snprintf(                                       \
+			    message, sizeof(message), __VA_ARGS__);            \
+			p_test_fail(may_abort, prefix, message);               \
+		}                                                              \
+	} while (0)
 
-#define verify_b_ne(may_abort, a, b, ...) do {				\
-	bool a_ = (a);							\
-	bool b_ = (b);							\
-	if (!(a_ != b_)) {						\
-		char prefix[ASSERT_BUFSIZE];				\
-		char message[ASSERT_BUFSIZE];				\
-		malloc_snprintf(prefix, sizeof(prefix),			\
-		    "%s:%s:%d: Failed assertion: "			\
-		    "(%s) != (%s) --> %s == %s: ",			\
-		    __func__, __FILE__, __LINE__,			\
-		    #a, #b, a_ ? "true" : "false",			\
-		    b_ ? "true" : "false");				\
-		malloc_snprintf(message, sizeof(message), __VA_ARGS__);	\
-		if (may_abort) {					\
-			abort();					\
-		} else {						\
-			p_test_fail(prefix, message);			\
-		}							\
-	}								\
-} while (0)
+#define verify_b_ne(may_abort, a, b, ...)                                      \
+	do {                                                                   \
+		bool a_ = (a);                                                 \
+		bool b_ = (b);                                                 \
+		if (!(a_ != b_)) {                                             \
+			char prefix[ASSERT_BUFSIZE];                           \
+			char message[ASSERT_BUFSIZE];                          \
+			malloc_snprintf(prefix, sizeof(prefix),                \
+			    "%s:%s:%d: Failed assertion: "                     \
+			    "(%s) != (%s) --> %s == %s: ",                     \
+			    __func__, __FILE__, __LINE__, #a, #b,              \
+			    a_ ? "true" : "false", b_ ? "true" : "false");     \
+			malloc_snprintf(                                       \
+			    message, sizeof(message), __VA_ARGS__);            \
+			p_test_fail(may_abort, prefix, message);               \
+		}                                                              \
+	} while (0)
 
-#define expect_b_eq(a, b, ...)	verify_b_eq(false, a, b, __VA_ARGS__)
-#define expect_b_ne(a, b, ...)	verify_b_ne(false, a, b, __VA_ARGS__)
+#define expect_b_eq(a, b, ...) verify_b_eq(false, a, b, __VA_ARGS__)
+#define expect_b_ne(a, b, ...) verify_b_ne(false, a, b, __VA_ARGS__)
 
-#define expect_true(a, ...)	expect_b_eq(a, true, __VA_ARGS__)
-#define expect_false(a, ...)	expect_b_eq(a, false, __VA_ARGS__)
+#define expect_true(a, ...) expect_b_eq(a, true, __VA_ARGS__)
+#define expect_false(a, ...) expect_b_eq(a, false, __VA_ARGS__)
 
-#define verify_str_eq(may_abort, a, b, ...) do {			\
-	if (strcmp((a), (b))) {						\
-		char prefix[ASSERT_BUFSIZE];				\
-		char message[ASSERT_BUFSIZE];				\
-		malloc_snprintf(prefix, sizeof(prefix),			\
-		    "%s:%s:%d: Failed assertion: "			\
-		    "(%s) same as (%s) --> "				\
-		    "\"%s\" differs from \"%s\": ",			\
-		    __func__, __FILE__, __LINE__, #a, #b, a, b);	\
-		malloc_snprintf(message, sizeof(message), __VA_ARGS__);	\
-		if (may_abort) {					\
-			abort();					\
-		} else {						\
-			p_test_fail(prefix, message);			\
-		}							\
-	}								\
-} while (0)
+#define verify_str_eq(may_abort, a, b, ...)                                    \
+	do {                                                                   \
+		if (strcmp((a), (b)) != 0) {                                   \
+			char prefix[ASSERT_BUFSIZE];                           \
+			char message[ASSERT_BUFSIZE];                          \
+			malloc_snprintf(prefix, sizeof(prefix),                \
+			    "%s:%s:%d: Failed assertion: "                     \
+			    "(%s) same as (%s) --> "                           \
+			    "\"%s\" differs from \"%s\": ",                    \
+			    __func__, __FILE__, __LINE__, #a, #b, a, b);       \
+			malloc_snprintf(                                       \
+			    message, sizeof(message), __VA_ARGS__);            \
+			p_test_fail(may_abort, prefix, message);               \
+		}                                                              \
+	} while (0)
 
-#define verify_str_ne(may_abort, a, b, ...) do {			\
-	if (!strcmp((a), (b))) {					\
-		char prefix[ASSERT_BUFSIZE];				\
-		char message[ASSERT_BUFSIZE];				\
-		malloc_snprintf(prefix, sizeof(prefix),			\
-		    "%s:%s:%d: Failed assertion: "			\
-		    "(%s) differs from (%s) --> "			\
-		    "\"%s\" same as \"%s\": ",				\
-		    __func__, __FILE__, __LINE__, #a, #b, a, b);	\
-		malloc_snprintf(message, sizeof(message), __VA_ARGS__);	\
-		if (may_abort) {					\
-			abort();					\
-		} else {						\
-			p_test_fail(prefix, message);			\
-		}							\
-	}								\
-} while (0)
+#define verify_str_ne(may_abort, a, b, ...)                                    \
+	do {                                                                   \
+		if (strcmp((a), (b)) == 0) {                                   \
+			char prefix[ASSERT_BUFSIZE];                           \
+			char message[ASSERT_BUFSIZE];                          \
+			malloc_snprintf(prefix, sizeof(prefix),                \
+			    "%s:%s:%d: Failed assertion: "                     \
+			    "(%s) differs from (%s) --> "                      \
+			    "\"%s\" same as \"%s\": ",                         \
+			    __func__, __FILE__, __LINE__, #a, #b, a, b);       \
+			malloc_snprintf(                                       \
+			    message, sizeof(message), __VA_ARGS__);            \
+			p_test_fail(may_abort, prefix, message);               \
+		}                                                              \
+	} while (0)
 
 #define expect_str_eq(a, b, ...) verify_str_eq(false, a, b, __VA_ARGS__)
 #define expect_str_ne(a, b, ...) verify_str_ne(false, a, b, __VA_ARGS__)
 
-#define verify_not_reached(may_abort, ...) do {				\
-	char prefix[ASSERT_BUFSIZE];					\
-	char message[ASSERT_BUFSIZE];					\
-	malloc_snprintf(prefix, sizeof(prefix),				\
-	    "%s:%s:%d: Unreachable code reached: ",			\
-	    __func__, __FILE__, __LINE__);				\
-	malloc_snprintf(message, sizeof(message), __VA_ARGS__);		\
-	if (may_abort) {						\
-		abort();						\
-	} else {							\
-		p_test_fail(prefix, message);				\
-	}								\
-} while (0)
+#define verify_not_reached(may_abort, ...)                                     \
+	do {                                                                   \
+		char prefix[ASSERT_BUFSIZE];                                   \
+		char message[ASSERT_BUFSIZE];                                  \
+		malloc_snprintf(prefix, sizeof(prefix),                        \
+		    "%s:%s:%d: Unreachable code reached: ", __func__,          \
+		    __FILE__, __LINE__);                                       \
+		malloc_snprintf(message, sizeof(message), __VA_ARGS__);        \
+		p_test_fail(may_abort, prefix, message);                       \
+	} while (0)
 
 #define expect_not_reached(...) verify_not_reached(false, __VA_ARGS__)
 
-#define assert_cmp(t, a, b, cmp, neg_cmp, pri, ...) verify_cmp(true,	\
-    t, a, b, cmp, neg_cmp, pri, __VA_ARGS__)
+#define assert_cmp(t, a, b, cmp, neg_cmp, pri, ...)                            \
+	verify_cmp(true, t, a, b, cmp, neg_cmp, pri, __VA_ARGS__)
 
-#define assert_ptr_eq(a, b, ...)	assert_cmp(void *, a, b, ==,	\
-    !=, "p", __VA_ARGS__)
-#define assert_ptr_ne(a, b, ...)	assert_cmp(void *, a, b, !=,	\
-    ==, "p", __VA_ARGS__)
-#define assert_ptr_null(a, ...)		assert_cmp(void *, a, NULL, ==,	\
-    !=, "p", __VA_ARGS__)
-#define assert_ptr_not_null(a, ...)	assert_cmp(void *, a, NULL, !=,	\
-    ==, "p", __VA_ARGS__)
+#define assert_ptr_eq(a, b, ...)                                               \
+	assert_cmp(void *, a, b, ==, !=, "p", __VA_ARGS__)
+#define assert_ptr_ne(a, b, ...)                                               \
+	assert_cmp(void *, a, b, !=, ==, "p", __VA_ARGS__)
+#define assert_ptr_null(a, ...)                                                \
+	assert_cmp(void *, a, NULL, ==, !=, "p", __VA_ARGS__)
+#define assert_ptr_not_null(a, ...)                                            \
+	assert_cmp(void *, a, NULL, !=, ==, "p", __VA_ARGS__)
 
-#define assert_c_eq(a, b, ...)	assert_cmp(char, a, b, ==, !=, "c", __VA_ARGS__)
-#define assert_c_ne(a, b, ...)	assert_cmp(char, a, b, !=, ==, "c", __VA_ARGS__)
-#define assert_c_lt(a, b, ...)	assert_cmp(char, a, b, <, >=, "c", __VA_ARGS__)
-#define assert_c_le(a, b, ...)	assert_cmp(char, a, b, <=, >, "c", __VA_ARGS__)
-#define assert_c_ge(a, b, ...)	assert_cmp(char, a, b, >=, <, "c", __VA_ARGS__)
-#define assert_c_gt(a, b, ...)	assert_cmp(char, a, b, >, <=, "c", __VA_ARGS__)
+#define assert_c_eq(a, b, ...) assert_cmp(char, a, b, ==, !=, "c", __VA_ARGS__)
+#define assert_c_ne(a, b, ...) assert_cmp(char, a, b, !=, ==, "c", __VA_ARGS__)
+#define assert_c_lt(a, b, ...) assert_cmp(char, a, b, <, >=, "c", __VA_ARGS__)
+#define assert_c_le(a, b, ...) assert_cmp(char, a, b, <=, >, "c", __VA_ARGS__)
+#define assert_c_ge(a, b, ...) assert_cmp(char, a, b, >=, <, "c", __VA_ARGS__)
+#define assert_c_gt(a, b, ...) assert_cmp(char, a, b, >, <=, "c", __VA_ARGS__)
 
-#define assert_x_eq(a, b, ...)	assert_cmp(int, a, b, ==, !=, "#x", __VA_ARGS__)
-#define assert_x_ne(a, b, ...)	assert_cmp(int, a, b, !=, ==, "#x", __VA_ARGS__)
-#define assert_x_lt(a, b, ...)	assert_cmp(int, a, b, <, >=, "#x", __VA_ARGS__)
-#define assert_x_le(a, b, ...)	assert_cmp(int, a, b, <=, >, "#x", __VA_ARGS__)
-#define assert_x_ge(a, b, ...)	assert_cmp(int, a, b, >=, <, "#x", __VA_ARGS__)
-#define assert_x_gt(a, b, ...)	assert_cmp(int, a, b, >, <=, "#x", __VA_ARGS__)
+#define assert_x_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "#x", __VA_ARGS__)
+#define assert_x_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "#x", __VA_ARGS__)
+#define assert_x_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "#x", __VA_ARGS__)
+#define assert_x_le(a, b, ...) assert_cmp(int, a, b, <=, >, "#x", __VA_ARGS__)
+#define assert_x_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "#x", __VA_ARGS__)
+#define assert_x_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "#x", __VA_ARGS__)
 
-#define assert_d_eq(a, b, ...)	assert_cmp(int, a, b, ==, !=, "d", __VA_ARGS__)
-#define assert_d_ne(a, b, ...)	assert_cmp(int, a, b, !=, ==, "d", __VA_ARGS__)
-#define assert_d_lt(a, b, ...)	assert_cmp(int, a, b, <, >=, "d", __VA_ARGS__)
-#define assert_d_le(a, b, ...)	assert_cmp(int, a, b, <=, >, "d", __VA_ARGS__)
-#define assert_d_ge(a, b, ...)	assert_cmp(int, a, b, >=, <, "d", __VA_ARGS__)
-#define assert_d_gt(a, b, ...)	assert_cmp(int, a, b, >, <=, "d", __VA_ARGS__)
+#define assert_d_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "d", __VA_ARGS__)
+#define assert_d_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "d", __VA_ARGS__)
+#define assert_d_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "d", __VA_ARGS__)
+#define assert_d_le(a, b, ...) assert_cmp(int, a, b, <=, >, "d", __VA_ARGS__)
+#define assert_d_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "d", __VA_ARGS__)
+#define assert_d_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "d", __VA_ARGS__)
 
-#define assert_u_eq(a, b, ...)	assert_cmp(int, a, b, ==, !=, "u", __VA_ARGS__)
-#define assert_u_ne(a, b, ...)	assert_cmp(int, a, b, !=, ==, "u", __VA_ARGS__)
-#define assert_u_lt(a, b, ...)	assert_cmp(int, a, b, <, >=, "u", __VA_ARGS__)
-#define assert_u_le(a, b, ...)	assert_cmp(int, a, b, <=, >, "u", __VA_ARGS__)
-#define assert_u_ge(a, b, ...)	assert_cmp(int, a, b, >=, <, "u", __VA_ARGS__)
-#define assert_u_gt(a, b, ...)	assert_cmp(int, a, b, >, <=, "u", __VA_ARGS__)
+#define assert_u_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "u", __VA_ARGS__)
+#define assert_u_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "u", __VA_ARGS__)
+#define assert_u_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "u", __VA_ARGS__)
+#define assert_u_le(a, b, ...) assert_cmp(int, a, b, <=, >, "u", __VA_ARGS__)
+#define assert_u_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "u", __VA_ARGS__)
+#define assert_u_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "u", __VA_ARGS__)
 
-#define assert_ld_eq(a, b, ...)	assert_cmp(long, a, b, ==,	\
-    !=, "ld", __VA_ARGS__)
-#define assert_ld_ne(a, b, ...)	assert_cmp(long, a, b, !=,	\
-    ==, "ld", __VA_ARGS__)
-#define assert_ld_lt(a, b, ...)	assert_cmp(long, a, b, <,	\
-    >=, "ld", __VA_ARGS__)
-#define assert_ld_le(a, b, ...)	assert_cmp(long, a, b, <=,	\
-    >, "ld", __VA_ARGS__)
-#define assert_ld_ge(a, b, ...)	assert_cmp(long, a, b, >=,	\
-    <, "ld", __VA_ARGS__)
-#define assert_ld_gt(a, b, ...)	assert_cmp(long, a, b, >,	\
-    <=, "ld", __VA_ARGS__)
+#define assert_ld_eq(a, b, ...)                                                \
+	assert_cmp(long, a, b, ==, !=, "ld", __VA_ARGS__)
+#define assert_ld_ne(a, b, ...)                                                \
+	assert_cmp(long, a, b, !=, ==, "ld", __VA_ARGS__)
+#define assert_ld_lt(a, b, ...) assert_cmp(long, a, b, <, >=, "ld", __VA_ARGS__)
+#define assert_ld_le(a, b, ...) assert_cmp(long, a, b, <=, >, "ld", __VA_ARGS__)
+#define assert_ld_ge(a, b, ...) assert_cmp(long, a, b, >=, <, "ld", __VA_ARGS__)
+#define assert_ld_gt(a, b, ...) assert_cmp(long, a, b, >, <=, "ld", __VA_ARGS__)
 
-#define assert_lu_eq(a, b, ...)	assert_cmp(unsigned long,	\
-    a, b, ==, !=, "lu", __VA_ARGS__)
-#define assert_lu_ne(a, b, ...)	assert_cmp(unsigned long,	\
-    a, b, !=, ==, "lu", __VA_ARGS__)
-#define assert_lu_lt(a, b, ...)	assert_cmp(unsigned long,	\
-    a, b, <, >=, "lu", __VA_ARGS__)
-#define assert_lu_le(a, b, ...)	assert_cmp(unsigned long,	\
-    a, b, <=, >, "lu", __VA_ARGS__)
-#define assert_lu_ge(a, b, ...)	assert_cmp(unsigned long,	\
-    a, b, >=, <, "lu", __VA_ARGS__)
-#define assert_lu_gt(a, b, ...)	assert_cmp(unsigned long,	\
-    a, b, >, <=, "lu", __VA_ARGS__)
+#define assert_lu_eq(a, b, ...)                                                \
+	assert_cmp(unsigned long, a, b, ==, !=, "lu", __VA_ARGS__)
+#define assert_lu_ne(a, b, ...)                                                \
+	assert_cmp(unsigned long, a, b, !=, ==, "lu", __VA_ARGS__)
+#define assert_lu_lt(a, b, ...)                                                \
+	assert_cmp(unsigned long, a, b, <, >=, "lu", __VA_ARGS__)
+#define assert_lu_le(a, b, ...)                                                \
+	assert_cmp(unsigned long, a, b, <=, >, "lu", __VA_ARGS__)
+#define assert_lu_ge(a, b, ...)                                                \
+	assert_cmp(unsigned long, a, b, >=, <, "lu", __VA_ARGS__)
+#define assert_lu_gt(a, b, ...)                                                \
+	assert_cmp(unsigned long, a, b, >, <=, "lu", __VA_ARGS__)
 
-#define assert_qd_eq(a, b, ...)	assert_cmp(long long, a, b, ==,	\
-    !=, "qd", __VA_ARGS__)
-#define assert_qd_ne(a, b, ...)	assert_cmp(long long, a, b, !=,	\
-    ==, "qd", __VA_ARGS__)
-#define assert_qd_lt(a, b, ...)	assert_cmp(long long, a, b, <,	\
-    >=, "qd", __VA_ARGS__)
-#define assert_qd_le(a, b, ...)	assert_cmp(long long, a, b, <=,	\
-    >, "qd", __VA_ARGS__)
-#define assert_qd_ge(a, b, ...)	assert_cmp(long long, a, b, >=,	\
-    <, "qd", __VA_ARGS__)
-#define assert_qd_gt(a, b, ...)	assert_cmp(long long, a, b, >,	\
-    <=, "qd", __VA_ARGS__)
+#define assert_qd_eq(a, b, ...)                                                \
+	assert_cmp(long long, a, b, ==, !=, "qd", __VA_ARGS__)
+#define assert_qd_ne(a, b, ...)                                                \
+	assert_cmp(long long, a, b, !=, ==, "qd", __VA_ARGS__)
+#define assert_qd_lt(a, b, ...)                                                \
+	assert_cmp(long long, a, b, <, >=, "qd", __VA_ARGS__)
+#define assert_qd_le(a, b, ...)                                                \
+	assert_cmp(long long, a, b, <=, >, "qd", __VA_ARGS__)
+#define assert_qd_ge(a, b, ...)                                                \
+	assert_cmp(long long, a, b, >=, <, "qd", __VA_ARGS__)
+#define assert_qd_gt(a, b, ...)                                                \
+	assert_cmp(long long, a, b, >, <=, "qd", __VA_ARGS__)
 
-#define assert_qu_eq(a, b, ...)	assert_cmp(unsigned long long,	\
-    a, b, ==, !=, "qu", __VA_ARGS__)
-#define assert_qu_ne(a, b, ...)	assert_cmp(unsigned long long,	\
-    a, b, !=, ==, "qu", __VA_ARGS__)
-#define assert_qu_lt(a, b, ...)	assert_cmp(unsigned long long,	\
-    a, b, <, >=, "qu", __VA_ARGS__)
-#define assert_qu_le(a, b, ...)	assert_cmp(unsigned long long,	\
-    a, b, <=, >, "qu", __VA_ARGS__)
-#define assert_qu_ge(a, b, ...)	assert_cmp(unsigned long long,	\
-    a, b, >=, <, "qu", __VA_ARGS__)
-#define assert_qu_gt(a, b, ...)	assert_cmp(unsigned long long,	\
-    a, b, >, <=, "qu", __VA_ARGS__)
+#define assert_qu_eq(a, b, ...)                                                \
+	assert_cmp(unsigned long long, a, b, ==, !=, "qu", __VA_ARGS__)
+#define assert_qu_ne(a, b, ...)                                                \
+	assert_cmp(unsigned long long, a, b, !=, ==, "qu", __VA_ARGS__)
+#define assert_qu_lt(a, b, ...)                                                \
+	assert_cmp(unsigned long long, a, b, <, >=, "qu", __VA_ARGS__)
+#define assert_qu_le(a, b, ...)                                                \
+	assert_cmp(unsigned long long, a, b, <=, >, "qu", __VA_ARGS__)
+#define assert_qu_ge(a, b, ...)                                                \
+	assert_cmp(unsigned long long, a, b, >=, <, "qu", __VA_ARGS__)
+#define assert_qu_gt(a, b, ...)                                                \
+	assert_cmp(unsigned long long, a, b, >, <=, "qu", __VA_ARGS__)
 
-#define assert_jd_eq(a, b, ...)	assert_cmp(intmax_t, a, b, ==,	\
-    !=, "jd", __VA_ARGS__)
-#define assert_jd_ne(a, b, ...)	assert_cmp(intmax_t, a, b, !=,	\
-    ==, "jd", __VA_ARGS__)
-#define assert_jd_lt(a, b, ...)	assert_cmp(intmax_t, a, b, <,	\
-    >=, "jd", __VA_ARGS__)
-#define assert_jd_le(a, b, ...)	assert_cmp(intmax_t, a, b, <=,	\
-    >, "jd", __VA_ARGS__)
-#define assert_jd_ge(a, b, ...)	assert_cmp(intmax_t, a, b, >=,	\
-    <, "jd", __VA_ARGS__)
-#define assert_jd_gt(a, b, ...)	assert_cmp(intmax_t, a, b, >,	\
-    <=, "jd", __VA_ARGS__)
+#define assert_jd_eq(a, b, ...)                                                \
+	assert_cmp(intmax_t, a, b, ==, !=, "jd", __VA_ARGS__)
+#define assert_jd_ne(a, b, ...)                                                \
+	assert_cmp(intmax_t, a, b, !=, ==, "jd", __VA_ARGS__)
+#define assert_jd_lt(a, b, ...)                                                \
+	assert_cmp(intmax_t, a, b, <, >=, "jd", __VA_ARGS__)
+#define assert_jd_le(a, b, ...)                                                \
+	assert_cmp(intmax_t, a, b, <=, >, "jd", __VA_ARGS__)
+#define assert_jd_ge(a, b, ...)                                                \
+	assert_cmp(intmax_t, a, b, >=, <, "jd", __VA_ARGS__)
+#define assert_jd_gt(a, b, ...)                                                \
+	assert_cmp(intmax_t, a, b, >, <=, "jd", __VA_ARGS__)
 
-#define assert_ju_eq(a, b, ...)	assert_cmp(uintmax_t, a, b, ==,	\
-    !=, "ju", __VA_ARGS__)
-#define assert_ju_ne(a, b, ...)	assert_cmp(uintmax_t, a, b, !=,	\
-    ==, "ju", __VA_ARGS__)
-#define assert_ju_lt(a, b, ...)	assert_cmp(uintmax_t, a, b, <,	\
-    >=, "ju", __VA_ARGS__)
-#define assert_ju_le(a, b, ...)	assert_cmp(uintmax_t, a, b, <=,	\
-    >, "ju", __VA_ARGS__)
-#define assert_ju_ge(a, b, ...)	assert_cmp(uintmax_t, a, b, >=,	\
-    <, "ju", __VA_ARGS__)
-#define assert_ju_gt(a, b, ...)	assert_cmp(uintmax_t, a, b, >,	\
-    <=, "ju", __VA_ARGS__)
+#define assert_ju_eq(a, b, ...)                                                \
+	assert_cmp(uintmax_t, a, b, ==, !=, "ju", __VA_ARGS__)
+#define assert_ju_ne(a, b, ...)                                                \
+	assert_cmp(uintmax_t, a, b, !=, ==, "ju", __VA_ARGS__)
+#define assert_ju_lt(a, b, ...)                                                \
+	assert_cmp(uintmax_t, a, b, <, >=, "ju", __VA_ARGS__)
+#define assert_ju_le(a, b, ...)                                                \
+	assert_cmp(uintmax_t, a, b, <=, >, "ju", __VA_ARGS__)
+#define assert_ju_ge(a, b, ...)                                                \
+	assert_cmp(uintmax_t, a, b, >=, <, "ju", __VA_ARGS__)
+#define assert_ju_gt(a, b, ...)                                                \
+	assert_cmp(uintmax_t, a, b, >, <=, "ju", __VA_ARGS__)
 
-#define assert_zd_eq(a, b, ...)	assert_cmp(ssize_t, a, b, ==,	\
-    !=, "zd", __VA_ARGS__)
-#define assert_zd_ne(a, b, ...)	assert_cmp(ssize_t, a, b, !=,	\
-    ==, "zd", __VA_ARGS__)
-#define assert_zd_lt(a, b, ...)	assert_cmp(ssize_t, a, b, <,	\
-    >=, "zd", __VA_ARGS__)
-#define assert_zd_le(a, b, ...)	assert_cmp(ssize_t, a, b, <=,	\
-    >, "zd", __VA_ARGS__)
-#define assert_zd_ge(a, b, ...)	assert_cmp(ssize_t, a, b, >=,	\
-    <, "zd", __VA_ARGS__)
-#define assert_zd_gt(a, b, ...)	assert_cmp(ssize_t, a, b, >,	\
-    <=, "zd", __VA_ARGS__)
+#define assert_zd_eq(a, b, ...)                                                \
+	assert_cmp(ssize_t, a, b, ==, !=, "zd", __VA_ARGS__)
+#define assert_zd_ne(a, b, ...)                                                \
+	assert_cmp(ssize_t, a, b, !=, ==, "zd", __VA_ARGS__)
+#define assert_zd_lt(a, b, ...)                                                \
+	assert_cmp(ssize_t, a, b, <, >=, "zd", __VA_ARGS__)
+#define assert_zd_le(a, b, ...)                                                \
+	assert_cmp(ssize_t, a, b, <=, >, "zd", __VA_ARGS__)
+#define assert_zd_ge(a, b, ...)                                                \
+	assert_cmp(ssize_t, a, b, >=, <, "zd", __VA_ARGS__)
+#define assert_zd_gt(a, b, ...)                                                \
+	assert_cmp(ssize_t, a, b, >, <=, "zd", __VA_ARGS__)
 
-#define assert_zu_eq(a, b, ...)	assert_cmp(size_t, a, b, ==,	\
-    !=, "zu", __VA_ARGS__)
-#define assert_zu_ne(a, b, ...)	assert_cmp(size_t, a, b, !=,	\
-    ==, "zu", __VA_ARGS__)
-#define assert_zu_lt(a, b, ...)	assert_cmp(size_t, a, b, <,	\
-    >=, "zu", __VA_ARGS__)
-#define assert_zu_le(a, b, ...)	assert_cmp(size_t, a, b, <=,	\
-    >, "zu", __VA_ARGS__)
-#define assert_zu_ge(a, b, ...)	assert_cmp(size_t, a, b, >=,	\
-    <, "zu", __VA_ARGS__)
-#define assert_zu_gt(a, b, ...)	assert_cmp(size_t, a, b, >,	\
-    <=, "zu", __VA_ARGS__)
+#define assert_zu_eq(a, b, ...)                                                \
+	assert_cmp(size_t, a, b, ==, !=, "zu", __VA_ARGS__)
+#define assert_zu_ne(a, b, ...)                                                \
+	assert_cmp(size_t, a, b, !=, ==, "zu", __VA_ARGS__)
+#define assert_zu_lt(a, b, ...)                                                \
+	assert_cmp(size_t, a, b, <, >=, "zu", __VA_ARGS__)
+#define assert_zu_le(a, b, ...)                                                \
+	assert_cmp(size_t, a, b, <=, >, "zu", __VA_ARGS__)
+#define assert_zu_ge(a, b, ...)                                                \
+	assert_cmp(size_t, a, b, >=, <, "zu", __VA_ARGS__)
+#define assert_zu_gt(a, b, ...)                                                \
+	assert_cmp(size_t, a, b, >, <=, "zu", __VA_ARGS__)
 
-#define assert_d32_eq(a, b, ...)	assert_cmp(int32_t, a, b, ==,	\
-    !=, FMTd32, __VA_ARGS__)
-#define assert_d32_ne(a, b, ...)	assert_cmp(int32_t, a, b, !=,	\
-    ==, FMTd32, __VA_ARGS__)
-#define assert_d32_lt(a, b, ...)	assert_cmp(int32_t, a, b, <,	\
-    >=, FMTd32, __VA_ARGS__)
-#define assert_d32_le(a, b, ...)	assert_cmp(int32_t, a, b, <=,	\
-    >, FMTd32, __VA_ARGS__)
-#define assert_d32_ge(a, b, ...)	assert_cmp(int32_t, a, b, >=,	\
-    <, FMTd32, __VA_ARGS__)
-#define assert_d32_gt(a, b, ...)	assert_cmp(int32_t, a, b, >,	\
-    <=, FMTd32, __VA_ARGS__)
+#define assert_d32_eq(a, b, ...)                                               \
+	assert_cmp(int32_t, a, b, ==, !=, FMTd32, __VA_ARGS__)
+#define assert_d32_ne(a, b, ...)                                               \
+	assert_cmp(int32_t, a, b, !=, ==, FMTd32, __VA_ARGS__)
+#define assert_d32_lt(a, b, ...)                                               \
+	assert_cmp(int32_t, a, b, <, >=, FMTd32, __VA_ARGS__)
+#define assert_d32_le(a, b, ...)                                               \
+	assert_cmp(int32_t, a, b, <=, >, FMTd32, __VA_ARGS__)
+#define assert_d32_ge(a, b, ...)                                               \
+	assert_cmp(int32_t, a, b, >=, <, FMTd32, __VA_ARGS__)
+#define assert_d32_gt(a, b, ...)                                               \
+	assert_cmp(int32_t, a, b, >, <=, FMTd32, __VA_ARGS__)
 
-#define assert_u32_eq(a, b, ...)	assert_cmp(uint32_t, a, b, ==,	\
-    !=, FMTu32, __VA_ARGS__)
-#define assert_u32_ne(a, b, ...)	assert_cmp(uint32_t, a, b, !=,	\
-    ==, FMTu32, __VA_ARGS__)
-#define assert_u32_lt(a, b, ...)	assert_cmp(uint32_t, a, b, <,	\
-    >=, FMTu32, __VA_ARGS__)
-#define assert_u32_le(a, b, ...)	assert_cmp(uint32_t, a, b, <=,	\
-    >, FMTu32, __VA_ARGS__)
-#define assert_u32_ge(a, b, ...)	assert_cmp(uint32_t, a, b, >=,	\
-    <, FMTu32, __VA_ARGS__)
-#define assert_u32_gt(a, b, ...)	assert_cmp(uint32_t, a, b, >,	\
-    <=, FMTu32, __VA_ARGS__)
+#define assert_u32_eq(a, b, ...)                                               \
+	assert_cmp(uint32_t, a, b, ==, !=, FMTu32, __VA_ARGS__)
+#define assert_u32_ne(a, b, ...)                                               \
+	assert_cmp(uint32_t, a, b, !=, ==, FMTu32, __VA_ARGS__)
+#define assert_u32_lt(a, b, ...)                                               \
+	assert_cmp(uint32_t, a, b, <, >=, FMTu32, __VA_ARGS__)
+#define assert_u32_le(a, b, ...)                                               \
+	assert_cmp(uint32_t, a, b, <=, >, FMTu32, __VA_ARGS__)
+#define assert_u32_ge(a, b, ...)                                               \
+	assert_cmp(uint32_t, a, b, >=, <, FMTu32, __VA_ARGS__)
+#define assert_u32_gt(a, b, ...)                                               \
+	assert_cmp(uint32_t, a, b, >, <=, FMTu32, __VA_ARGS__)
 
-#define assert_d64_eq(a, b, ...)	assert_cmp(int64_t, a, b, ==,	\
-    !=, FMTd64, __VA_ARGS__)
-#define assert_d64_ne(a, b, ...)	assert_cmp(int64_t, a, b, !=,	\
-    ==, FMTd64, __VA_ARGS__)
-#define assert_d64_lt(a, b, ...)	assert_cmp(int64_t, a, b, <,	\
-    >=, FMTd64, __VA_ARGS__)
-#define assert_d64_le(a, b, ...)	assert_cmp(int64_t, a, b, <=,	\
-    >, FMTd64, __VA_ARGS__)
-#define assert_d64_ge(a, b, ...)	assert_cmp(int64_t, a, b, >=,	\
-    <, FMTd64, __VA_ARGS__)
-#define assert_d64_gt(a, b, ...)	assert_cmp(int64_t, a, b, >,	\
-    <=, FMTd64, __VA_ARGS__)
+#define assert_d64_eq(a, b, ...)                                               \
+	assert_cmp(int64_t, a, b, ==, !=, FMTd64, __VA_ARGS__)
+#define assert_d64_ne(a, b, ...)                                               \
+	assert_cmp(int64_t, a, b, !=, ==, FMTd64, __VA_ARGS__)
+#define assert_d64_lt(a, b, ...)                                               \
+	assert_cmp(int64_t, a, b, <, >=, FMTd64, __VA_ARGS__)
+#define assert_d64_le(a, b, ...)                                               \
+	assert_cmp(int64_t, a, b, <=, >, FMTd64, __VA_ARGS__)
+#define assert_d64_ge(a, b, ...)                                               \
+	assert_cmp(int64_t, a, b, >=, <, FMTd64, __VA_ARGS__)
+#define assert_d64_gt(a, b, ...)                                               \
+	assert_cmp(int64_t, a, b, >, <=, FMTd64, __VA_ARGS__)
 
-#define assert_u64_eq(a, b, ...)	assert_cmp(uint64_t, a, b, ==,	\
-    !=, FMTu64, __VA_ARGS__)
-#define assert_u64_ne(a, b, ...)	assert_cmp(uint64_t, a, b, !=,	\
-    ==, FMTu64, __VA_ARGS__)
-#define assert_u64_lt(a, b, ...)	assert_cmp(uint64_t, a, b, <,	\
-    >=, FMTu64, __VA_ARGS__)
-#define assert_u64_le(a, b, ...)	assert_cmp(uint64_t, a, b, <=,	\
-    >, FMTu64, __VA_ARGS__)
-#define assert_u64_ge(a, b, ...)	assert_cmp(uint64_t, a, b, >=,	\
-    <, FMTu64, __VA_ARGS__)
-#define assert_u64_gt(a, b, ...)	assert_cmp(uint64_t, a, b, >,	\
-    <=, FMTu64, __VA_ARGS__)
+#define assert_u64_eq(a, b, ...)                                               \
+	assert_cmp(uint64_t, a, b, ==, !=, FMTu64, __VA_ARGS__)
+#define assert_u64_ne(a, b, ...)                                               \
+	assert_cmp(uint64_t, a, b, !=, ==, FMTu64, __VA_ARGS__)
+#define assert_u64_lt(a, b, ...)                                               \
+	assert_cmp(uint64_t, a, b, <, >=, FMTu64, __VA_ARGS__)
+#define assert_u64_le(a, b, ...)                                               \
+	assert_cmp(uint64_t, a, b, <=, >, FMTu64, __VA_ARGS__)
+#define assert_u64_ge(a, b, ...)                                               \
+	assert_cmp(uint64_t, a, b, >=, <, FMTu64, __VA_ARGS__)
+#define assert_u64_gt(a, b, ...)                                               \
+	assert_cmp(uint64_t, a, b, >, <=, FMTu64, __VA_ARGS__)
 
-#define assert_b_eq(a, b, ...)	verify_b_eq(true, a, b, __VA_ARGS__)
-#define assert_b_ne(a, b, ...)	verify_b_ne(true, a, b, __VA_ARGS__)
+#define assert_b_eq(a, b, ...) verify_b_eq(true, a, b, __VA_ARGS__)
+#define assert_b_ne(a, b, ...) verify_b_ne(true, a, b, __VA_ARGS__)
 
-#define assert_true(a, ...)	assert_b_eq(a, true, __VA_ARGS__)
-#define assert_false(a, ...)	assert_b_eq(a, false, __VA_ARGS__)
+#define assert_true(a, ...) assert_b_eq(a, true, __VA_ARGS__)
+#define assert_false(a, ...) assert_b_eq(a, false, __VA_ARGS__)
 
 #define assert_str_eq(a, b, ...) verify_str_eq(true, a, b, __VA_ARGS__)
 #define assert_str_ne(a, b, ...) verify_str_ne(true, a, b, __VA_ARGS__)
@@ -539,45 +516,45 @@ typedef enum {
 	test_status_count = 3
 } test_status_t;
 
-typedef void (test_t)(void);
+typedef void(test_t)(void);
 
-#define TEST_BEGIN(f)							\
-static void								\
-f(void) {								\
-	p_test_init(#f);
+#define TEST_BEGIN(f)                                                          \
+	static void f(void) {                                                  \
+		const bool skip_test = p_test_init(#f);                        \
+		if (skip_test) {                                               \
+			goto label_test_end;                                   \
+		}
 
-#define TEST_END							\
-	goto label_test_end;						\
-label_test_end:								\
-	p_test_fini();							\
-}
+#define TEST_END                                                               \
+	goto label_test_end;                                                   \
+	label_test_end:                                                        \
+	p_test_fini(skip_test);                                                \
+	}
 
-#define test(...)							\
-	p_test(__VA_ARGS__, NULL)
+#define test(...) p_test(__VA_ARGS__, NULL)
 
-#define test_no_reentrancy(...)							\
-	p_test_no_reentrancy(__VA_ARGS__, NULL)
+#define test_no_reentrancy(...) p_test_no_reentrancy(__VA_ARGS__, NULL)
 
-#define test_no_malloc_init(...)					\
-	p_test_no_malloc_init(__VA_ARGS__, NULL)
+#define test_no_malloc_init(...) p_test_no_malloc_init(__VA_ARGS__, NULL)
 
-#define test_skip_if(e) do {						\
-	if (e) {							\
-		test_skip("%s:%s:%d: Test skipped: (%s)",		\
-		    __func__, __FILE__, __LINE__, #e);			\
-		goto label_test_end;					\
-	}								\
-} while (0)
+#define test_skip_if(e)                                                        \
+	do {                                                                   \
+		if (e) {                                                       \
+			test_skip("%s:%s:%d: Test skipped: (%s)", __func__,    \
+			    __FILE__, __LINE__, #e);                           \
+			goto label_test_end;                                   \
+		}                                                              \
+	} while (0)
 
-bool test_is_reentrant();
+bool test_is_reentrant(void);
 
-void	test_skip(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2);
-void	test_fail(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2);
+void test_skip(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2);
+void test_fail(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2);
 
 /* For private use by macros. */
-test_status_t	p_test(test_t *t, ...);
-test_status_t	p_test_no_reentrancy(test_t *t, ...);
-test_status_t	p_test_no_malloc_init(test_t *t, ...);
-void	p_test_init(const char *name);
-void	p_test_fini(void);
-void	p_test_fail(const char *prefix, const char *message);
+test_status_t p_test(test_t *t, ...);
+test_status_t p_test_no_reentrancy(test_t *t, ...);
+test_status_t p_test_no_malloc_init(test_t *t, ...);
+bool          p_test_init(const char *name);
+void          p_test_fini(bool skip_test);
+void p_test_fail(bool may_abort, const char *prefix, const char *message);
diff --git a/test/include/test/thd.h b/test/include/test/thd.h
index 47a51262..848c5271 100644
--- a/test/include/test/thd.h
+++ b/test/include/test/thd.h
@@ -5,5 +5,7 @@ typedef HANDLE thd_t;
 typedef pthread_t thd_t;
 #endif
 
-void	thd_create(thd_t *thd, void *(*proc)(void *), void *arg);
-void	thd_join(thd_t thd, void **ret);
+void thd_create(thd_t *thd, void *(*proc)(void *), void *arg);
+void thd_join(thd_t thd, void **ret);
+bool thd_has_setname(void);
+void thd_setname(const char *name);
diff --git a/test/include/test/timer.h b/test/include/test/timer.h
index ace6191b..c1d59eb4 100644
--- a/test/include/test/timer.h
+++ b/test/include/test/timer.h
@@ -5,7 +5,7 @@ typedef struct {
 	nstime_t t1;
 } timedelta_t;
 
-void	timer_start(timedelta_t *timer);
-void	timer_stop(timedelta_t *timer);
-uint64_t	timer_usec(const timedelta_t *timer);
-void	timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen);
+void     timer_start(timedelta_t *timer);
+void     timer_stop(timedelta_t *timer);
+uint64_t timer_usec(const timedelta_t *timer);
+void     timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen);
diff --git a/test/integration/MALLOCX_ARENA.c b/test/integration/MALLOCX_ARENA.c
index 7e61df08..c81566a8 100644
--- a/test/integration/MALLOCX_ARENA.c
+++ b/test/integration/MALLOCX_ARENA.c
@@ -2,39 +2,31 @@
 
 #define NTHREADS 10
 
-static bool have_dss =
-#ifdef JEMALLOC_DSS
-    true
-#else
-    false
-#endif
-    ;
-
 void *
 thd_start(void *arg) {
 	unsigned thread_ind = (unsigned)(uintptr_t)arg;
 	unsigned arena_ind;
-	void *p;
-	size_t sz;
+	void    *p;
+	size_t   sz;
 
 	sz = sizeof(arena_ind);
 	expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0),
 	    0, "Error in arenas.create");
 
 	if (thread_ind % 4 != 3) {
-		size_t mib[3];
-		size_t miblen = sizeof(mib) / sizeof(size_t);
+		size_t      mib[3];
+		size_t      miblen = sizeof(mib) / sizeof(size_t);
 		const char *dss_precs[] = {"disabled", "primary", "secondary"};
-		unsigned prec_ind = thread_ind %
-		    (sizeof(dss_precs)/sizeof(char*));
+		unsigned    prec_ind = thread_ind
+		    % (sizeof(dss_precs) / sizeof(char *));
 		const char *dss = dss_precs[prec_ind];
 		int expected_err = (have_dss || prec_ind == 0) ? 0 : EFAULT;
 		expect_d_eq(mallctlnametomib("arena.0.dss", mib, &miblen), 0,
 		    "Error in mallctlnametomib()");
 		mib[1] = arena_ind;
 		expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&dss,
-		    sizeof(const char *)), expected_err,
-		    "Error in mallctlbymib()");
+		                sizeof(const char *)),
+		    expected_err, "Error in mallctlbymib()");
 	}
 
 	p = mallocx(1, MALLOCX_ARENA(arena_ind));
@@ -45,12 +37,11 @@ thd_start(void *arg) {
 }
 
 TEST_BEGIN(test_MALLOCX_ARENA) {
-	thd_t thds[NTHREADS];
+	thd_t    thds[NTHREADS];
 	unsigned i;
 
 	for (i = 0; i < NTHREADS; i++) {
-		thd_create(&thds[i], thd_start,
-		    (void *)(uintptr_t)i);
+		thd_create(&thds[i], thd_start, (void *)(uintptr_t)i);
 	}
 
 	for (i = 0; i < NTHREADS; i++) {
@@ -61,6 +52,5 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_MALLOCX_ARENA);
+	return test(test_MALLOCX_ARENA);
 }
diff --git a/test/integration/aligned_alloc.c b/test/integration/aligned_alloc.c
index b37d5ba0..1cf2a2f1 100644
--- a/test/integration/aligned_alloc.c
+++ b/test/integration/aligned_alloc.c
@@ -15,7 +15,7 @@ purge(void) {
 
 TEST_BEGIN(test_alignment_errors) {
 	size_t alignment;
-	void *p;
+	void  *p;
 
 	alignment = 0;
 	set_errno(0);
@@ -24,17 +24,15 @@ TEST_BEGIN(test_alignment_errors) {
 	    "Expected error for invalid alignment %zu", alignment);
 
 	for (alignment = sizeof(size_t); alignment < MAXALIGN;
-	    alignment <<= 1) {
+	     alignment <<= 1) {
 		set_errno(0);
 		p = aligned_alloc(alignment + 1, 1);
 		expect_false(p != NULL || get_errno() != EINVAL,
-		    "Expected error for invalid alignment %zu",
-		    alignment + 1);
+		    "Expected error for invalid alignment %zu", alignment + 1);
 	}
 }
 TEST_END
 
-
 /*
  * GCC "-Walloc-size-larger-than" warning detects when one of the memory
  * allocation functions is called with a size larger than the maximum size that
@@ -47,33 +45,31 @@ JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN
 
 TEST_BEGIN(test_oom_errors) {
 	size_t alignment, size;
-	void *p;
+	void  *p;
 
 #if LG_SIZEOF_PTR == 3
 	alignment = UINT64_C(0x8000000000000000);
-	size      = UINT64_C(0x8000000000000000);
+	size = UINT64_C(0x8000000000000000);
 #else
 	alignment = 0x80000000LU;
-	size      = 0x80000000LU;
+	size = 0x80000000LU;
 #endif
 	set_errno(0);
 	p = aligned_alloc(alignment, size);
 	expect_false(p != NULL || get_errno() != ENOMEM,
-	    "Expected error for aligned_alloc(%zu, %zu)",
-	    alignment, size);
+	    "Expected error for aligned_alloc(%zu, %zu)", alignment, size);
 
 #if LG_SIZEOF_PTR == 3
 	alignment = UINT64_C(0x4000000000000000);
-	size      = UINT64_C(0xc000000000000001);
+	size = UINT64_C(0xc000000000000001);
 #else
 	alignment = 0x40000000LU;
-	size      = 0xc0000001LU;
+	size = 0xc0000001LU;
 #endif
 	set_errno(0);
 	p = aligned_alloc(alignment, size);
 	expect_false(p != NULL || get_errno() != ENOMEM,
-	    "Expected error for aligned_alloc(%zu, %zu)",
-	    alignment, size);
+	    "Expected error for aligned_alloc(%zu, %zu)", alignment, size);
 
 	alignment = 0x10LU;
 #if LG_SIZEOF_PTR == 3
@@ -84,8 +80,7 @@ TEST_BEGIN(test_oom_errors) {
 	set_errno(0);
 	p = aligned_alloc(alignment, size);
 	expect_false(p != NULL || get_errno() != ENOMEM,
-	    "Expected error for aligned_alloc(&p, %zu, %zu)",
-	    alignment, size);
+	    "Expected error for aligned_alloc(&p, %zu, %zu)", alignment, size);
 }
 TEST_END
 
@@ -94,21 +89,18 @@ JEMALLOC_DIAGNOSTIC_POP
 
 TEST_BEGIN(test_alignment_and_size) {
 #define NITER 4
-	size_t alignment, size, total;
+	size_t   alignment, size, total;
 	unsigned i;
-	void *ps[NITER];
+	void    *ps[NITER];
 
 	for (i = 0; i < NITER; i++) {
 		ps[i] = NULL;
 	}
 
-	for (alignment = 8;
-	    alignment <= MAXALIGN;
-	    alignment <<= 1) {
+	for (alignment = 8; alignment <= MAXALIGN; alignment <<= 1) {
 		total = 0;
-		for (size = 1;
-		    size < 3 * alignment && size < (1U << 31);
-		    size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) {
+		for (size = 1; size < 3 * alignment && size < (1U << 31);
+		     size += (alignment >> (LG_SIZEOF_PTR - 1)) - 1) {
 			for (i = 0; i < NITER; i++) {
 				ps[i] = aligned_alloc(alignment, size);
 				if (ps[i] == NULL) {
@@ -149,9 +141,6 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_alignment_errors,
-	    test_oom_errors,
-	    test_alignment_and_size,
-	    test_zero_alloc);
+	return test(test_alignment_errors, test_oom_errors,
+	    test_alignment_and_size, test_zero_alloc);
 }
diff --git a/test/integration/allocated.c b/test/integration/allocated.c
index 0c64272c..2c46d916 100644
--- a/test/integration/allocated.c
+++ b/test/integration/allocated.c
@@ -1,36 +1,28 @@
 #include "test/jemalloc_test.h"
 
-static const bool config_stats =
-#ifdef JEMALLOC_STATS
-    true
-#else
-    false
-#endif
-    ;
-
 void *
 thd_start(void *arg) {
-	int err;
-	void *p;
-	uint64_t a0, a1, d0, d1;
+	int       err;
+	void     *p;
+	uint64_t  a0, a1, d0, d1;
 	uint64_t *ap0, *ap1, *dp0, *dp1;
-	size_t sz, usize;
+	size_t    sz, usize;
 
 	sz = sizeof(a0);
 	if ((err = mallctl("thread.allocated", (void *)&a0, &sz, NULL, 0))) {
 		if (err == ENOENT) {
 			goto label_ENOENT;
 		}
-		test_fail("%s(): Error in mallctl(): %s", __func__,
-		    strerror(err));
+		test_fail(
+		    "%s(): Error in mallctl(): %s", __func__, strerror(err));
 	}
 	sz = sizeof(ap0);
 	if ((err = mallctl("thread.allocatedp", (void *)&ap0, &sz, NULL, 0))) {
 		if (err == ENOENT) {
 			goto label_ENOENT;
 		}
-		test_fail("%s(): Error in mallctl(): %s", __func__,
-		    strerror(err));
+		test_fail(
+		    "%s(): Error in mallctl(): %s", __func__, strerror(err));
 	}
 	expect_u64_eq(*ap0, a0,
 	    "\"thread.allocatedp\" should provide a pointer to internal "
@@ -41,17 +33,17 @@ thd_start(void *arg) {
 		if (err == ENOENT) {
 			goto label_ENOENT;
 		}
-		test_fail("%s(): Error in mallctl(): %s", __func__,
-		    strerror(err));
+		test_fail(
+		    "%s(): Error in mallctl(): %s", __func__, strerror(err));
 	}
 	sz = sizeof(dp0);
-	if ((err = mallctl("thread.deallocatedp", (void *)&dp0, &sz, NULL,
-	    0))) {
+	if ((err = mallctl(
+	         "thread.deallocatedp", (void *)&dp0, &sz, NULL, 0))) {
 		if (err == ENOENT) {
 			goto label_ENOENT;
 		}
-		test_fail("%s(): Error in mallctl(): %s", __func__,
-		    strerror(err));
+		test_fail(
+		    "%s(): Error in mallctl(): %s", __func__, strerror(err));
 	}
 	expect_u64_eq(*dp0, d0,
 	    "\"thread.deallocatedp\" should provide a pointer to internal "
@@ -115,10 +107,6 @@ TEST_END
 int
 main(void) {
 	/* Run tests multiple times to check for bad interactions. */
-	return test(
-	    test_main_thread,
-	    test_subthread,
-	    test_main_thread,
-	    test_subthread,
-	    test_main_thread);
+	return test(test_main_thread, test_subthread, test_main_thread,
+	    test_subthread, test_main_thread);
 }
diff --git a/test/integration/cpp/basic.cpp b/test/integration/cpp/basic.cpp
index c1cf6cd8..e0341176 100644
--- a/test/integration/cpp/basic.cpp
+++ b/test/integration/cpp/basic.cpp
@@ -19,6 +19,5 @@ TEST_END
 
 int
 main() {
-	return test(
-	    test_basic);
+	return test(test_basic);
 }
diff --git a/test/integration/cpp/infallible_new_false.cpp b/test/integration/cpp/infallible_new_false.cpp
index 42196d6a..5ba4f49e 100644
--- a/test/integration/cpp/infallible_new_false.cpp
+++ b/test/integration/cpp/infallible_new_false.cpp
@@ -17,7 +17,5 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_failing_alloc);
+	return test(test_failing_alloc);
 }
-
diff --git a/test/integration/cpp/infallible_new_true.cpp b/test/integration/cpp/infallible_new_true.cpp
index d6754128..300bdd85 100644
--- a/test/integration/cpp/infallible_new_true.cpp
+++ b/test/integration/cpp/infallible_new_true.cpp
@@ -8,9 +8,10 @@
  */
 typedef void (*abort_hook_t)(const char *message);
 bool fake_abort_called;
-void fake_abort(const char *message) {
-	if (strcmp(message, "<jemalloc>: Allocation failed and "
-	    "opt.experimental_infallible_new is true. Aborting.\n") != 0) {
+void
+fake_abort(const char *message) {
+	const char *expected_start = "<jemalloc>: Allocation of size";
+	if (strncmp(message, expected_start, strlen(expected_start)) != 0) {
 		abort();
 	}
 	fake_abort_called = true;
@@ -19,7 +20,7 @@ void fake_abort(const char *message) {
 static bool
 own_operator_new(void) {
 	uint64_t before, after;
-	size_t sz = sizeof(before);
+	size_t   sz = sizeof(before);
 
 	/* thread.allocated is always available, even w/o config_stats. */
 	expect_d_eq(mallctl("thread.allocated", (void *)&before, &sz, NULL, 0),
@@ -35,8 +36,8 @@ own_operator_new(void) {
 TEST_BEGIN(test_failing_alloc) {
 	abort_hook_t abort_hook = &fake_abort;
 	expect_d_eq(mallctl("experimental.hooks.safety_check_abort", NULL, NULL,
-	    (void *)&abort_hook, sizeof(abort_hook)), 0,
-	    "Unexpected mallctl failure setting abort hook");
+	                (void *)&abort_hook, sizeof(abort_hook)),
+	    0, "Unexpected mallctl failure setting abort hook");
 
 	/*
 	 * Not owning operator new is only expected to happen on MinGW which
@@ -61,7 +62,5 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_failing_alloc);
+	return test(test_failing_alloc);
 }
-
diff --git a/test/integration/extent.c b/test/integration/extent.c
index 7a028f18..c15bf761 100644
--- a/test/integration/extent.c
+++ b/test/integration/extent.c
@@ -6,26 +6,29 @@
 
 static void
 test_extent_body(unsigned arena_ind) {
-	void *p;
+	void  *p;
 	size_t large0, large1, large2, sz;
 	size_t purge_mib[3];
 	size_t purge_miblen;
-	int flags;
-	bool xallocx_success_a, xallocx_success_b, xallocx_success_c;
+	int    flags;
+	bool   xallocx_success_a, xallocx_success_b, xallocx_success_c;
 
 	flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE;
 
 	/* Get large size classes. */
 	sz = sizeof(size_t);
-	expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL,
-	    0), 0, "Unexpected arenas.lextent.0.size failure");
-	expect_d_eq(mallctl("arenas.lextent.1.size", (void *)&large1, &sz, NULL,
-	    0), 0, "Unexpected arenas.lextent.1.size failure");
-	expect_d_eq(mallctl("arenas.lextent.2.size", (void *)&large2, &sz, NULL,
-	    0), 0, "Unexpected arenas.lextent.2.size failure");
+	expect_d_eq(
+	    mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, 0), 0,
+	    "Unexpected arenas.lextent.0.size failure");
+	expect_d_eq(
+	    mallctl("arenas.lextent.1.size", (void *)&large1, &sz, NULL, 0), 0,
+	    "Unexpected arenas.lextent.1.size failure");
+	expect_d_eq(
+	    mallctl("arenas.lextent.2.size", (void *)&large2, &sz, NULL, 0), 0,
+	    "Unexpected arenas.lextent.2.size failure");
 
 	/* Test dalloc/decommit/purge cascade. */
-	purge_miblen = sizeof(purge_mib)/sizeof(size_t);
+	purge_miblen = sizeof(purge_mib) / sizeof(size_t);
 	expect_d_eq(mallctlnametomib("arena.0.purge", purge_mib, &purge_miblen),
 	    0, "Unexpected mallctlnametomib() failure");
 	purge_mib[1] = (size_t)arena_ind;
@@ -47,8 +50,8 @@ test_extent_body(unsigned arena_ind) {
 	if (xallocx_success_a) {
 		expect_true(called_dalloc, "Expected dalloc call");
 		expect_true(called_decommit, "Expected decommit call");
-		expect_true(did_purge_lazy || did_purge_forced,
-		    "Expected purge");
+		expect_true(
+		    did_purge_lazy || did_purge_forced, "Expected purge");
 		expect_true(called_split, "Expected split call");
 	}
 	dallocx(p, flags);
@@ -72,8 +75,8 @@ test_extent_body(unsigned arena_ind) {
 	}
 	xallocx_success_c = (xallocx(p, large0 * 2, 0, flags) == large0 * 2);
 	if (did_split) {
-		expect_b_eq(did_decommit, did_commit,
-		    "Expected decommit/commit match");
+		expect_b_eq(
+		    did_decommit, did_commit, "Expected decommit/commit match");
 	}
 	if (xallocx_success_b && xallocx_success_c) {
 		expect_true(did_merge, "Expected merge");
@@ -90,33 +93,34 @@ test_extent_body(unsigned arena_ind) {
 
 static void
 test_manual_hook_auto_arena(void) {
-	unsigned narenas;
-	size_t old_size, new_size, sz;
-	size_t hooks_mib[3];
-	size_t hooks_miblen;
+	unsigned        narenas;
+	size_t          old_size, new_size, sz;
+	size_t          hooks_mib[3];
+	size_t          hooks_miblen;
 	extent_hooks_t *new_hooks, *old_hooks;
 
 	extent_hooks_prep();
 
 	sz = sizeof(unsigned);
 	/* Get number of auto arenas. */
-	expect_d_eq(mallctl("opt.narenas", (void *)&narenas, &sz, NULL, 0),
-	    0, "Unexpected mallctl() failure");
+	expect_d_eq(mallctl("opt.narenas", (void *)&narenas, &sz, NULL, 0), 0,
+	    "Unexpected mallctl() failure");
 	if (narenas == 1) {
 		return;
 	}
 
 	/* Install custom extent hooks on arena 1 (might not be initialized). */
-	hooks_miblen = sizeof(hooks_mib)/sizeof(size_t);
-	expect_d_eq(mallctlnametomib("arena.0.extent_hooks", hooks_mib,
-	    &hooks_miblen), 0, "Unexpected mallctlnametomib() failure");
+	hooks_miblen = sizeof(hooks_mib) / sizeof(size_t);
+	expect_d_eq(
+	    mallctlnametomib("arena.0.extent_hooks", hooks_mib, &hooks_miblen),
+	    0, "Unexpected mallctlnametomib() failure");
 	hooks_mib[1] = 1;
 	old_size = sizeof(extent_hooks_t *);
 	new_hooks = &hooks;
 	new_size = sizeof(extent_hooks_t *);
 	expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks,
-	    &old_size, (void *)&new_hooks, new_size), 0,
-	    "Unexpected extent_hooks error");
+	                &old_size, (void *)&new_hooks, new_size),
+	    0, "Unexpected extent_hooks error");
 	static bool auto_arena_created = false;
 	if (old_hooks != &hooks) {
 		expect_b_eq(auto_arena_created, false,
@@ -127,10 +131,10 @@ test_manual_hook_auto_arena(void) {
 
 static void
 test_manual_hook_body(void) {
-	unsigned arena_ind;
-	size_t old_size, new_size, sz;
-	size_t hooks_mib[3];
-	size_t hooks_miblen;
+	unsigned        arena_ind;
+	size_t          old_size, new_size, sz;
+	size_t          hooks_mib[3];
+	size_t          hooks_miblen;
 	extent_hooks_t *new_hooks, *old_hooks;
 
 	extent_hooks_prep();
@@ -140,16 +144,17 @@ test_manual_hook_body(void) {
 	    0, "Unexpected mallctl() failure");
 
 	/* Install custom extent hooks. */
-	hooks_miblen = sizeof(hooks_mib)/sizeof(size_t);
-	expect_d_eq(mallctlnametomib("arena.0.extent_hooks", hooks_mib,
-	    &hooks_miblen), 0, "Unexpected mallctlnametomib() failure");
+	hooks_miblen = sizeof(hooks_mib) / sizeof(size_t);
+	expect_d_eq(
+	    mallctlnametomib("arena.0.extent_hooks", hooks_mib, &hooks_miblen),
+	    0, "Unexpected mallctlnametomib() failure");
 	hooks_mib[1] = (size_t)arena_ind;
 	old_size = sizeof(extent_hooks_t *);
 	new_hooks = &hooks;
 	new_size = sizeof(extent_hooks_t *);
 	expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks,
-	    &old_size, (void *)&new_hooks, new_size), 0,
-	    "Unexpected extent_hooks error");
+	                &old_size, (void *)&new_hooks, new_size),
+	    0, "Unexpected extent_hooks error");
 	expect_ptr_ne(old_hooks->alloc, extent_alloc_hook,
 	    "Unexpected extent_hooks error");
 	expect_ptr_ne(old_hooks->dalloc, extent_dalloc_hook,
@@ -173,10 +178,13 @@ test_manual_hook_body(void) {
 
 	/* Restore extent hooks. */
 	expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, NULL, NULL,
-	    (void *)&old_hooks, new_size), 0, "Unexpected extent_hooks error");
+	                (void *)&old_hooks, new_size),
+	    0, "Unexpected extent_hooks error");
 	expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks,
-	    &old_size, NULL, 0), 0, "Unexpected extent_hooks error");
-	expect_ptr_eq(old_hooks, default_hooks, "Unexpected extent_hooks error");
+	                &old_size, NULL, 0),
+	    0, "Unexpected extent_hooks error");
+	expect_ptr_eq(
+	    old_hooks, default_hooks, "Unexpected extent_hooks error");
 	expect_ptr_eq(old_hooks->alloc, default_hooks->alloc,
 	    "Unexpected extent_hooks error");
 	expect_ptr_eq(old_hooks->dalloc, default_hooks->dalloc,
@@ -213,8 +221,8 @@ TEST_BEGIN(test_extent_manual_hook) {
 TEST_END
 
 TEST_BEGIN(test_extent_auto_hook) {
-	unsigned arena_ind;
-	size_t new_size, sz;
+	unsigned        arena_ind;
+	size_t          new_size, sz;
 	extent_hooks_t *new_hooks;
 
 	extent_hooks_prep();
@@ -223,7 +231,8 @@ TEST_BEGIN(test_extent_auto_hook) {
 	new_hooks = &hooks;
 	new_size = sizeof(extent_hooks_t *);
 	expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz,
-	    (void *)&new_hooks, new_size), 0, "Unexpected mallctl() failure");
+	                (void *)&new_hooks, new_size),
+	    0, "Unexpected mallctl() failure");
 
 	test_skip_if(is_background_thread_enabled());
 	test_extent_body(arena_ind);
@@ -231,19 +240,18 @@ TEST_BEGIN(test_extent_auto_hook) {
 TEST_END
 
 static void
-test_arenas_create_ext_base(arena_config_t config,
-	bool expect_hook_data, bool expect_hook_metadata)
-{
+test_arenas_create_ext_base(
+    arena_config_t config, bool expect_hook_data, bool expect_hook_metadata) {
 	unsigned arena, arena1;
-	void *ptr;
-	size_t sz = sizeof(unsigned);
+	void    *ptr;
+	size_t   sz = sizeof(unsigned);
 
 	extent_hooks_prep();
 
 	called_alloc = false;
-	expect_d_eq(mallctl("experimental.arenas_create_ext",
-	    (void *)&arena, &sz, &config, sizeof(arena_config_t)), 0,
-	    "Unexpected mallctl() failure");
+	expect_d_eq(mallctl("experimental.arenas_create_ext", (void *)&arena,
+	                &sz, &config, sizeof(arena_config_t)),
+	    0, "Unexpected mallctl() failure");
 	expect_b_eq(called_alloc, expect_hook_metadata,
 	    "expected hook metadata alloc mismatch");
 
@@ -279,9 +287,7 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_extent_manual_hook,
-	    test_extent_auto_hook,
+	return test(test_extent_manual_hook, test_extent_auto_hook,
 	    test_arenas_create_ext_with_ehooks_no_metadata,
 	    test_arenas_create_ext_with_ehooks_with_metadata);
 }
diff --git a/test/integration/malloc.c b/test/integration/malloc.c
index ef449163..a77e44a6 100644
--- a/test/integration/malloc.c
+++ b/test/integration/malloc.c
@@ -11,6 +11,5 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_zero_alloc);
+	return test(test_zero_alloc);
 }
diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c
index fdf1e3f4..c7ed0fb9 100644
--- a/test/integration/mallocx.c
+++ b/test/integration/mallocx.c
@@ -3,7 +3,7 @@
 static unsigned
 get_nsizes_impl(const char *cmd) {
 	unsigned ret;
-	size_t z;
+	size_t   z;
 
 	z = sizeof(unsigned);
 	expect_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0,
@@ -25,12 +25,12 @@ get_size_impl(const char *cmd, size_t ind) {
 	size_t miblen = 4;
 
 	z = sizeof(size_t);
-	expect_d_eq(mallctlnametomib(cmd, mib, &miblen),
-	    0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd);
+	expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0,
+	    "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd);
 	mib[2] = ind;
 	z = sizeof(size_t);
-	expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0),
-	    0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind);
+	expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0,
+	    "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind);
 
 	return ret;
 }
@@ -64,36 +64,37 @@ JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN
 TEST_BEGIN(test_overflow) {
 	size_t largemax;
 
-	largemax = get_large_size(get_nlarge()-1);
+	largemax = get_large_size(get_nlarge() - 1);
 
-	expect_ptr_null(mallocx(largemax+1, 0),
-	    "Expected OOM for mallocx(size=%#zx, 0)", largemax+1);
+	expect_ptr_null(mallocx(largemax + 1, 0),
+	    "Expected OOM for mallocx(size=%#zx, 0)", largemax + 1);
 
-	expect_ptr_null(mallocx(ZU(PTRDIFF_MAX)+1, 0),
-	    "Expected OOM for mallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX)+1);
+	expect_ptr_null(mallocx(ZU(PTRDIFF_MAX) + 1, 0),
+	    "Expected OOM for mallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX) + 1);
 
 	expect_ptr_null(mallocx(SIZE_T_MAX, 0),
 	    "Expected OOM for mallocx(size=%#zx, 0)", SIZE_T_MAX);
 
-	expect_ptr_null(mallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)),
+	expect_ptr_null(mallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX) + 1)),
 	    "Expected OOM for mallocx(size=1, MALLOCX_ALIGN(%#zx))",
-	    ZU(PTRDIFF_MAX)+1);
+	    ZU(PTRDIFF_MAX) + 1);
 }
 TEST_END
 
 static void *
 remote_alloc(void *arg) {
 	unsigned arena;
-	size_t sz = sizeof(unsigned);
+	size_t   sz = sizeof(unsigned);
 	expect_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0,
 	    "Unexpected mallctl() failure");
 	size_t large_sz;
 	sz = sizeof(size_t);
-	expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large_sz, &sz,
-	    NULL, 0), 0, "Unexpected mallctl failure");
+	expect_d_eq(
+	    mallctl("arenas.lextent.0.size", (void *)&large_sz, &sz, NULL, 0),
+	    0, "Unexpected mallctl failure");
 
-	void *ptr = mallocx(large_sz, MALLOCX_ARENA(arena)
-	    | MALLOCX_TCACHE_NONE);
+	void *ptr = mallocx(
+	    large_sz, MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE);
 	void **ret = (void **)arg;
 	*ret = ptr;
 
@@ -114,16 +115,16 @@ TEST_BEGIN(test_remote_free) {
 TEST_END
 
 TEST_BEGIN(test_oom) {
-	size_t largemax;
-	bool oom;
-	void *ptrs[3];
+	size_t   largemax;
+	bool     oom;
+	void    *ptrs[3];
 	unsigned i;
 
 	/*
 	 * It should be impossible to allocate three objects that each consume
 	 * nearly half the virtual address space.
 	 */
-	largemax = get_large_size(get_nlarge()-1);
+	largemax = get_large_size(get_nlarge() - 1);
 	oom = false;
 	for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) {
 		ptrs[i] = mallocx(largemax, MALLOCX_ARENA(0));
@@ -143,10 +144,10 @@ TEST_BEGIN(test_oom) {
 
 #if LG_SIZEOF_PTR == 3
 	expect_ptr_null(mallocx(0x8000000000000000ULL,
-	    MALLOCX_ALIGN(0x8000000000000000ULL)),
+	                    MALLOCX_ALIGN(0x8000000000000000ULL)),
 	    "Expected OOM for mallocx()");
-	expect_ptr_null(mallocx(0x8000000000000000ULL,
-	    MALLOCX_ALIGN(0x80000000)),
+	expect_ptr_null(
+	    mallocx(0x8000000000000000ULL, MALLOCX_ALIGN(0x80000000)),
 	    "Expected OOM for mallocx()");
 #else
 	expect_ptr_null(mallocx(0x80000000UL, MALLOCX_ALIGN(0x80000000UL)),
@@ -164,20 +165,20 @@ TEST_BEGIN(test_basic) {
 
 	for (sz = 1; sz < MAXSZ; sz = nallocx(sz, 0) + 1) {
 		size_t nsz, rsz;
-		void *p;
+		void  *p;
 		nsz = nallocx(sz, 0);
 		expect_zu_ne(nsz, 0, "Unexpected nallocx() error");
 		p = mallocx(sz, 0);
-		expect_ptr_not_null(p,
-		    "Unexpected mallocx(size=%zx, flags=0) error", sz);
+		expect_ptr_not_null(
+		    p, "Unexpected mallocx(size=%zx, flags=0) error", sz);
 		rsz = sallocx(p, 0);
 		expect_zu_ge(rsz, sz, "Real size smaller than expected");
 		expect_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch");
 		dallocx(p, 0);
 
 		p = mallocx(sz, 0);
-		expect_ptr_not_null(p,
-		    "Unexpected mallocx(size=%zx, flags=0) error", sz);
+		expect_ptr_not_null(
+		    p, "Unexpected mallocx(size=%zx, flags=0) error", sz);
 		dallocx(p, 0);
 
 		nsz = nallocx(sz, MALLOCX_ZERO);
@@ -197,53 +198,57 @@ TEST_END
 
 TEST_BEGIN(test_alignment_and_size) {
 	const char *percpu_arena;
-	size_t sz = sizeof(percpu_arena);
+	size_t      sz = sizeof(percpu_arena);
 
-	if(mallctl("opt.percpu_arena", (void *)&percpu_arena, &sz, NULL, 0) ||
-	    strcmp(percpu_arena, "disabled") != 0) {
-		test_skip("test_alignment_and_size skipped: "
+	if (mallctl("opt.percpu_arena", (void *)&percpu_arena, &sz, NULL, 0)
+	    || strcmp(percpu_arena, "disabled") != 0) {
+		test_skip(
+		    "test_alignment_and_size skipped: "
 		    "not working with percpu arena.");
 	};
 #define MAXALIGN (((size_t)1) << 23)
 #define NITER 4
-	size_t nsz, rsz, alignment, total;
+	size_t   nsz, rsz, alignment, total;
 	unsigned i;
-	void *ps[NITER];
+	void    *ps[NITER];
 
 	for (i = 0; i < NITER; i++) {
 		ps[i] = NULL;
 	}
 
-	for (alignment = 8;
-	    alignment <= MAXALIGN;
-	    alignment <<= 1) {
+	for (alignment = 8; alignment <= MAXALIGN; alignment <<= 1) {
 		total = 0;
-		for (sz = 1;
-		    sz < 3 * alignment && sz < (1U << 31);
-		    sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) {
+		for (sz = 1; sz < 3 * alignment && sz < (1U << 31);
+		     sz += (alignment >> (LG_SIZEOF_PTR - 1)) - 1) {
 			for (i = 0; i < NITER; i++) {
-				nsz = nallocx(sz, MALLOCX_ALIGN(alignment) |
-				    MALLOCX_ZERO | MALLOCX_ARENA(0));
+				nsz = nallocx(sz,
+				    MALLOCX_ALIGN(alignment) | MALLOCX_ZERO
+				        | MALLOCX_ARENA(0));
 				expect_zu_ne(nsz, 0,
 				    "nallocx() error for alignment=%zu, "
-				    "size=%zu (%#zx)", alignment, sz, sz);
-				ps[i] = mallocx(sz, MALLOCX_ALIGN(alignment) |
-				    MALLOCX_ZERO | MALLOCX_ARENA(0));
+				    "size=%zu (%#zx)",
+				    alignment, sz, sz);
+				ps[i] = mallocx(sz,
+				    MALLOCX_ALIGN(alignment) | MALLOCX_ZERO
+				        | MALLOCX_ARENA(0));
 				expect_ptr_not_null(ps[i],
 				    "mallocx() error for alignment=%zu, "
-				    "size=%zu (%#zx)", alignment, sz, sz);
+				    "size=%zu (%#zx)",
+				    alignment, sz, sz);
 				rsz = sallocx(ps[i], 0);
 				expect_zu_ge(rsz, sz,
 				    "Real size smaller than expected for "
-				    "alignment=%zu, size=%zu", alignment, sz);
+				    "alignment=%zu, size=%zu",
+				    alignment, sz);
 				expect_zu_eq(nsz, rsz,
 				    "nallocx()/sallocx() size mismatch for "
-				    "alignment=%zu, size=%zu", alignment, sz);
-				expect_ptr_null(
-				    (void *)((uintptr_t)ps[i] & (alignment-1)),
-				    "%p inadequately aligned for"
-				    " alignment=%zu, size=%zu", ps[i],
+				    "alignment=%zu, size=%zu",
 				    alignment, sz);
+				expect_ptr_null((void *)((uintptr_t)ps[i]
+				                    & (alignment - 1)),
+				    "%p inadequately aligned for"
+				    " alignment=%zu, size=%zu",
+				    ps[i], alignment, sz);
 				total += rsz;
 				if (total >= (MAXALIGN << 1)) {
 					break;
@@ -265,10 +270,6 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_overflow,
-	    test_oom,
-	    test_remote_free,
-	    test_basic,
+	return test(test_overflow, test_oom, test_remote_free, test_basic,
 	    test_alignment_and_size);
 }
diff --git a/test/integration/overflow.c b/test/integration/overflow.c
index ce63327c..17282e84 100644
--- a/test/integration/overflow.c
+++ b/test/integration/overflow.c
@@ -12,13 +12,14 @@ JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN
 
 TEST_BEGIN(test_overflow) {
 	unsigned nlextents;
-	size_t mib[4];
-	size_t sz, miblen, max_size_class;
-	void *p;
+	size_t   mib[4];
+	size_t   sz, miblen, max_size_class;
+	void    *p;
 
 	sz = sizeof(unsigned);
-	expect_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL,
-	    0), 0, "Unexpected mallctl() error");
+	expect_d_eq(
+	    mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, 0), 0,
+	    "Unexpected mallctl() error");
 
 	miblen = sizeof(mib) / sizeof(size_t);
 	expect_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0,
@@ -26,8 +27,9 @@ TEST_BEGIN(test_overflow) {
 	mib[2] = nlextents - 1;
 
 	sz = sizeof(size_t);
-	expect_d_eq(mallctlbymib(mib, miblen, (void *)&max_size_class, &sz,
-	    NULL, 0), 0, "Unexpected mallctlbymib() error");
+	expect_d_eq(
+	    mallctlbymib(mib, miblen, (void *)&max_size_class, &sz, NULL, 0), 0,
+	    "Unexpected mallctlbymib() error");
 
 	expect_ptr_null(malloc(max_size_class + 1),
 	    "Expected OOM due to over-sized allocation request");
@@ -54,6 +56,5 @@ JEMALLOC_DIAGNOSTIC_POP
 
 int
 main(void) {
-	return test(
-	    test_overflow);
+	return test(test_overflow);
 }
diff --git a/test/integration/posix_memalign.c b/test/integration/posix_memalign.c
index 2da0549b..e0df56f3 100644
--- a/test/integration/posix_memalign.c
+++ b/test/integration/posix_memalign.c
@@ -15,48 +15,44 @@ purge(void) {
 
 TEST_BEGIN(test_alignment_errors) {
 	size_t alignment;
-	void *p;
+	void  *p;
 
 	for (alignment = 0; alignment < sizeof(void *); alignment++) {
 		expect_d_eq(posix_memalign(&p, alignment, 1), EINVAL,
-		    "Expected error for invalid alignment %zu",
-		    alignment);
+		    "Expected error for invalid alignment %zu", alignment);
 	}
 
 	for (alignment = sizeof(size_t); alignment < MAXALIGN;
-	    alignment <<= 1) {
+	     alignment <<= 1) {
 		expect_d_ne(posix_memalign(&p, alignment + 1, 1), 0,
-		    "Expected error for invalid alignment %zu",
-		    alignment + 1);
+		    "Expected error for invalid alignment %zu", alignment + 1);
 	}
 }
 TEST_END
 
 TEST_BEGIN(test_oom_errors) {
 	size_t alignment, size;
-	void *p;
+	void  *p;
 
 #if LG_SIZEOF_PTR == 3
 	alignment = UINT64_C(0x8000000000000000);
-	size      = UINT64_C(0x8000000000000000);
+	size = UINT64_C(0x8000000000000000);
 #else
 	alignment = 0x80000000LU;
-	size      = 0x80000000LU;
+	size = 0x80000000LU;
 #endif
 	expect_d_ne(posix_memalign(&p, alignment, size), 0,
-	    "Expected error for posix_memalign(&p, %zu, %zu)",
-	    alignment, size);
+	    "Expected error for posix_memalign(&p, %zu, %zu)", alignment, size);
 
 #if LG_SIZEOF_PTR == 3
 	alignment = UINT64_C(0x4000000000000000);
-	size      = UINT64_C(0xc000000000000001);
+	size = UINT64_C(0xc000000000000001);
 #else
 	alignment = 0x40000000LU;
-	size      = 0xc0000001LU;
+	size = 0xc0000001LU;
 #endif
 	expect_d_ne(posix_memalign(&p, alignment, size), 0,
-	    "Expected error for posix_memalign(&p, %zu, %zu)",
-	    alignment, size);
+	    "Expected error for posix_memalign(&p, %zu, %zu)", alignment, size);
 
 	alignment = 0x10LU;
 #if LG_SIZEOF_PTR == 3
@@ -65,33 +61,29 @@ TEST_BEGIN(test_oom_errors) {
 	size = 0xfffffff0LU;
 #endif
 	expect_d_ne(posix_memalign(&p, alignment, size), 0,
-	    "Expected error for posix_memalign(&p, %zu, %zu)",
-	    alignment, size);
+	    "Expected error for posix_memalign(&p, %zu, %zu)", alignment, size);
 }
 TEST_END
 
 TEST_BEGIN(test_alignment_and_size) {
 #define NITER 4
-	size_t alignment, size, total;
+	size_t   alignment, size, total;
 	unsigned i;
-	int err;
-	void *ps[NITER];
+	int      err;
+	void    *ps[NITER];
 
 	for (i = 0; i < NITER; i++) {
 		ps[i] = NULL;
 	}
 
-	for (alignment = 8;
-	    alignment <= MAXALIGN;
-	    alignment <<= 1) {
+	for (alignment = 8; alignment <= MAXALIGN; alignment <<= 1) {
 		total = 0;
-		for (size = 0;
-		    size < 3 * alignment && size < (1U << 31);
-		    size += ((size == 0) ? 1 :
-		    (alignment >> (LG_SIZEOF_PTR-1)) - 1)) {
+		for (size = 0; size < 3 * alignment && size < (1U << 31);
+		     size += ((size == 0)
+		             ? 1
+		             : (alignment >> (LG_SIZEOF_PTR - 1)) - 1)) {
 			for (i = 0; i < NITER; i++) {
-				err = posix_memalign(&ps[i],
-				    alignment, size);
+				err = posix_memalign(&ps[i], alignment, size);
 				if (err) {
 					char buf[BUFERROR_BUF];
 
@@ -122,7 +114,5 @@ TEST_END
 int
 main(void) {
 	return test(
-	    test_alignment_errors,
-	    test_oom_errors,
-	    test_alignment_and_size);
+	    test_alignment_errors, test_oom_errors, test_alignment_and_size);
 }
diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c
index 68b8f381..8e822df7 100644
--- a/test/integration/rallocx.c
+++ b/test/integration/rallocx.c
@@ -3,7 +3,7 @@
 static unsigned
 get_nsizes_impl(const char *cmd) {
 	unsigned ret;
-	size_t z;
+	size_t   z;
 
 	z = sizeof(unsigned);
 	expect_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0,
@@ -25,12 +25,12 @@ get_size_impl(const char *cmd, size_t ind) {
 	size_t miblen = 4;
 
 	z = sizeof(size_t);
-	expect_d_eq(mallctlnametomib(cmd, mib, &miblen),
-	    0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd);
+	expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0,
+	    "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd);
 	mib[2] = ind;
 	z = sizeof(size_t);
-	expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0),
-	    0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind);
+	expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0,
+	    "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind);
 
 	return ret;
 }
@@ -49,7 +49,7 @@ TEST_BEGIN(test_grow_and_shrink) {
 	size_t tsz;
 #define NCYCLES 3
 	unsigned i, j;
-#define NSZS 1024
+#define NSZS 64
 	size_t szs[NSZS];
 #define MAXSZ ZU(12 * 1024 * 1024)
 
@@ -58,25 +58,26 @@ TEST_BEGIN(test_grow_and_shrink) {
 	szs[0] = sallocx(p, 0);
 
 	for (i = 0; i < NCYCLES; i++) {
-		for (j = 1; j < NSZS && szs[j-1] < MAXSZ; j++) {
-			q = rallocx(p, szs[j-1]+1, 0);
+		for (j = 1; j < NSZS && szs[j - 1] < MAXSZ; j++) {
+			q = rallocx(p, szs[j - 1] + 1, 0);
 			expect_ptr_not_null(q,
 			    "Unexpected rallocx() error for size=%zu-->%zu",
-			    szs[j-1], szs[j-1]+1);
+			    szs[j - 1], szs[j - 1] + 1);
 			szs[j] = sallocx(q, 0);
-			expect_zu_ne(szs[j], szs[j-1]+1,
-			    "Expected size to be at least: %zu", szs[j-1]+1);
+			expect_zu_ne(szs[j], szs[j - 1] + 1,
+			    "Expected size to be at least: %zu",
+			    szs[j - 1] + 1);
 			p = q;
 		}
 
 		for (j--; j > 0; j--) {
-			q = rallocx(p, szs[j-1], 0);
+			q = rallocx(p, szs[j - 1], 0);
 			expect_ptr_not_null(q,
 			    "Unexpected rallocx() error for size=%zu-->%zu",
-			    szs[j], szs[j-1]);
+			    szs[j], szs[j - 1]);
 			tsz = sallocx(q, 0);
-			expect_zu_eq(tsz, szs[j-1],
-			    "Expected size=%zu, got size=%zu", szs[j-1], tsz);
+			expect_zu_eq(tsz, szs[j - 1],
+			    "Expected size=%zu, got size=%zu", szs[j - 1], tsz);
 			p = q;
 		}
 	}
@@ -99,11 +100,12 @@ validate_fill(void *p, uint8_t c, size_t offset, size_t len) {
 	size_t i;
 
 	for (i = 0; i < len; i++) {
-		uint8_t b = buf[offset+i];
+		uint8_t b = buf[offset + i];
 		if (b != c) {
-			test_fail("Allocation at %p (len=%zu) contains %#x "
-			    "rather than %#x at offset %zu", p, len, b, c,
-			    offset+i);
+			test_fail(
+			    "Allocation at %p (len=%zu) contains %#x "
+			    "rather than %#x at offset %zu",
+			    p, len, b, c, offset + i);
 			ret = true;
 		}
 	}
@@ -118,35 +120,37 @@ TEST_BEGIN(test_zero) {
 	 */
 	void *volatile p, *volatile q;
 	size_t psz, qsz, i, j;
-	size_t start_sizes[] = {1, 3*1024, 63*1024, 4095*1024};
+	size_t start_sizes[] = {1, 3 * 1024, 63 * 1024, 4095 * 1024};
 #define FILL_BYTE 0xaaU
 #define RANGE 2048
 
-	for (i = 0; i < sizeof(start_sizes)/sizeof(size_t); i++) {
+	for (i = 0; i < sizeof(start_sizes) / sizeof(size_t); i++) {
 		size_t start_size = start_sizes[i];
 		p = mallocx(start_size, MALLOCX_ZERO);
 		expect_ptr_not_null(p, "Unexpected mallocx() error");
 		psz = sallocx(p, 0);
 
-		expect_false(validate_fill(p, 0, 0, psz),
-		    "Expected zeroed memory");
+		expect_false(
+		    validate_fill(p, 0, 0, psz), "Expected zeroed memory");
 		memset(p, FILL_BYTE, psz);
 		expect_false(validate_fill(p, FILL_BYTE, 0, psz),
 		    "Expected filled memory");
 
 		for (j = 1; j < RANGE; j++) {
-			q = rallocx(p, start_size+j, MALLOCX_ZERO);
+			q = rallocx(p, start_size + j, MALLOCX_ZERO);
 			expect_ptr_not_null(q, "Unexpected rallocx() error");
 			qsz = sallocx(q, 0);
 			if (q != p || qsz != psz) {
-				expect_false(validate_fill(q, FILL_BYTE, 0,
-				    psz), "Expected filled memory");
-				expect_false(validate_fill(q, 0, psz, qsz-psz),
+				expect_false(
+				    validate_fill(q, FILL_BYTE, 0, psz),
+				    "Expected filled memory");
+				expect_false(
+				    validate_fill(q, 0, psz, qsz - psz),
 				    "Expected zeroed memory");
 			}
 			if (psz != qsz) {
-				memset((void *)((uintptr_t)q+psz), FILL_BYTE,
-				    qsz-psz);
+				memset((void *)((uintptr_t)q + psz), FILL_BYTE,
+				    qsz - psz);
 				psz = qsz;
 			}
 			p = q;
@@ -160,7 +164,7 @@ TEST_BEGIN(test_zero) {
 TEST_END
 
 TEST_BEGIN(test_align) {
-	void *p, *q;
+	void  *p, *q;
 	size_t align;
 #define MAX_ALIGN (ZU(1) << 25)
 
@@ -170,12 +174,10 @@ TEST_BEGIN(test_align) {
 
 	for (align <<= 1; align <= MAX_ALIGN; align <<= 1) {
 		q = rallocx(p, 1, MALLOCX_ALIGN(align));
-		expect_ptr_not_null(q,
-		    "Unexpected rallocx() error for align=%zu", align);
-		expect_ptr_null(
-		    (void *)((uintptr_t)q & (align-1)),
-		    "%p inadequately aligned for align=%zu",
-		    q, align);
+		expect_ptr_not_null(
+		    q, "Unexpected rallocx() error for align=%zu", align);
+		expect_ptr_null((void *)((uintptr_t)q & (align - 1)),
+		    "%p inadequately aligned for align=%zu", q, align);
 		p = q;
 	}
 	dallocx(p, 0);
@@ -191,19 +193,19 @@ TEST_BEGIN(test_align_enum) {
 		for (size_t lg_size = LG_MIN; lg_size <= LG_MAX; ++lg_size) {
 			size_t size = 1 << lg_size;
 			for (size_t lg_align_next = LG_MIN;
-			    lg_align_next <= LG_MAX; ++lg_align_next) {
-				int flags = MALLOCX_LG_ALIGN(lg_align);
+			     lg_align_next <= LG_MAX; ++lg_align_next) {
+				int   flags = MALLOCX_LG_ALIGN(lg_align);
 				void *p = mallocx(1, flags);
-				assert_ptr_not_null(p,
-				    "Unexpected mallocx() error");
+				assert_ptr_not_null(
+				    p, "Unexpected mallocx() error");
 				assert_zu_eq(nallocx(1, flags),
 				    TEST_MALLOC_SIZE(p),
 				    "Wrong mallocx() usable size");
-				int flags_next =
-				    MALLOCX_LG_ALIGN(lg_align_next);
+				int flags_next = MALLOCX_LG_ALIGN(
+				    lg_align_next);
 				p = rallocx(p, size, flags_next);
-				assert_ptr_not_null(p,
-				    "Unexpected rallocx() error");
+				assert_ptr_not_null(
+				    p, "Unexpected rallocx() error");
 				expect_zu_eq(nallocx(size, flags_next),
 				    TEST_MALLOC_SIZE(p),
 				    "Wrong rallocx() usable size");
@@ -223,20 +225,20 @@ TEST_BEGIN(test_lg_align_and_zero) {
 	 */
 	void *volatile p, *volatile q;
 	unsigned lg_align;
-	size_t sz;
+	size_t   sz;
 #define MAX_LG_ALIGN 25
 #define MAX_VALIDATE (ZU(1) << 22)
 
 	lg_align = 0;
-	p = mallocx(1, MALLOCX_LG_ALIGN(lg_align)|MALLOCX_ZERO);
+	p = mallocx(1, MALLOCX_LG_ALIGN(lg_align) | MALLOCX_ZERO);
 	expect_ptr_not_null(p, "Unexpected mallocx() error");
 
 	for (lg_align++; lg_align <= MAX_LG_ALIGN; lg_align++) {
-		q = rallocx(p, 1, MALLOCX_LG_ALIGN(lg_align)|MALLOCX_ZERO);
-		expect_ptr_not_null(q,
-		    "Unexpected rallocx() error for lg_align=%u", lg_align);
+		q = rallocx(p, 1, MALLOCX_LG_ALIGN(lg_align) | MALLOCX_ZERO);
+		expect_ptr_not_null(
+		    q, "Unexpected rallocx() error for lg_align=%u", lg_align);
 		expect_ptr_null(
-		    (void *)((uintptr_t)q & ((ZU(1) << lg_align)-1)),
+		    (void *)((uintptr_t)q & ((ZU(1) << lg_align) - 1)),
 		    "%p inadequately aligned for lg_align=%u", q, lg_align);
 		sz = sallocx(q, 0);
 		if ((sz << 1) <= MAX_VALIDATE) {
@@ -245,9 +247,10 @@ TEST_BEGIN(test_lg_align_and_zero) {
 		} else {
 			expect_false(validate_fill(q, 0, 0, MAX_VALIDATE),
 			    "Expected zeroed memory");
-			expect_false(validate_fill(
-			    (void *)((uintptr_t)q+sz-MAX_VALIDATE),
-			    0, 0, MAX_VALIDATE), "Expected zeroed memory");
+			expect_false(validate_fill((void *)((uintptr_t)q + sz
+			                               - MAX_VALIDATE),
+			                 0, 0, MAX_VALIDATE),
+			    "Expected zeroed memory");
 		}
 		p = q;
 	}
@@ -269,25 +272,25 @@ JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN
 
 TEST_BEGIN(test_overflow) {
 	size_t largemax;
-	void *p;
+	void  *p;
 
-	largemax = get_large_size(get_nlarge()-1);
+	largemax = get_large_size(get_nlarge() - 1);
 
 	p = mallocx(1, 0);
 	expect_ptr_not_null(p, "Unexpected mallocx() failure");
 
-	expect_ptr_null(rallocx(p, largemax+1, 0),
-	    "Expected OOM for rallocx(p, size=%#zx, 0)", largemax+1);
+	expect_ptr_null(rallocx(p, largemax + 1, 0),
+	    "Expected OOM for rallocx(p, size=%#zx, 0)", largemax + 1);
 
-	expect_ptr_null(rallocx(p, ZU(PTRDIFF_MAX)+1, 0),
-	    "Expected OOM for rallocx(p, size=%#zx, 0)", ZU(PTRDIFF_MAX)+1);
+	expect_ptr_null(rallocx(p, ZU(PTRDIFF_MAX) + 1, 0),
+	    "Expected OOM for rallocx(p, size=%#zx, 0)", ZU(PTRDIFF_MAX) + 1);
 
 	expect_ptr_null(rallocx(p, SIZE_T_MAX, 0),
 	    "Expected OOM for rallocx(p, size=%#zx, 0)", SIZE_T_MAX);
 
-	expect_ptr_null(rallocx(p, 1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)),
+	expect_ptr_null(rallocx(p, 1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX) + 1)),
 	    "Expected OOM for rallocx(p, size=1, MALLOCX_ALIGN(%#zx))",
-	    ZU(PTRDIFF_MAX)+1);
+	    ZU(PTRDIFF_MAX) + 1);
 
 	dallocx(p, 0);
 }
@@ -298,11 +301,6 @@ JEMALLOC_DIAGNOSTIC_POP
 
 int
 main(void) {
-	return test(
-	    test_grow_and_shrink,
-	    test_zero,
-	    test_align,
-	    test_align_enum,
-	    test_lg_align_and_zero,
-	    test_overflow);
+	return test(test_grow_and_shrink, test_zero, test_align,
+	    test_align_enum, test_lg_align_and_zero, test_overflow);
 }
diff --git a/test/integration/sdallocx.c b/test/integration/sdallocx.c
index ca014485..ec2fb938 100644
--- a/test/integration/sdallocx.c
+++ b/test/integration/sdallocx.c
@@ -10,26 +10,23 @@ TEST_BEGIN(test_basic) {
 TEST_END
 
 TEST_BEGIN(test_alignment_and_size) {
-	size_t nsz, sz, alignment, total;
+	size_t   nsz, sz, alignment, total;
 	unsigned i;
-	void *ps[NITER];
+	void    *ps[NITER];
 
 	for (i = 0; i < NITER; i++) {
 		ps[i] = NULL;
 	}
 
-	for (alignment = 8;
-	    alignment <= MAXALIGN;
-	    alignment <<= 1) {
+	for (alignment = 8; alignment <= MAXALIGN; alignment <<= 1) {
 		total = 0;
-		for (sz = 1;
-		    sz < 3 * alignment && sz < (1U << 31);
-		    sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) {
+		for (sz = 1; sz < 3 * alignment && sz < (1U << 31);
+		     sz += (alignment >> (LG_SIZEOF_PTR - 1)) - 1) {
 			for (i = 0; i < NITER; i++) {
-				nsz = nallocx(sz, MALLOCX_ALIGN(alignment) |
-				    MALLOCX_ZERO);
-				ps[i] = mallocx(sz, MALLOCX_ALIGN(alignment) |
-				    MALLOCX_ZERO);
+				nsz = nallocx(sz,
+				    MALLOCX_ALIGN(alignment) | MALLOCX_ZERO);
+				ps[i] = mallocx(sz,
+				    MALLOCX_ALIGN(alignment) | MALLOCX_ZERO);
 				total += nsz;
 				if (total >= (MAXALIGN << 1)) {
 					break;
@@ -49,7 +46,5 @@ TEST_END
 
 int
 main(void) {
-	return test_no_reentrancy(
-	    test_basic,
-	    test_alignment_and_size);
+	return test_no_reentrancy(test_basic, test_alignment_and_size);
 }
diff --git a/test/integration/slab_sizes.c b/test/integration/slab_sizes.c
index f6a66f21..f1ff67aa 100644
--- a/test/integration/slab_sizes.c
+++ b/test/integration/slab_sizes.c
@@ -4,10 +4,10 @@
 
 TEST_BEGIN(test_slab_sizes) {
 	unsigned nbins;
-	size_t page;
-	size_t sizemib[4];
-	size_t slabmib[4];
-	size_t len;
+	size_t   page;
+	size_t   sizemib[4];
+	size_t   slabmib[4];
+	size_t   len;
 
 	len = sizeof(nbins);
 	expect_d_eq(mallctl("arenas.nbins", &nbins, &len, NULL, 0), 0,
@@ -33,12 +33,14 @@ TEST_BEGIN(test_slab_sizes) {
 		len = sizeof(size_t);
 		sizemib[2] = i;
 		slabmib[2] = i;
-		expect_d_eq(mallctlbymib(sizemib, 4, (void *)&bin_size, &len,
-		    NULL, 0), 0, "bin size mallctlbymib failure");
+		expect_d_eq(
+		    mallctlbymib(sizemib, 4, (void *)&bin_size, &len, NULL, 0),
+		    0, "bin size mallctlbymib failure");
 
 		len = sizeof(size_t);
-		expect_d_eq(mallctlbymib(slabmib, 4, (void *)&slab_size, &len,
-		    NULL, 0), 0, "slab size mallctlbymib failure");
+		expect_d_eq(
+		    mallctlbymib(slabmib, 4, (void *)&slab_size, &len, NULL, 0),
+		    0, "slab size mallctlbymib failure");
 
 		if (bin_size < 100) {
 			/*
@@ -51,8 +53,7 @@ TEST_BEGIN(test_slab_sizes) {
 			expect_zu_ge(slab_size, biggest_slab_seen,
 			    "Slab sizes should go up");
 			biggest_slab_seen = slab_size;
-		} else if (
-		    (100 <= bin_size && bin_size < 128)
+		} else if ((100 <= bin_size && bin_size < 128)
 		    || (128 < bin_size && bin_size <= 200)) {
 			expect_zu_eq(slab_size, page,
 			    "Forced-small slabs should be small");
@@ -75,6 +76,5 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_slab_sizes);
+	return test(test_slab_sizes);
 }
diff --git a/test/integration/smallocx.c b/test/integration/smallocx.c
index 389319b7..186a6492 100644
--- a/test/integration/smallocx.c
+++ b/test/integration/smallocx.c
@@ -5,25 +5,24 @@
 #define STR(x) STR_HELPER(x)
 
 #ifndef JEMALLOC_VERSION_GID_IDENT
-  #error "JEMALLOC_VERSION_GID_IDENT not defined"
+#	error "JEMALLOC_VERSION_GID_IDENT not defined"
 #endif
 
-#define JOIN(x, y) x ## y
+#define JOIN(x, y) x##y
 #define JOIN2(x, y) JOIN(x, y)
 #define smallocx JOIN2(smallocx_, JEMALLOC_VERSION_GID_IDENT)
 
 typedef struct {
-	void *ptr;
+	void  *ptr;
 	size_t size;
 } smallocx_return_t;
 
-extern smallocx_return_t
-smallocx(size_t size, int flags);
+extern smallocx_return_t smallocx(size_t size, int flags);
 
 static unsigned
 get_nsizes_impl(const char *cmd) {
 	unsigned ret;
-	size_t z;
+	size_t   z;
 
 	z = sizeof(unsigned);
 	expect_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0,
@@ -45,12 +44,12 @@ get_size_impl(const char *cmd, size_t ind) {
 	size_t miblen = 4;
 
 	z = sizeof(size_t);
-	expect_d_eq(mallctlnametomib(cmd, mib, &miblen),
-	    0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd);
+	expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0,
+	    "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd);
 	mib[2] = ind;
 	z = sizeof(size_t);
-	expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0),
-	    0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind);
+	expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0,
+	    "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind);
 
 	return ret;
 }
@@ -84,36 +83,37 @@ JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN
 TEST_BEGIN(test_overflow) {
 	size_t largemax;
 
-	largemax = get_large_size(get_nlarge()-1);
+	largemax = get_large_size(get_nlarge() - 1);
 
-	expect_ptr_null(smallocx(largemax+1, 0).ptr,
-	    "Expected OOM for smallocx(size=%#zx, 0)", largemax+1);
+	expect_ptr_null(smallocx(largemax + 1, 0).ptr,
+	    "Expected OOM for smallocx(size=%#zx, 0)", largemax + 1);
 
-	expect_ptr_null(smallocx(ZU(PTRDIFF_MAX)+1, 0).ptr,
-	    "Expected OOM for smallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX)+1);
+	expect_ptr_null(smallocx(ZU(PTRDIFF_MAX) + 1, 0).ptr,
+	    "Expected OOM for smallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX) + 1);
 
 	expect_ptr_null(smallocx(SIZE_T_MAX, 0).ptr,
 	    "Expected OOM for smallocx(size=%#zx, 0)", SIZE_T_MAX);
 
-	expect_ptr_null(smallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)).ptr,
+	expect_ptr_null(smallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX) + 1)).ptr,
 	    "Expected OOM for smallocx(size=1, MALLOCX_ALIGN(%#zx))",
-	    ZU(PTRDIFF_MAX)+1);
+	    ZU(PTRDIFF_MAX) + 1);
 }
 TEST_END
 
 static void *
 remote_alloc(void *arg) {
 	unsigned arena;
-	size_t sz = sizeof(unsigned);
+	size_t   sz = sizeof(unsigned);
 	expect_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0,
 	    "Unexpected mallctl() failure");
 	size_t large_sz;
 	sz = sizeof(size_t);
-	expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large_sz, &sz,
-	    NULL, 0), 0, "Unexpected mallctl failure");
+	expect_d_eq(
+	    mallctl("arenas.lextent.0.size", (void *)&large_sz, &sz, NULL, 0),
+	    0, "Unexpected mallctl failure");
 
-	smallocx_return_t r
-	    = smallocx(large_sz, MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE);
+	smallocx_return_t r = smallocx(
+	    large_sz, MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE);
 	void *ptr = r.ptr;
 	expect_zu_eq(r.size,
 	    nallocx(large_sz, MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE),
@@ -138,16 +138,16 @@ TEST_BEGIN(test_remote_free) {
 TEST_END
 
 TEST_BEGIN(test_oom) {
-	size_t largemax;
-	bool oom;
-	void *ptrs[3];
+	size_t   largemax;
+	bool     oom;
+	void    *ptrs[3];
 	unsigned i;
 
 	/*
 	 * It should be impossible to allocate three objects that each consume
 	 * nearly half the virtual address space.
 	 */
-	largemax = get_large_size(get_nlarge()-1);
+	largemax = get_large_size(get_nlarge() - 1);
 	oom = false;
 	for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) {
 		ptrs[i] = smallocx(largemax, 0).ptr;
@@ -167,10 +167,11 @@ TEST_BEGIN(test_oom) {
 
 #if LG_SIZEOF_PTR == 3
 	expect_ptr_null(smallocx(0x8000000000000000ULL,
-	    MALLOCX_ALIGN(0x8000000000000000ULL)).ptr,
+	                    MALLOCX_ALIGN(0x8000000000000000ULL))
+	                    .ptr,
 	    "Expected OOM for smallocx()");
-	expect_ptr_null(smallocx(0x8000000000000000ULL,
-	    MALLOCX_ALIGN(0x80000000)).ptr,
+	expect_ptr_null(
+	    smallocx(0x8000000000000000ULL, MALLOCX_ALIGN(0x80000000)).ptr,
 	    "Expected OOM for smallocx()");
 #else
 	expect_ptr_null(smallocx(0x80000000UL, MALLOCX_ALIGN(0x80000000UL)).ptr,
@@ -188,15 +189,15 @@ TEST_BEGIN(test_basic) {
 
 	for (sz = 1; sz < MAXSZ; sz = nallocx(sz, 0) + 1) {
 		smallocx_return_t ret;
-		size_t nsz, rsz, smz;
-		void *p;
+		size_t            nsz, rsz, smz;
+		void             *p;
 		nsz = nallocx(sz, 0);
 		expect_zu_ne(nsz, 0, "Unexpected nallocx() error");
 		ret = smallocx(sz, 0);
 		p = ret.ptr;
 		smz = ret.size;
-		expect_ptr_not_null(p,
-		    "Unexpected smallocx(size=%zx, flags=0) error", sz);
+		expect_ptr_not_null(
+		    p, "Unexpected smallocx(size=%zx, flags=0) error", sz);
 		rsz = sallocx(p, 0);
 		expect_zu_ge(rsz, sz, "Real size smaller than expected");
 		expect_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch");
@@ -206,8 +207,8 @@ TEST_BEGIN(test_basic) {
 		ret = smallocx(sz, 0);
 		p = ret.ptr;
 		smz = ret.size;
-		expect_ptr_not_null(p,
-		    "Unexpected smallocx(size=%zx, flags=0) error", sz);
+		expect_ptr_not_null(
+		    p, "Unexpected smallocx(size=%zx, flags=0) error", sz);
 		dallocx(p, 0);
 
 		nsz = nallocx(sz, MALLOCX_ZERO);
@@ -230,58 +231,61 @@ TEST_END
 
 TEST_BEGIN(test_alignment_and_size) {
 	const char *percpu_arena;
-	size_t sz = sizeof(percpu_arena);
+	size_t      sz = sizeof(percpu_arena);
 
-	if(mallctl("opt.percpu_arena", (void *)&percpu_arena, &sz, NULL, 0) ||
-	    strcmp(percpu_arena, "disabled") != 0) {
-		test_skip("test_alignment_and_size skipped: "
+	if (mallctl("opt.percpu_arena", (void *)&percpu_arena, &sz, NULL, 0)
+	    || strcmp(percpu_arena, "disabled") != 0) {
+		test_skip(
+		    "test_alignment_and_size skipped: "
 		    "not working with percpu arena.");
 	};
 #define MAXALIGN (((size_t)1) << 23)
 #define NITER 4
-	size_t nsz, rsz, smz, alignment, total;
+	size_t   nsz, rsz, smz, alignment, total;
 	unsigned i;
-	void *ps[NITER];
+	void    *ps[NITER];
 
 	for (i = 0; i < NITER; i++) {
 		ps[i] = NULL;
 	}
 
-	for (alignment = 8;
-	    alignment <= MAXALIGN;
-	    alignment <<= 1) {
+	for (alignment = 8; alignment <= MAXALIGN; alignment <<= 1) {
 		total = 0;
-		for (sz = 1;
-		    sz < 3 * alignment && sz < (1U << 31);
-		    sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) {
+		for (sz = 1; sz < 3 * alignment && sz < (1U << 31);
+		     sz += (alignment >> (LG_SIZEOF_PTR - 1)) - 1) {
 			for (i = 0; i < NITER; i++) {
-				nsz = nallocx(sz, MALLOCX_ALIGN(alignment) |
-				    MALLOCX_ZERO);
+				nsz = nallocx(sz,
+				    MALLOCX_ALIGN(alignment) | MALLOCX_ZERO);
 				expect_zu_ne(nsz, 0,
 				    "nallocx() error for alignment=%zu, "
-				    "size=%zu (%#zx)", alignment, sz, sz);
-				smallocx_return_t ret
-				    = smallocx(sz, MALLOCX_ALIGN(alignment) | MALLOCX_ZERO);
+				    "size=%zu (%#zx)",
+				    alignment, sz, sz);
+				smallocx_return_t ret = smallocx(sz,
+				    MALLOCX_ALIGN(alignment) | MALLOCX_ZERO);
 				ps[i] = ret.ptr;
 				expect_ptr_not_null(ps[i],
 				    "smallocx() error for alignment=%zu, "
-				    "size=%zu (%#zx)", alignment, sz, sz);
+				    "size=%zu (%#zx)",
+				    alignment, sz, sz);
 				rsz = sallocx(ps[i], 0);
 				smz = ret.size;
 				expect_zu_ge(rsz, sz,
 				    "Real size smaller than expected for "
-				    "alignment=%zu, size=%zu", alignment, sz);
+				    "alignment=%zu, size=%zu",
+				    alignment, sz);
 				expect_zu_eq(nsz, rsz,
 				    "nallocx()/sallocx() size mismatch for "
-				    "alignment=%zu, size=%zu", alignment, sz);
+				    "alignment=%zu, size=%zu",
+				    alignment, sz);
 				expect_zu_eq(nsz, smz,
 				    "nallocx()/smallocx() size mismatch for "
-				    "alignment=%zu, size=%zu", alignment, sz);
-				expect_ptr_null(
-				    (void *)((uintptr_t)ps[i] & (alignment-1)),
-				    "%p inadequately aligned for"
-				    " alignment=%zu, size=%zu", ps[i],
+				    "alignment=%zu, size=%zu",
 				    alignment, sz);
+				expect_ptr_null((void *)((uintptr_t)ps[i]
+				                    & (alignment - 1)),
+				    "%p inadequately aligned for"
+				    " alignment=%zu, size=%zu",
+				    ps[i], alignment, sz);
 				total += rsz;
 				if (total >= (MAXALIGN << 1)) {
 					break;
@@ -303,10 +307,6 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_overflow,
-	    test_oom,
-	    test_remote_free,
-	    test_basic,
+	return test(test_overflow, test_oom, test_remote_free, test_basic,
 	    test_alignment_and_size);
 }
diff --git a/test/integration/thread_arena.c b/test/integration/thread_arena.c
index 4a6abf64..48062183 100644
--- a/test/integration/thread_arena.c
+++ b/test/integration/thread_arena.c
@@ -5,10 +5,10 @@
 void *
 thd_start(void *arg) {
 	unsigned main_arena_ind = *(unsigned *)arg;
-	void *p;
+	void    *p;
 	unsigned arena_ind;
-	size_t size;
-	int err;
+	size_t   size;
+	int      err;
 
 	p = malloc(1);
 	expect_ptr_not_null(p, "Error in malloc()");
@@ -16,7 +16,7 @@ thd_start(void *arg) {
 
 	size = sizeof(arena_ind);
 	if ((err = mallctl("thread.arena", (void *)&arena_ind, &size,
-	    (void *)&main_arena_ind, sizeof(main_arena_ind)))) {
+	         (void *)&main_arena_ind, sizeof(main_arena_ind)))) {
 		char buf[BUFERROR_BUF];
 
 		buferror(err, buf, sizeof(buf));
@@ -24,8 +24,8 @@ thd_start(void *arg) {
 	}
 
 	size = sizeof(arena_ind);
-	if ((err = mallctl("thread.arena", (void *)&arena_ind, &size, NULL,
-	    0))) {
+	if ((err = mallctl(
+	         "thread.arena", (void *)&arena_ind, &size, NULL, 0))) {
 		char buf[BUFERROR_BUF];
 
 		buferror(err, buf, sizeof(buf));
@@ -46,28 +46,28 @@ mallctl_failure(int err) {
 }
 
 TEST_BEGIN(test_thread_arena) {
-	void *p;
-	int err;
-	thd_t thds[NTHREADS];
+	void    *p;
+	int      err;
+	thd_t    thds[NTHREADS];
 	unsigned i;
 
 	p = malloc(1);
 	expect_ptr_not_null(p, "Error in malloc()");
 
 	unsigned arena_ind, old_arena_ind;
-	size_t sz = sizeof(unsigned);
+	size_t   sz = sizeof(unsigned);
 	expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0),
 	    0, "Arena creation failure");
 
 	size_t size = sizeof(arena_ind);
 	if ((err = mallctl("thread.arena", (void *)&old_arena_ind, &size,
-	    (void *)&arena_ind, sizeof(arena_ind))) != 0) {
+	         (void *)&arena_ind, sizeof(arena_ind)))
+	    != 0) {
 		mallctl_failure(err);
 	}
 
 	for (i = 0; i < NTHREADS; i++) {
-		thd_create(&thds[i], thd_start,
-		    (void *)&arena_ind);
+		thd_create(&thds[i], thd_start, (void *)&arena_ind);
 	}
 
 	for (i = 0; i < NTHREADS; i++) {
@@ -81,6 +81,5 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_thread_arena);
+	return test(test_thread_arena);
 }
diff --git a/test/integration/thread_tcache_enabled.c b/test/integration/thread_tcache_enabled.c
index d44dbe90..3c7c95f6 100644
--- a/test/integration/thread_tcache_enabled.c
+++ b/test/integration/thread_tcache_enabled.c
@@ -2,60 +2,69 @@
 
 void *
 thd_start(void *arg) {
-	bool e0, e1;
+	bool   e0, e1;
 	size_t sz = sizeof(bool);
-	expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, NULL,
-	    0), 0, "Unexpected mallctl failure");
+	expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, NULL, 0),
+	    0, "Unexpected mallctl failure");
 
 	if (e0) {
 		e1 = false;
 		expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz,
-		    (void *)&e1, sz), 0, "Unexpected mallctl() error");
+		                (void *)&e1, sz),
+		    0, "Unexpected mallctl() error");
 		expect_true(e0, "tcache should be enabled");
 	}
 
 	e1 = true;
-	expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz,
-	    (void *)&e1, sz), 0, "Unexpected mallctl() error");
+	expect_d_eq(
+	    mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz),
+	    0, "Unexpected mallctl() error");
 	expect_false(e0, "tcache should be disabled");
 
 	e1 = true;
-	expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz,
-	    (void *)&e1, sz), 0, "Unexpected mallctl() error");
+	expect_d_eq(
+	    mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz),
+	    0, "Unexpected mallctl() error");
 	expect_true(e0, "tcache should be enabled");
 
 	e1 = false;
-	expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz,
-	    (void *)&e1, sz), 0, "Unexpected mallctl() error");
+	expect_d_eq(
+	    mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz),
+	    0, "Unexpected mallctl() error");
 	expect_true(e0, "tcache should be enabled");
 
 	e1 = false;
-	expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz,
-	    (void *)&e1, sz), 0, "Unexpected mallctl() error");
+	expect_d_eq(
+	    mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz),
+	    0, "Unexpected mallctl() error");
 	expect_false(e0, "tcache should be disabled");
 
 	free(malloc(1));
 	e1 = true;
-	expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz,
-	    (void *)&e1, sz), 0, "Unexpected mallctl() error");
+	expect_d_eq(
+	    mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz),
+	    0, "Unexpected mallctl() error");
 	expect_false(e0, "tcache should be disabled");
 
 	free(malloc(1));
 	e1 = true;
-	expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz,
-	    (void *)&e1, sz), 0, "Unexpected mallctl() error");
+	expect_d_eq(
+	    mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz),
+	    0, "Unexpected mallctl() error");
 	expect_true(e0, "tcache should be enabled");
 
 	free(malloc(1));
 	e1 = false;
-	expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz,
-	    (void *)&e1, sz), 0, "Unexpected mallctl() error");
+	expect_d_eq(
+	    mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz),
+	    0, "Unexpected mallctl() error");
 	expect_true(e0, "tcache should be enabled");
 
 	free(malloc(1));
 	e1 = false;
-	expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz,
-	    (void *)&e1, sz), 0, "Unexpected mallctl() error");
+	expect_d_eq(
+	    mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz),
+	    0, "Unexpected mallctl() error");
 	expect_false(e0, "tcache should be disabled");
 
 	free(malloc(1));
@@ -78,10 +87,6 @@ TEST_END
 int
 main(void) {
 	/* Run tests multiple times to check for bad interactions. */
-	return test(
-	    test_main_thread,
-	    test_subthread,
-	    test_main_thread,
-	    test_subthread,
-	    test_main_thread);
+	return test(test_main_thread, test_subthread, test_main_thread,
+	    test_subthread, test_main_thread);
 }
diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c
index 13708548..9b5ebcde 100644
--- a/test/integration/xallocx.c
+++ b/test/integration/xallocx.c
@@ -11,15 +11,16 @@ arena_ind(void) {
 
 	if (ind == 0) {
 		size_t sz = sizeof(ind);
-		expect_d_eq(mallctl("arenas.create", (void *)&ind, &sz, NULL,
-		    0), 0, "Unexpected mallctl failure creating arena");
+		expect_d_eq(
+		    mallctl("arenas.create", (void *)&ind, &sz, NULL, 0), 0,
+		    "Unexpected mallctl failure creating arena");
 	}
 
 	return ind;
 }
 
 TEST_BEGIN(test_same_size) {
-	void *p;
+	void  *p;
 	size_t sz, tsz;
 
 	p = mallocx(42, 0);
@@ -34,14 +35,14 @@ TEST_BEGIN(test_same_size) {
 TEST_END
 
 TEST_BEGIN(test_extra_no_move) {
-	void *p;
+	void  *p;
 	size_t sz, tsz;
 
 	p = mallocx(42, 0);
 	expect_ptr_not_null(p, "Unexpected mallocx() error");
 	sz = sallocx(p, 0);
 
-	tsz = xallocx(p, sz, sz-42, 0);
+	tsz = xallocx(p, sz, sz - 42, 0);
 	expect_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz);
 
 	dallocx(p, 0);
@@ -49,7 +50,7 @@ TEST_BEGIN(test_extra_no_move) {
 TEST_END
 
 TEST_BEGIN(test_no_move_fail) {
-	void *p;
+	void  *p;
 	size_t sz, tsz;
 
 	p = mallocx(42, 0);
@@ -66,7 +67,7 @@ TEST_END
 static unsigned
 get_nsizes_impl(const char *cmd) {
 	unsigned ret;
-	size_t z;
+	size_t   z;
 
 	z = sizeof(unsigned);
 	expect_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0,
@@ -93,12 +94,12 @@ get_size_impl(const char *cmd, size_t ind) {
 	size_t miblen = 4;
 
 	z = sizeof(size_t);
-	expect_d_eq(mallctlnametomib(cmd, mib, &miblen),
-	    0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd);
+	expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0,
+	    "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd);
 	mib[2] = ind;
 	z = sizeof(size_t);
-	expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0),
-	    0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind);
+	expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0,
+	    "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind);
 
 	return ret;
 }
@@ -115,25 +116,25 @@ get_large_size(size_t ind) {
 
 TEST_BEGIN(test_size) {
 	size_t small0, largemax;
-	void *p;
+	void  *p;
 
 	/* Get size classes. */
 	small0 = get_small_size(0);
-	largemax = get_large_size(get_nlarge()-1);
+	largemax = get_large_size(get_nlarge() - 1);
 
 	p = mallocx(small0, 0);
 	expect_ptr_not_null(p, "Unexpected mallocx() error");
 
 	/* Test smallest supported size. */
-	expect_zu_eq(xallocx(p, 1, 0, 0), small0,
-	    "Unexpected xallocx() behavior");
+	expect_zu_eq(
+	    xallocx(p, 1, 0, 0), small0, "Unexpected xallocx() behavior");
 
 	/* Test largest supported size. */
 	expect_zu_le(xallocx(p, largemax, 0, 0), largemax,
 	    "Unexpected xallocx() behavior");
 
 	/* Test size overflow. */
-	expect_zu_le(xallocx(p, largemax+1, 0, 0), largemax,
+	expect_zu_le(xallocx(p, largemax + 1, 0, 0), largemax,
 	    "Unexpected xallocx() behavior");
 	expect_zu_le(xallocx(p, SIZE_T_MAX, 0, 0), largemax,
 	    "Unexpected xallocx() behavior");
@@ -144,29 +145,29 @@ TEST_END
 
 TEST_BEGIN(test_size_extra_overflow) {
 	size_t small0, largemax;
-	void *p;
+	void  *p;
 
 	/* Get size classes. */
 	small0 = get_small_size(0);
-	largemax = get_large_size(get_nlarge()-1);
+	largemax = get_large_size(get_nlarge() - 1);
 
 	p = mallocx(small0, 0);
 	expect_ptr_not_null(p, "Unexpected mallocx() error");
 
 	/* Test overflows that can be resolved by clamping extra. */
-	expect_zu_le(xallocx(p, largemax-1, 2, 0), largemax,
+	expect_zu_le(xallocx(p, largemax - 1, 2, 0), largemax,
 	    "Unexpected xallocx() behavior");
 	expect_zu_le(xallocx(p, largemax, 1, 0), largemax,
 	    "Unexpected xallocx() behavior");
 
 	/* Test overflow such that largemax-size underflows. */
-	expect_zu_le(xallocx(p, largemax+1, 2, 0), largemax,
+	expect_zu_le(xallocx(p, largemax + 1, 2, 0), largemax,
 	    "Unexpected xallocx() behavior");
-	expect_zu_le(xallocx(p, largemax+2, 3, 0), largemax,
+	expect_zu_le(xallocx(p, largemax + 2, 3, 0), largemax,
 	    "Unexpected xallocx() behavior");
-	expect_zu_le(xallocx(p, SIZE_T_MAX-2, 2, 0), largemax,
+	expect_zu_le(xallocx(p, SIZE_T_MAX - 2, 2, 0), largemax,
 	    "Unexpected xallocx() behavior");
-	expect_zu_le(xallocx(p, SIZE_T_MAX-1, 1, 0), largemax,
+	expect_zu_le(xallocx(p, SIZE_T_MAX - 1, 1, 0), largemax,
 	    "Unexpected xallocx() behavior");
 
 	dallocx(p, 0);
@@ -175,21 +176,21 @@ TEST_END
 
 TEST_BEGIN(test_extra_small) {
 	size_t small0, small1, largemax;
-	void *p;
+	void  *p;
 
 	/* Get size classes. */
 	small0 = get_small_size(0);
 	small1 = get_small_size(1);
-	largemax = get_large_size(get_nlarge()-1);
+	largemax = get_large_size(get_nlarge() - 1);
 
 	p = mallocx(small0, 0);
 	expect_ptr_not_null(p, "Unexpected mallocx() error");
 
-	expect_zu_eq(xallocx(p, small1, 0, 0), small0,
-	    "Unexpected xallocx() behavior");
+	expect_zu_eq(
+	    xallocx(p, small1, 0, 0), small0, "Unexpected xallocx() behavior");
 
-	expect_zu_eq(xallocx(p, small1, 0, 0), small0,
-	    "Unexpected xallocx() behavior");
+	expect_zu_eq(
+	    xallocx(p, small1, 0, 0), small0, "Unexpected xallocx() behavior");
 
 	expect_zu_eq(xallocx(p, small0, small1 - small0, 0), small0,
 	    "Unexpected xallocx() behavior");
@@ -205,16 +206,16 @@ TEST_BEGIN(test_extra_small) {
 TEST_END
 
 TEST_BEGIN(test_extra_large) {
-	int flags = MALLOCX_ARENA(arena_ind());
+	int    flags = MALLOCX_ARENA(arena_ind());
 	size_t smallmax, large1, large2, large3, largemax;
-	void *p;
+	void  *p;
 
 	/* Get size classes. */
-	smallmax = get_small_size(get_nsmall()-1);
+	smallmax = get_small_size(get_nsmall() - 1);
 	large1 = get_large_size(1);
 	large2 = get_large_size(2);
 	large3 = get_large_size(3);
-	largemax = get_large_size(get_nlarge()-1);
+	largemax = get_large_size(get_nlarge() - 1);
 
 	p = mallocx(large3, flags);
 	expect_ptr_not_null(p, "Unexpected mallocx() error");
@@ -246,7 +247,7 @@ TEST_BEGIN(test_extra_large) {
 	/* Test size increase with zero extra. */
 	expect_zu_le(xallocx(p, large3, 0, flags), large3,
 	    "Unexpected xallocx() behavior");
-	expect_zu_le(xallocx(p, largemax+1, 0, flags), large3,
+	expect_zu_le(xallocx(p, largemax + 1, 0, flags), large3,
 	    "Unexpected xallocx() behavior");
 
 	expect_zu_ge(xallocx(p, large1, 0, flags), large1,
@@ -276,8 +277,8 @@ TEST_END
 static void
 print_filled_extents(const void *p, uint8_t c, size_t len) {
 	const uint8_t *pc = (const uint8_t *)p;
-	size_t i, range0;
-	uint8_t c0;
+	size_t         i, range0;
+	uint8_t        c0;
 
 	malloc_printf("  p=%p, c=%#x, len=%zu:", p, c, len);
 	range0 = 0;
@@ -295,10 +296,10 @@ print_filled_extents(const void *p, uint8_t c, size_t len) {
 static bool
 validate_fill(const void *p, uint8_t c, size_t offset, size_t len) {
 	const uint8_t *pc = (const uint8_t *)p;
-	bool err;
-	size_t i;
+	bool           err;
+	size_t         i;
 
-	for (i = offset, err = false; i < offset+len; i++) {
+	for (i = offset, err = false; i < offset + len; i++) {
 		if (pc[i] != c) {
 			err = true;
 		}
@@ -313,16 +314,16 @@ validate_fill(const void *p, uint8_t c, size_t offset, size_t len) {
 
 static void
 test_zero(size_t szmin, size_t szmax) {
-	int flags = MALLOCX_ARENA(arena_ind()) | MALLOCX_ZERO;
+	int    flags = MALLOCX_ARENA(arena_ind()) | MALLOCX_ZERO;
 	size_t sz, nsz;
-	void *p;
+	void  *p;
 #define FILL_BYTE 0x7aU
 
 	sz = szmax;
 	p = mallocx(sz, flags);
 	expect_ptr_not_null(p, "Unexpected mallocx() error");
-	expect_false(validate_fill(p, 0x00, 0, sz), "Memory not filled: sz=%zu",
-	    sz);
+	expect_false(
+	    validate_fill(p, 0x00, 0, sz), "Memory not filled: sz=%zu", sz);
 
 	/*
 	 * Fill with non-zero so that non-debug builds are more likely to detect
@@ -342,16 +343,16 @@ test_zero(size_t szmin, size_t szmax) {
 	    "Memory not filled: sz=%zu", sz);
 
 	for (sz = szmin; sz < szmax; sz = nsz) {
-		nsz = nallocx(sz+1, flags);
-		if (xallocx(p, sz+1, 0, flags) != nsz) {
-			p = rallocx(p, sz+1, flags);
+		nsz = nallocx(sz + 1, flags);
+		if (xallocx(p, sz + 1, 0, flags) != nsz) {
+			p = rallocx(p, sz + 1, flags);
 			expect_ptr_not_null(p, "Unexpected rallocx() failure");
 		}
 		expect_false(validate_fill(p, FILL_BYTE, 0, sz),
 		    "Memory not filled: sz=%zu", sz);
-		expect_false(validate_fill(p, 0x00, sz, nsz-sz),
-		    "Memory not filled: sz=%zu, nsz-sz=%zu", sz, nsz-sz);
-		memset((void *)((uintptr_t)p + sz), FILL_BYTE, nsz-sz);
+		expect_false(validate_fill(p, 0x00, sz, nsz - sz),
+		    "Memory not filled: sz=%zu, nsz-sz=%zu", sz, nsz - sz);
+		memset((void *)((uintptr_t)p + sz), FILL_BYTE, nsz - sz);
 		expect_false(validate_fill(p, FILL_BYTE, 0, nsz),
 		    "Memory not filled: nsz=%zu", nsz);
 	}
@@ -372,13 +373,7 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_same_size,
-	    test_extra_no_move,
-	    test_no_move_fail,
-	    test_size,
-	    test_size_extra_overflow,
-	    test_extra_small,
-	    test_extra_large,
-	    test_zero_large);
+	return test(test_same_size, test_extra_no_move, test_no_move_fail,
+	    test_size, test_size_extra_overflow, test_extra_small,
+	    test_extra_large, test_zero_large);
 }
diff --git a/test/src/SFMT.c b/test/src/SFMT.c
index c05e2183..87b1fd1c 100644
--- a/test/src/SFMT.c
+++ b/test/src/SFMT.c
@@ -50,19 +50,19 @@
 #include "test/SFMT-params.h"
 
 #if defined(JEMALLOC_BIG_ENDIAN) && !defined(BIG_ENDIAN64)
-#define BIG_ENDIAN64 1
+#	define BIG_ENDIAN64 1
 #endif
 #if defined(__BIG_ENDIAN__) && !defined(__amd64) && !defined(BIG_ENDIAN64)
-#define BIG_ENDIAN64 1
+#	define BIG_ENDIAN64 1
 #endif
 #if defined(HAVE_ALTIVEC) && !defined(BIG_ENDIAN64)
-#define BIG_ENDIAN64 1
+#	define BIG_ENDIAN64 1
 #endif
 #if defined(ONLY64) && !defined(BIG_ENDIAN64)
-  #if defined(__GNUC__)
-    #error "-DONLY64 must be specified with -DBIG_ENDIAN64"
-  #endif
-#undef ONLY64
+#	if defined(__GNUC__)
+#		error "-DONLY64 must be specified with -DBIG_ENDIAN64"
+#	endif
+#	undef ONLY64
 #endif
 /*------------------------------------------------------
   128-bit SIMD data type for Altivec, SSE2 or standard C
@@ -70,8 +70,8 @@
 #if defined(HAVE_ALTIVEC)
 /** 128-bit data structure */
 union W128_T {
-    vector unsigned int s;
-    uint32_t u[4];
+	vector unsigned int s;
+	uint32_t            u[4];
 };
 /** 128-bit data type */
 typedef union W128_T w128_t;
@@ -79,8 +79,8 @@ typedef union W128_T w128_t;
 #elif defined(HAVE_SSE2)
 /** 128-bit data structure */
 union W128_T {
-    __m128i si;
-    uint32_t u[4];
+	__m128i  si;
+	uint32_t u[4];
 };
 /** 128-bit data type */
 typedef union W128_T w128_t;
@@ -89,7 +89,7 @@ typedef union W128_T w128_t;
 
 /** 128-bit data structure */
 struct W128_T {
-    uint32_t u[4];
+	uint32_t u[4];
 };
 /** 128-bit data type */
 typedef struct W128_T w128_t;
@@ -97,13 +97,13 @@ typedef struct W128_T w128_t;
 #endif
 
 struct sfmt_s {
-    /** the 128-bit internal state array */
-    w128_t sfmt[N];
-    /** index counter to the 32-bit internal state array */
-    int idx;
-    /** a flag: it is 0 if and only if the internal state is not yet
+	/** the 128-bit internal state array */
+	w128_t sfmt[N];
+	/** index counter to the 32-bit internal state array */
+	int idx;
+	/** a flag: it is 0 if and only if the internal state is not yet
      * initialized. */
-    int initialized;
+	int initialized;
 };
 
 /*--------------------------------------
@@ -119,22 +119,22 @@ static uint32_t parity[4] = {PARITY1, PARITY2, PARITY3, PARITY4};
   ----------------*/
 static inline int idxof(int i);
 #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2))
-static inline void rshift128(w128_t *out,  w128_t const *in, int shift);
-static inline void lshift128(w128_t *out,  w128_t const *in, int shift);
+static inline void rshift128(w128_t *out, w128_t const *in, int shift);
+static inline void lshift128(w128_t *out, w128_t const *in, int shift);
 #endif
-static inline void gen_rand_all(sfmt_t *ctx);
-static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size);
+static inline void     gen_rand_all(sfmt_t *ctx);
+static inline void     gen_rand_array(sfmt_t *ctx, w128_t *array, int size);
 static inline uint32_t func1(uint32_t x);
 static inline uint32_t func2(uint32_t x);
-static void period_certification(sfmt_t *ctx);
+static void            period_certification(sfmt_t *ctx);
 #if defined(BIG_ENDIAN64) && !defined(ONLY64)
 static inline void swap(w128_t *array, int size);
 #endif
 
 #if defined(HAVE_ALTIVEC)
-  #include "test/SFMT-alti.h"
+#	include "test/SFMT-alti.h"
 #elif defined(HAVE_SSE2)
-  #include "test/SFMT-sse2.h"
+#	include "test/SFMT-sse2.h"
 #endif
 
 /**
@@ -142,12 +142,14 @@ static inline void swap(w128_t *array, int size);
  * in BIG ENDIAN machine.
  */
 #ifdef ONLY64
-static inline int idxof(int i) {
-    return i ^ 1;
+static inline int
+idxof(int i) {
+	return i ^ 1;
 }
 #else
-static inline int idxof(int i) {
-    return i;
+static inline int
+idxof(int i) {
+	return i;
 }
 #endif
 /**
@@ -159,37 +161,39 @@ static inline int idxof(int i) {
  * @param shift the shift value
  */
 #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2))
-#ifdef ONLY64
-static inline void rshift128(w128_t *out, w128_t const *in, int shift) {
-    uint64_t th, tl, oh, ol;
+#	ifdef ONLY64
+static inline void
+rshift128(w128_t *out, w128_t const *in, int shift) {
+	uint64_t th, tl, oh, ol;
 
-    th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]);
-    tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]);
+	th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]);
+	tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]);
 
-    oh = th >> (shift * 8);
-    ol = tl >> (shift * 8);
-    ol |= th << (64 - shift * 8);
-    out->u[0] = (uint32_t)(ol >> 32);
-    out->u[1] = (uint32_t)ol;
-    out->u[2] = (uint32_t)(oh >> 32);
-    out->u[3] = (uint32_t)oh;
+	oh = th >> (shift * 8);
+	ol = tl >> (shift * 8);
+	ol |= th << (64 - shift * 8);
+	out->u[0] = (uint32_t)(ol >> 32);
+	out->u[1] = (uint32_t)ol;
+	out->u[2] = (uint32_t)(oh >> 32);
+	out->u[3] = (uint32_t)oh;
 }
-#else
-static inline void rshift128(w128_t *out, w128_t const *in, int shift) {
-    uint64_t th, tl, oh, ol;
+#	else
+static inline void
+rshift128(w128_t *out, w128_t const *in, int shift) {
+	uint64_t th, tl, oh, ol;
 
-    th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]);
-    tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]);
+	th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]);
+	tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]);
 
-    oh = th >> (shift * 8);
-    ol = tl >> (shift * 8);
-    ol |= th << (64 - shift * 8);
-    out->u[1] = (uint32_t)(ol >> 32);
-    out->u[0] = (uint32_t)ol;
-    out->u[3] = (uint32_t)(oh >> 32);
-    out->u[2] = (uint32_t)oh;
+	oh = th >> (shift * 8);
+	ol = tl >> (shift * 8);
+	ol |= th << (64 - shift * 8);
+	out->u[1] = (uint32_t)(ol >> 32);
+	out->u[0] = (uint32_t)ol;
+	out->u[3] = (uint32_t)(oh >> 32);
+	out->u[2] = (uint32_t)oh;
 }
-#endif
+#	endif
 /**
  * This function simulates SIMD 128-bit left shift by the standard C.
  * The 128-bit integer given in in is shifted by (shift * 8) bits.
@@ -198,37 +202,39 @@ static inline void rshift128(w128_t *out, w128_t const *in, int shift) {
  * @param in the 128-bit data to be shifted
  * @param shift the shift value
  */
-#ifdef ONLY64
-static inline void lshift128(w128_t *out, w128_t const *in, int shift) {
-    uint64_t th, tl, oh, ol;
+#	ifdef ONLY64
+static inline void
+lshift128(w128_t *out, w128_t const *in, int shift) {
+	uint64_t th, tl, oh, ol;
 
-    th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]);
-    tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]);
+	th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]);
+	tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]);
 
-    oh = th << (shift * 8);
-    ol = tl << (shift * 8);
-    oh |= tl >> (64 - shift * 8);
-    out->u[0] = (uint32_t)(ol >> 32);
-    out->u[1] = (uint32_t)ol;
-    out->u[2] = (uint32_t)(oh >> 32);
-    out->u[3] = (uint32_t)oh;
+	oh = th << (shift * 8);
+	ol = tl << (shift * 8);
+	oh |= tl >> (64 - shift * 8);
+	out->u[0] = (uint32_t)(ol >> 32);
+	out->u[1] = (uint32_t)ol;
+	out->u[2] = (uint32_t)(oh >> 32);
+	out->u[3] = (uint32_t)oh;
 }
-#else
-static inline void lshift128(w128_t *out, w128_t const *in, int shift) {
-    uint64_t th, tl, oh, ol;
+#	else
+static inline void
+lshift128(w128_t *out, w128_t const *in, int shift) {
+	uint64_t th, tl, oh, ol;
 
-    th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]);
-    tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]);
+	th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]);
+	tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]);
 
-    oh = th << (shift * 8);
-    ol = tl << (shift * 8);
-    oh |= tl >> (64 - shift * 8);
-    out->u[1] = (uint32_t)(ol >> 32);
-    out->u[0] = (uint32_t)ol;
-    out->u[3] = (uint32_t)(oh >> 32);
-    out->u[2] = (uint32_t)oh;
+	oh = th << (shift * 8);
+	ol = tl << (shift * 8);
+	oh |= tl >> (64 - shift * 8);
+	out->u[1] = (uint32_t)(ol >> 32);
+	out->u[0] = (uint32_t)ol;
+	out->u[3] = (uint32_t)(oh >> 32);
+	out->u[2] = (uint32_t)oh;
 }
-#endif
+#	endif
 #endif
 
 /**
@@ -240,41 +246,41 @@ static inline void lshift128(w128_t *out, w128_t const *in, int shift) {
  * @param d a 128-bit part of the internal state array
  */
 #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2))
-#ifdef ONLY64
-static inline void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c,
-				w128_t *d) {
-    w128_t x;
-    w128_t y;
+#	ifdef ONLY64
+static inline void
+do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, w128_t *d) {
+	w128_t x;
+	w128_t y;
 
-    lshift128(&x, a, SL2);
-    rshift128(&y, c, SR2);
-    r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK2) ^ y.u[0]
-	^ (d->u[0] << SL1);
-    r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK1) ^ y.u[1]
-	^ (d->u[1] << SL1);
-    r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK4) ^ y.u[2]
-	^ (d->u[2] << SL1);
-    r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK3) ^ y.u[3]
-	^ (d->u[3] << SL1);
+	lshift128(&x, a, SL2);
+	rshift128(&y, c, SR2);
+	r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK2) ^ y.u[0]
+	    ^ (d->u[0] << SL1);
+	r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK1) ^ y.u[1]
+	    ^ (d->u[1] << SL1);
+	r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK4) ^ y.u[2]
+	    ^ (d->u[2] << SL1);
+	r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK3) ^ y.u[3]
+	    ^ (d->u[3] << SL1);
 }
-#else
-static inline void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c,
-				w128_t *d) {
-    w128_t x;
-    w128_t y;
+#	else
+static inline void
+do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, w128_t *d) {
+	w128_t x;
+	w128_t y;
 
-    lshift128(&x, a, SL2);
-    rshift128(&y, c, SR2);
-    r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK1) ^ y.u[0]
-	^ (d->u[0] << SL1);
-    r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK2) ^ y.u[1]
-	^ (d->u[1] << SL1);
-    r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK3) ^ y.u[2]
-	^ (d->u[2] << SL1);
-    r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK4) ^ y.u[3]
-	^ (d->u[3] << SL1);
+	lshift128(&x, a, SL2);
+	rshift128(&y, c, SR2);
+	r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK1) ^ y.u[0]
+	    ^ (d->u[0] << SL1);
+	r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK2) ^ y.u[1]
+	    ^ (d->u[1] << SL1);
+	r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK3) ^ y.u[2]
+	    ^ (d->u[2] << SL1);
+	r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK4) ^ y.u[3]
+	    ^ (d->u[3] << SL1);
 }
-#endif
+#	endif
 #endif
 
 #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2))
@@ -282,24 +288,25 @@ static inline void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c,
  * This function fills the internal state array with pseudorandom
  * integers.
  */
-static inline void gen_rand_all(sfmt_t *ctx) {
-    int i;
-    w128_t *r1, *r2;
+static inline void
+gen_rand_all(sfmt_t *ctx) {
+	int     i;
+	w128_t *r1, *r2;
 
-    r1 = &ctx->sfmt[N - 2];
-    r2 = &ctx->sfmt[N - 1];
-    for (i = 0; i < N - POS1; i++) {
-	do_recursion(&ctx->sfmt[i], &ctx->sfmt[i], &ctx->sfmt[i + POS1], r1,
-	  r2);
-	r1 = r2;
-	r2 = &ctx->sfmt[i];
-    }
-    for (; i < N; i++) {
-	do_recursion(&ctx->sfmt[i], &ctx->sfmt[i], &ctx->sfmt[i + POS1 - N], r1,
-	  r2);
-	r1 = r2;
-	r2 = &ctx->sfmt[i];
-    }
+	r1 = &ctx->sfmt[N - 2];
+	r2 = &ctx->sfmt[N - 1];
+	for (i = 0; i < N - POS1; i++) {
+		do_recursion(
+		    &ctx->sfmt[i], &ctx->sfmt[i], &ctx->sfmt[i + POS1], r1, r2);
+		r1 = r2;
+		r2 = &ctx->sfmt[i];
+	}
+	for (; i < N; i++) {
+		do_recursion(&ctx->sfmt[i], &ctx->sfmt[i],
+		    &ctx->sfmt[i + POS1 - N], r1, r2);
+		r1 = r2;
+		r2 = &ctx->sfmt[i];
+	}
 }
 
 /**
@@ -309,52 +316,58 @@ static inline void gen_rand_all(sfmt_t *ctx) {
  * @param array an 128-bit array to be filled by pseudorandom numbers.
  * @param size number of 128-bit pseudorandom numbers to be generated.
  */
-static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) {
-    int i, j;
-    w128_t *r1, *r2;
+static inline void
+gen_rand_array(sfmt_t *ctx, w128_t *array, int size) {
+	int     i, j;
+	w128_t *r1, *r2;
 
-    r1 = &ctx->sfmt[N - 2];
-    r2 = &ctx->sfmt[N - 1];
-    for (i = 0; i < N - POS1; i++) {
-	do_recursion(&array[i], &ctx->sfmt[i], &ctx->sfmt[i + POS1], r1, r2);
-	r1 = r2;
-	r2 = &array[i];
-    }
-    for (; i < N; i++) {
-	do_recursion(&array[i], &ctx->sfmt[i], &array[i + POS1 - N], r1, r2);
-	r1 = r2;
-	r2 = &array[i];
-    }
-    for (; i < size - N; i++) {
-	do_recursion(&array[i], &array[i - N], &array[i + POS1 - N], r1, r2);
-	r1 = r2;
-	r2 = &array[i];
-    }
-    for (j = 0; j < 2 * N - size; j++) {
-	ctx->sfmt[j] = array[j + size - N];
-    }
-    for (; i < size; i++, j++) {
-	do_recursion(&array[i], &array[i - N], &array[i + POS1 - N], r1, r2);
-	r1 = r2;
-	r2 = &array[i];
-	ctx->sfmt[j] = array[i];
-    }
+	r1 = &ctx->sfmt[N - 2];
+	r2 = &ctx->sfmt[N - 1];
+	for (i = 0; i < N - POS1; i++) {
+		do_recursion(
+		    &array[i], &ctx->sfmt[i], &ctx->sfmt[i + POS1], r1, r2);
+		r1 = r2;
+		r2 = &array[i];
+	}
+	for (; i < N; i++) {
+		do_recursion(
+		    &array[i], &ctx->sfmt[i], &array[i + POS1 - N], r1, r2);
+		r1 = r2;
+		r2 = &array[i];
+	}
+	for (; i < size - N; i++) {
+		do_recursion(
+		    &array[i], &array[i - N], &array[i + POS1 - N], r1, r2);
+		r1 = r2;
+		r2 = &array[i];
+	}
+	for (j = 0; j < 2 * N - size; j++) {
+		ctx->sfmt[j] = array[j + size - N];
+	}
+	for (; i < size; i++, j++) {
+		do_recursion(
+		    &array[i], &array[i - N], &array[i + POS1 - N], r1, r2);
+		r1 = r2;
+		r2 = &array[i];
+		ctx->sfmt[j] = array[i];
+	}
 }
 #endif
 
 #if defined(BIG_ENDIAN64) && !defined(ONLY64) && !defined(HAVE_ALTIVEC)
-static inline void swap(w128_t *array, int size) {
-    int i;
-    uint32_t x, y;
+static inline void
+swap(w128_t *array, int size) {
+	int      i;
+	uint32_t x, y;
 
-    for (i = 0; i < size; i++) {
-	x = array[i].u[0];
-	y = array[i].u[2];
-	array[i].u[0] = array[i].u[1];
-	array[i].u[2] = array[i].u[3];
-	array[i].u[1] = x;
-	array[i].u[3] = y;
-    }
+	for (i = 0; i < size; i++) {
+		x = array[i].u[0];
+		y = array[i].u[2];
+		array[i].u[0] = array[i].u[1];
+		array[i].u[2] = array[i].u[3];
+		array[i].u[1] = x;
+		array[i].u[3] = y;
+	}
 }
 #endif
 /**
@@ -363,8 +376,9 @@ static inline void swap(w128_t *array, int size) {
  * @param x 32-bit integer
  * @return 32-bit integer
  */
-static uint32_t func1(uint32_t x) {
-    return (x ^ (x >> 27)) * (uint32_t)1664525UL;
+static uint32_t
+func1(uint32_t x) {
+	return (x ^ (x >> 27)) * (uint32_t)1664525UL;
 }
 
 /**
@@ -373,39 +387,41 @@ static uint32_t func1(uint32_t x) {
  * @param x 32-bit integer
  * @return 32-bit integer
  */
-static uint32_t func2(uint32_t x) {
-    return (x ^ (x >> 27)) * (uint32_t)1566083941UL;
+static uint32_t
+func2(uint32_t x) {
+	return (x ^ (x >> 27)) * (uint32_t)1566083941UL;
 }
 
 /**
  * This function certificate the period of 2^{MEXP}
  */
-static void period_certification(sfmt_t *ctx) {
-    int inner = 0;
-    int i, j;
-    uint32_t work;
-    uint32_t *psfmt32 = &ctx->sfmt[0].u[0];
+static void
+period_certification(sfmt_t *ctx) {
+	int       inner = 0;
+	int       i, j;
+	uint32_t  work;
+	uint32_t *psfmt32 = &ctx->sfmt[0].u[0];
 
-    for (i = 0; i < 4; i++)
-	inner ^= psfmt32[idxof(i)] & parity[i];
-    for (i = 16; i > 0; i >>= 1)
-	inner ^= inner >> i;
-    inner &= 1;
-    /* check OK */
-    if (inner == 1) {
-	return;
-    }
-    /* check NG, and modification */
-    for (i = 0; i < 4; i++) {
-	work = 1;
-	for (j = 0; j < 32; j++) {
-	    if ((work & parity[i]) != 0) {
-		psfmt32[idxof(i)] ^= work;
+	for (i = 0; i < 4; i++)
+		inner ^= psfmt32[idxof(i)] & parity[i];
+	for (i = 16; i > 0; i >>= 1)
+		inner ^= inner >> i;
+	inner &= 1;
+	/* check OK */
+	if (inner == 1) {
 		return;
-	    }
-	    work = work << 1;
 	}
-    }
+	/* check NG, and modification */
+	for (i = 0; i < 4; i++) {
+		work = 1;
+		for (j = 0; j < 32; j++) {
+			if ((work & parity[i]) != 0) {
+				psfmt32[idxof(i)] ^= work;
+				return;
+			}
+			work = work << 1;
+		}
+	}
 }
 
 /*----------------
@@ -416,8 +432,9 @@ static void period_certification(sfmt_t *ctx) {
  * The string shows the word size, the Mersenne exponent,
  * and all parameters of this generator.
  */
-const char *get_idstring(void) {
-    return IDSTR;
+const char *
+get_idstring(void) {
+	return IDSTR;
 }
 
 /**
@@ -425,8 +442,9 @@ const char *get_idstring(void) {
  * fill_array32() function.
  * @return minimum size of array used for fill_array32() function.
  */
-int get_min_array_size32(void) {
-    return N32;
+int
+get_min_array_size32(void) {
+	return N32;
 }
 
 /**
@@ -434,8 +452,9 @@ int get_min_array_size32(void) {
  * fill_array64() function.
  * @return minimum size of array used for fill_array64() function.
  */
-int get_min_array_size64(void) {
-    return N64;
+int
+get_min_array_size64(void) {
+	return N64;
 }
 
 #ifndef ONLY64
@@ -444,32 +463,34 @@ int get_min_array_size64(void) {
  * init_gen_rand or init_by_array must be called before this function.
  * @return 32-bit pseudorandom number
  */
-uint32_t gen_rand32(sfmt_t *ctx) {
-    uint32_t r;
-    uint32_t *psfmt32 = &ctx->sfmt[0].u[0];
+uint32_t
+gen_rand32(sfmt_t *ctx) {
+	uint32_t  r;
+	uint32_t *psfmt32 = &ctx->sfmt[0].u[0];
 
-    assert(ctx->initialized);
-    if (ctx->idx >= N32) {
-	gen_rand_all(ctx);
-	ctx->idx = 0;
-    }
-    r = psfmt32[ctx->idx++];
-    return r;
+	assert(ctx->initialized);
+	if (ctx->idx >= N32) {
+		gen_rand_all(ctx);
+		ctx->idx = 0;
+	}
+	r = psfmt32[ctx->idx++];
+	return r;
 }
 
 /* Generate a random integer in [0..limit). */
-uint32_t gen_rand32_range(sfmt_t *ctx, uint32_t limit) {
-    uint32_t ret, above;
+uint32_t
+gen_rand32_range(sfmt_t *ctx, uint32_t limit) {
+	uint32_t ret, above;
 
-    above = 0xffffffffU - (0xffffffffU % limit);
-    while (1) {
-	ret = gen_rand32(ctx);
-	if (ret < above) {
-	    ret %= limit;
-	    break;
+	above = 0xffffffffU - (0xffffffffU % limit);
+	while (1) {
+		ret = gen_rand32(ctx);
+		if (ret < above) {
+			ret %= limit;
+			break;
+		}
 	}
-    }
-    return ret;
+	return ret;
 }
 #endif
 /**
@@ -479,47 +500,49 @@ uint32_t gen_rand32_range(sfmt_t *ctx, uint32_t limit) {
  * unless an initialization is again executed.
  * @return 64-bit pseudorandom number
  */
-uint64_t gen_rand64(sfmt_t *ctx) {
+uint64_t
+gen_rand64(sfmt_t *ctx) {
 #if defined(BIG_ENDIAN64) && !defined(ONLY64)
-    uint32_t r1, r2;
-    uint32_t *psfmt32 = &ctx->sfmt[0].u[0];
+	uint32_t  r1, r2;
+	uint32_t *psfmt32 = &ctx->sfmt[0].u[0];
 #else
-    uint64_t r;
-    uint64_t *psfmt64 = (uint64_t *)&ctx->sfmt[0].u[0];
+	uint64_t  r;
+	uint64_t *psfmt64 = (uint64_t *)&ctx->sfmt[0].u[0];
 #endif
 
-    assert(ctx->initialized);
-    assert(ctx->idx % 2 == 0);
+	assert(ctx->initialized);
+	assert(ctx->idx % 2 == 0);
 
-    if (ctx->idx >= N32) {
-	gen_rand_all(ctx);
-	ctx->idx = 0;
-    }
+	if (ctx->idx >= N32) {
+		gen_rand_all(ctx);
+		ctx->idx = 0;
+	}
 #if defined(BIG_ENDIAN64) && !defined(ONLY64)
-    r1 = psfmt32[ctx->idx];
-    r2 = psfmt32[ctx->idx + 1];
-    ctx->idx += 2;
-    return ((uint64_t)r2 << 32) | r1;
+	r1 = psfmt32[ctx->idx];
+	r2 = psfmt32[ctx->idx + 1];
+	ctx->idx += 2;
+	return ((uint64_t)r2 << 32) | r1;
 #else
-    r = psfmt64[ctx->idx / 2];
-    ctx->idx += 2;
-    return r;
+	r = psfmt64[ctx->idx / 2];
+	ctx->idx += 2;
+	return r;
 #endif
 }
 
 /* Generate a random integer in [0..limit). */
-uint64_t gen_rand64_range(sfmt_t *ctx, uint64_t limit) {
-    uint64_t ret, above;
+uint64_t
+gen_rand64_range(sfmt_t *ctx, uint64_t limit) {
+	uint64_t ret, above;
 
-    above = KQU(0xffffffffffffffff) - (KQU(0xffffffffffffffff) % limit);
-    while (1) {
-	ret = gen_rand64(ctx);
-	if (ret < above) {
-	    ret %= limit;
-	    break;
+	above = KQU(0xffffffffffffffff) - (KQU(0xffffffffffffffff) % limit);
+	while (1) {
+		ret = gen_rand64(ctx);
+		if (ret < above) {
+			ret %= limit;
+			break;
+		}
 	}
-    }
-    return ret;
+	return ret;
 }
 
 #ifndef ONLY64
@@ -548,14 +571,15 @@ uint64_t gen_rand64_range(sfmt_t *ctx, uint64_t limit) {
  * memory. Mac OSX doesn't have these functions, but \b malloc of OSX
  * returns the pointer to the aligned memory block.
  */
-void fill_array32(sfmt_t *ctx, uint32_t *array, int size) {
-    assert(ctx->initialized);
-    assert(ctx->idx == N32);
-    assert(size % 4 == 0);
-    assert(size >= N32);
+void
+fill_array32(sfmt_t *ctx, uint32_t *array, int size) {
+	assert(ctx->initialized);
+	assert(ctx->idx == N32);
+	assert(size % 4 == 0);
+	assert(size >= N32);
 
-    gen_rand_array(ctx, (w128_t *)array, size / 4);
-    ctx->idx = N32;
+	gen_rand_array(ctx, (w128_t *)array, size / 4);
+	ctx->idx = N32;
 }
 #endif
 
@@ -584,17 +608,18 @@ void fill_array32(sfmt_t *ctx, uint32_t *array, int size) {
  * memory. Mac OSX doesn't have these functions, but \b malloc of OSX
  * returns the pointer to the aligned memory block.
  */
-void fill_array64(sfmt_t *ctx, uint64_t *array, int size) {
-    assert(ctx->initialized);
-    assert(ctx->idx == N32);
-    assert(size % 2 == 0);
-    assert(size >= N64);
+void
+fill_array64(sfmt_t *ctx, uint64_t *array, int size) {
+	assert(ctx->initialized);
+	assert(ctx->idx == N32);
+	assert(size % 2 == 0);
+	assert(size >= N64);
 
-    gen_rand_array(ctx, (w128_t *)array, size / 2);
-    ctx->idx = N32;
+	gen_rand_array(ctx, (w128_t *)array, size / 2);
+	ctx->idx = N32;
 
 #if defined(BIG_ENDIAN64) && !defined(ONLY64)
-    swap((w128_t *)array, size /2);
+	swap((w128_t *)array, size / 2);
 #endif
 }
 
@@ -604,29 +629,31 @@ void fill_array64(sfmt_t *ctx, uint64_t *array, int size) {
  *
  * @param seed a 32-bit integer used as the seed.
  */
-sfmt_t *init_gen_rand(uint32_t seed) {
-    void *p;
-    sfmt_t *ctx;
-    int i;
-    uint32_t *psfmt32;
+sfmt_t *
+init_gen_rand(uint32_t seed) {
+	void     *p;
+	sfmt_t   *ctx;
+	int       i;
+	uint32_t *psfmt32;
 
-    if (posix_memalign(&p, sizeof(w128_t), sizeof(sfmt_t)) != 0) {
-	return NULL;
-    }
-    ctx = (sfmt_t *)p;
-    psfmt32 = &ctx->sfmt[0].u[0];
+	if (posix_memalign(&p, sizeof(w128_t), sizeof(sfmt_t)) != 0) {
+		return NULL;
+	}
+	ctx = (sfmt_t *)p;
+	psfmt32 = &ctx->sfmt[0].u[0];
 
-    psfmt32[idxof(0)] = seed;
-    for (i = 1; i < N32; i++) {
-	psfmt32[idxof(i)] = 1812433253UL * (psfmt32[idxof(i - 1)]
-					    ^ (psfmt32[idxof(i - 1)] >> 30))
-	    + i;
-    }
-    ctx->idx = N32;
-    period_certification(ctx);
-    ctx->initialized = 1;
+	psfmt32[idxof(0)] = seed;
+	for (i = 1; i < N32; i++) {
+		psfmt32[idxof(i)] = 1812433253UL
+		        * (psfmt32[idxof(i - 1)]
+		            ^ (psfmt32[idxof(i - 1)] >> 30))
+		    + i;
+	}
+	ctx->idx = N32;
+	period_certification(ctx);
+	ctx->initialized = 1;
 
-    return ctx;
+	return ctx;
 }
 
 /**
@@ -635,85 +662,87 @@ sfmt_t *init_gen_rand(uint32_t seed) {
  * @param init_key the array of 32-bit integers, used as a seed.
  * @param key_length the length of init_key.
  */
-sfmt_t *init_by_array(uint32_t *init_key, int key_length) {
-    void *p;
-    sfmt_t *ctx;
-    int i, j, count;
-    uint32_t r;
-    int lag;
-    int mid;
-    int size = N * 4;
-    uint32_t *psfmt32;
+sfmt_t *
+init_by_array(uint32_t *init_key, int key_length) {
+	void     *p;
+	sfmt_t   *ctx;
+	int       i, j, count;
+	uint32_t  r;
+	int       lag;
+	int       mid;
+	int       size = N * 4;
+	uint32_t *psfmt32;
 
-    if (posix_memalign(&p, sizeof(w128_t), sizeof(sfmt_t)) != 0) {
-	return NULL;
-    }
-    ctx = (sfmt_t *)p;
-    psfmt32 = &ctx->sfmt[0].u[0];
+	if (posix_memalign(&p, sizeof(w128_t), sizeof(sfmt_t)) != 0) {
+		return NULL;
+	}
+	ctx = (sfmt_t *)p;
+	psfmt32 = &ctx->sfmt[0].u[0];
 
-    if (size >= 623) {
-	lag = 11;
-    } else if (size >= 68) {
-	lag = 7;
-    } else if (size >= 39) {
-	lag = 5;
-    } else {
-	lag = 3;
-    }
-    mid = (size - lag) / 2;
+	if (size >= 623) {
+		lag = 11;
+	} else if (size >= 68) {
+		lag = 7;
+	} else if (size >= 39) {
+		lag = 5;
+	} else {
+		lag = 3;
+	}
+	mid = (size - lag) / 2;
 
-    memset(ctx->sfmt, 0x8b, sizeof(ctx->sfmt));
-    if (key_length + 1 > N32) {
-	count = key_length + 1;
-    } else {
-	count = N32;
-    }
-    r = func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid)]
-	      ^ psfmt32[idxof(N32 - 1)]);
-    psfmt32[idxof(mid)] += r;
-    r += key_length;
-    psfmt32[idxof(mid + lag)] += r;
-    psfmt32[idxof(0)] = r;
+	memset(ctx->sfmt, 0x8b, sizeof(ctx->sfmt));
+	if (key_length + 1 > N32) {
+		count = key_length + 1;
+	} else {
+		count = N32;
+	}
+	r = func1(
+	    psfmt32[idxof(0)] ^ psfmt32[idxof(mid)] ^ psfmt32[idxof(N32 - 1)]);
+	psfmt32[idxof(mid)] += r;
+	r += key_length;
+	psfmt32[idxof(mid + lag)] += r;
+	psfmt32[idxof(0)] = r;
 
-    count--;
-    for (i = 1, j = 0; (j < count) && (j < key_length); j++) {
-	r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)]
-		  ^ psfmt32[idxof((i + N32 - 1) % N32)]);
-	psfmt32[idxof((i + mid) % N32)] += r;
-	r += init_key[j] + i;
-	psfmt32[idxof((i + mid + lag) % N32)] += r;
-	psfmt32[idxof(i)] = r;
-	i = (i + 1) % N32;
-    }
-    for (; j < count; j++) {
-	r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)]
-		  ^ psfmt32[idxof((i + N32 - 1) % N32)]);
-	psfmt32[idxof((i + mid) % N32)] += r;
-	r += i;
-	psfmt32[idxof((i + mid + lag) % N32)] += r;
-	psfmt32[idxof(i)] = r;
-	i = (i + 1) % N32;
-    }
-    for (j = 0; j < N32; j++) {
-	r = func2(psfmt32[idxof(i)] + psfmt32[idxof((i + mid) % N32)]
-		  + psfmt32[idxof((i + N32 - 1) % N32)]);
-	psfmt32[idxof((i + mid) % N32)] ^= r;
-	r -= i;
-	psfmt32[idxof((i + mid + lag) % N32)] ^= r;
-	psfmt32[idxof(i)] = r;
-	i = (i + 1) % N32;
-    }
+	count--;
+	for (i = 1, j = 0; (j < count) && (j < key_length); j++) {
+		r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)]
+		    ^ psfmt32[idxof((i + N32 - 1) % N32)]);
+		psfmt32[idxof((i + mid) % N32)] += r;
+		r += init_key[j] + i;
+		psfmt32[idxof((i + mid + lag) % N32)] += r;
+		psfmt32[idxof(i)] = r;
+		i = (i + 1) % N32;
+	}
+	for (; j < count; j++) {
+		r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)]
+		    ^ psfmt32[idxof((i + N32 - 1) % N32)]);
+		psfmt32[idxof((i + mid) % N32)] += r;
+		r += i;
+		psfmt32[idxof((i + mid + lag) % N32)] += r;
+		psfmt32[idxof(i)] = r;
+		i = (i + 1) % N32;
+	}
+	for (j = 0; j < N32; j++) {
+		r = func2(psfmt32[idxof(i)] + psfmt32[idxof((i + mid) % N32)]
+		    + psfmt32[idxof((i + N32 - 1) % N32)]);
+		psfmt32[idxof((i + mid) % N32)] ^= r;
+		r -= i;
+		psfmt32[idxof((i + mid + lag) % N32)] ^= r;
+		psfmt32[idxof(i)] = r;
+		i = (i + 1) % N32;
+	}
 
-    ctx->idx = N32;
-    period_certification(ctx);
-    ctx->initialized = 1;
+	ctx->idx = N32;
+	period_certification(ctx);
+	ctx->initialized = 1;
 
-    return ctx;
+	return ctx;
 }
 
-void fini_gen_rand(sfmt_t *ctx) {
-    assert(ctx != NULL);
+void
+fini_gen_rand(sfmt_t *ctx) {
+	assert(ctx != NULL);
 
-    ctx->initialized = 0;
-    free(ctx);
+	ctx->initialized = 0;
+	free(ctx);
 }
diff --git a/test/src/mtx.c b/test/src/mtx.c
index d9ce375c..05c922bf 100644
--- a/test/src/mtx.c
+++ b/test/src/mtx.c
@@ -1,14 +1,14 @@
 #include "test/jemalloc_test.h"
 
-#ifndef _CRT_SPINCOUNT
-#define _CRT_SPINCOUNT 4000
+#if defined(_WIN32) && !defined(_CRT_SPINCOUNT)
+#	define _CRT_SPINCOUNT 4000
 #endif
 
 bool
 mtx_init(mtx_t *mtx) {
 #ifdef _WIN32
-	if (!InitializeCriticalSectionAndSpinCount(&mtx->lock,
-	    _CRT_SPINCOUNT)) {
+	if (!InitializeCriticalSectionAndSpinCount(
+	        &mtx->lock, _CRT_SPINCOUNT)) {
 		return true;
 	}
 #elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
diff --git a/test/src/sleep.c b/test/src/sleep.c
index 2234b4bc..96b9b7bf 100644
--- a/test/src/sleep.c
+++ b/test/src/sleep.c
@@ -6,7 +6,7 @@
  */
 void
 sleep_ns(unsigned ns) {
-	assert(ns <= 1000*1000*1000);
+	assert(ns <= 1000 * 1000 * 1000);
 
 #ifdef _WIN32
 	Sleep(ns / 1000 / 1000);
@@ -14,7 +14,7 @@ sleep_ns(unsigned ns) {
 	{
 		struct timespec timeout;
 
-		if (ns < 1000*1000*1000) {
+		if (ns < 1000 * 1000 * 1000) {
 			timeout.tv_sec = 0;
 			timeout.tv_nsec = ns;
 		} else {
diff --git a/test/src/test.c b/test/src/test.c
index 4cd803e5..e5e33ae6 100644
--- a/test/src/test.c
+++ b/test/src/test.c
@@ -2,10 +2,11 @@
 
 /* Test status state. */
 
-static unsigned		test_count = 0;
-static test_status_t	test_counts[test_status_count] = {0, 0, 0};
-static test_status_t	test_status = test_status_pass;
-static const char *	test_name = "";
+static unsigned      test_count = 0;
+static test_status_t test_counts[test_status_count] = {0, 0, 0};
+static test_status_t test_status = test_status_pass;
+static const char   *test_name = "";
+static const char   *selected_test_name = NULL;
 
 /* Reentrancy testing helpers. */
 
@@ -35,7 +36,7 @@ reentrancy_t_str(reentrancy_t r) {
 }
 
 static void
-do_hook(bool *hook_ran, void (**hook)()) {
+do_hook(bool *hook_ran, void (**hook)(void)) {
 	*hook_ran = true;
 	*hook = NULL;
 
@@ -47,12 +48,12 @@ do_hook(bool *hook_ran, void (**hook)()) {
 }
 
 static void
-libc_reentrancy_hook() {
+libc_reentrancy_hook(void) {
 	do_hook(&libc_hook_ran, &test_hooks_libc_hook);
 }
 
 static void
-arena_new_reentrancy_hook() {
+arena_new_reentrancy_hook(void) {
 	do_hook(&arena_new_hook_ran, &test_hooks_arena_new_hook);
 }
 
@@ -89,22 +90,37 @@ test_fail(const char *format, ...) {
 static const char *
 test_status_string(test_status_t current_status) {
 	switch (current_status) {
-	case test_status_pass: return "pass";
-	case test_status_skip: return "skip";
-	case test_status_fail: return "fail";
-	default: not_reached();
+	case test_status_pass:
+		return "pass";
+	case test_status_skip:
+		return "skip";
+	case test_status_fail:
+		return "fail";
+	default:
+		not_reached();
 	}
 }
 
-void
+bool
 p_test_init(const char *name) {
+	if (selected_test_name != NULL && strcmp(selected_test_name, name)) {
+		/* skip test */
+		return true;
+	}
+
 	test_count++;
 	test_status = test_status_pass;
 	test_name = name;
+
+	return false;
 }
 
 void
-p_test_fini(void) {
+p_test_fini(bool skip_test) {
+	if (skip_test) {
+		return;
+	}
+
 	test_counts[test_status]++;
 	malloc_printf("%s (%s): %s\n", test_name, reentrancy_t_str(reentrancy),
 	    test_status_string(test_status));
@@ -126,6 +142,8 @@ check_global_slow(test_status_t *status) {
 
 static test_status_t
 p_test_impl(bool do_malloc_init, bool do_reentrant, test_t *t, va_list ap) {
+	selected_test_name = getenv("JEMALLOC_TEST_NAME");
+
 	test_status_t ret;
 
 	if (do_malloc_init) {
@@ -173,13 +191,16 @@ p_test_impl(bool do_malloc_init, bool do_reentrant, test_t *t, va_list ap) {
 		}
 	}
 
-	malloc_printf("--- %s: %u/%u, %s: %u/%u, %s: %u/%u ---\n",
-	    test_status_string(test_status_pass),
+	bool colored = test_counts[test_status_fail] != 0
+	    && isatty(STDERR_FILENO);
+	const char *color_start = colored ? "\033[1;31m" : "";
+	const char *color_end = colored ? "\033[0m" : "";
+	malloc_printf("%s--- %s: %u/%u, %s: %u/%u, %s: %u/%u ---\n%s",
+	    color_start, test_status_string(test_status_pass),
 	    test_counts[test_status_pass], test_count,
-	    test_status_string(test_status_skip),
-	    test_counts[test_status_skip], test_count,
-	    test_status_string(test_status_fail),
-	    test_counts[test_status_fail], test_count);
+	    test_status_string(test_status_skip), test_counts[test_status_skip],
+	    test_count, test_status_string(test_status_fail),
+	    test_counts[test_status_fail], test_count, color_end);
 
 	return ret;
 }
@@ -187,7 +208,7 @@ p_test_impl(bool do_malloc_init, bool do_reentrant, test_t *t, va_list ap) {
 test_status_t
 p_test(test_t *t, ...) {
 	test_status_t ret;
-	va_list ap;
+	va_list       ap;
 
 	ret = test_status_pass;
 	va_start(ap, t);
@@ -200,7 +221,7 @@ p_test(test_t *t, ...) {
 test_status_t
 p_test_no_reentrancy(test_t *t, ...) {
 	test_status_t ret;
-	va_list ap;
+	va_list       ap;
 
 	ret = test_status_pass;
 	va_start(ap, t);
@@ -213,7 +234,7 @@ p_test_no_reentrancy(test_t *t, ...) {
 test_status_t
 p_test_no_malloc_init(test_t *t, ...) {
 	test_status_t ret;
-	va_list ap;
+	va_list       ap;
 
 	ret = test_status_pass;
 	va_start(ap, t);
@@ -228,7 +249,15 @@ p_test_no_malloc_init(test_t *t, ...) {
 }
 
 void
-p_test_fail(const char *prefix, const char *message) {
-	malloc_cprintf(NULL, NULL, "%s%s\n", prefix, message);
+p_test_fail(bool may_abort, const char *prefix, const char *message) {
+	bool colored = test_counts[test_status_fail] != 0
+	    && isatty(STDERR_FILENO);
+	const char *color_start = colored ? "\033[1;31m" : "";
+	const char *color_end = colored ? "\033[0m" : "";
+	malloc_cprintf(
+	    NULL, NULL, "%s%s%s\n%s", color_start, prefix, message, color_end);
 	test_status = test_status_fail;
+	if (may_abort) {
+		abort();
+	}
 }
diff --git a/test/src/thd.c b/test/src/thd.c
index 9a15eabb..634dc262 100644
--- a/test/src/thd.c
+++ b/test/src/thd.c
@@ -14,7 +14,7 @@ void
 thd_join(thd_t thd, void **ret) {
 	if (WaitForSingleObject(thd, INFINITE) == WAIT_OBJECT_0 && ret) {
 		DWORD exit_code;
-		GetExitCodeThread(thd, (LPDWORD) &exit_code);
+		GetExitCodeThread(thd, (LPDWORD)&exit_code);
 		*ret = (void *)(uintptr_t)exit_code;
 	}
 }
@@ -32,3 +32,22 @@ thd_join(thd_t thd, void **ret) {
 	pthread_join(thd, ret);
 }
 #endif
+
+void
+thd_setname(const char *name) {
+#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP
+	pthread_setname_np(pthread_self(), name);
+#elif defined(JEMALLOC_HAVE_PTHREAD_SET_NAME_NP)
+	pthread_set_name_np(pthread_self(), name);
+#endif
+}
+
+bool
+thd_has_setname(void) {
+#if defined(JEMALLOC_HAVE_PTHREAD_SETNAME_NP)                                  \
+    || defined(JEMALLOC_HAVE_PTHREAD_SET_NAME_NP)
+	return true;
+#else
+	return false;
+#endif
+}
diff --git a/test/src/timer.c b/test/src/timer.c
index 6e8b8edb..017bf5a5 100644
--- a/test/src/timer.c
+++ b/test/src/timer.c
@@ -25,11 +25,22 @@ timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen) {
 	uint64_t t0 = timer_usec(a);
 	uint64_t t1 = timer_usec(b);
 	uint64_t mult;
-	size_t i = 0;
-	size_t j, n;
+	size_t   i = 0;
+	size_t   j, n;
+
+	/*
+ 	* The time difference could be 0 if the two clock readings are
+ 	* identical, either due to the operations being measured in the middle
+ 	* took very little time (or even got optimized away), or the clock
+ 	* readings are bad / very coarse grained clock.
+ 	* Thus, bump t1 if it is 0 to avoid dividing 0.
+ 	*/
+	if (t1 == 0) {
+		t1 = 1;
+	}
 
 	/* Whole. */
-	n = malloc_snprintf(&buf[i], buflen-i, "%"FMTu64, t0 / t1);
+	n = malloc_snprintf(&buf[i], buflen - i, "%" FMTu64, t0 / t1);
 	i += n;
 	if (i >= buflen) {
 		return;
@@ -40,15 +51,17 @@ timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen) {
 	}
 
 	/* Decimal. */
-	n = malloc_snprintf(&buf[i], buflen-i, ".");
+	n = malloc_snprintf(&buf[i], buflen - i, ".");
 	i += n;
 
 	/* Fraction. */
-	while (i < buflen-1) {
-		uint64_t round = (i+1 == buflen-1 && ((t0 * mult * 10 / t1) % 10
-		    >= 5)) ? 1 : 0;
-		n = malloc_snprintf(&buf[i], buflen-i,
-		    "%"FMTu64, (t0 * mult / t1) % 10 + round);
+	while (i < buflen - 1) {
+		uint64_t round = (i + 1 == buflen - 1
+		                     && ((t0 * mult * 10 / t1) % 10 >= 5))
+		    ? 1
+		    : 0;
+		n = malloc_snprintf(&buf[i], buflen - i, "%" FMTu64,
+		    (t0 * mult / t1) % 10 + round);
 		i += n;
 		mult *= 10;
 	}
diff --git a/test/stress/batch_alloc.c b/test/stress/batch_alloc.c
index 427e1cba..46ed0bf7 100644
--- a/test/stress/batch_alloc.c
+++ b/test/stress/batch_alloc.c
@@ -10,9 +10,9 @@ static size_t miblen = MIBLEN;
 #define HUGE_BATCH (1000 * 1000)
 #define HUGE_BATCH_ITER 100
 #define LEN (100 * 1000 * 1000)
-static void *batch_ptrs[LEN];
+static void  *batch_ptrs[LEN];
 static size_t batch_ptrs_next = 0;
-static void *item_ptrs[LEN];
+static void  *item_ptrs[LEN];
 static size_t item_ptrs_next = 0;
 
 #define SIZE 7
@@ -22,17 +22,18 @@ struct batch_alloc_packet_s {
 	void **ptrs;
 	size_t num;
 	size_t size;
-	int flags;
+	int    flags;
 };
 
 static void
 batch_alloc_wrapper(size_t batch) {
-	batch_alloc_packet_t batch_alloc_packet =
-	    {batch_ptrs + batch_ptrs_next, batch, SIZE, 0};
+	batch_alloc_packet_t batch_alloc_packet = {
+	    batch_ptrs + batch_ptrs_next, batch, SIZE, 0};
 	size_t filled;
 	size_t len = sizeof(size_t);
 	assert_d_eq(mallctlbymib(mib, miblen, &filled, &len,
-	    &batch_alloc_packet, sizeof(batch_alloc_packet)), 0, "");
+	                &batch_alloc_packet, sizeof(batch_alloc_packet)),
+	    0, "");
 	assert_zu_eq(filled, batch, "");
 }
 
@@ -94,9 +95,9 @@ compare_without_free(size_t batch, size_t iter,
 	batch_ptrs_next = 0;
 	release_and_clear(item_ptrs, item_ptrs_next);
 	item_ptrs_next = 0;
-	compare_funcs(0, iter,
-	    "batch allocation", batch_alloc_without_free_func,
-	    "item allocation", item_alloc_without_free_func);
+	compare_funcs(0, iter, "batch allocation",
+	    batch_alloc_without_free_func, "item allocation",
+	    item_alloc_without_free_func);
 	release_and_clear(batch_ptrs, batch_ptrs_next);
 	batch_ptrs_next = 0;
 	release_and_clear(item_ptrs, item_ptrs_next);
@@ -116,20 +117,19 @@ compare_with_free(size_t batch, size_t iter,
 	}
 	batch_ptrs_next = 0;
 	item_ptrs_next = 0;
-	compare_funcs(0, iter,
-	    "batch allocation", batch_alloc_with_free_func,
+	compare_funcs(0, iter, "batch allocation", batch_alloc_with_free_func,
 	    "item allocation", item_alloc_with_free_func);
 	batch_ptrs_next = 0;
 	item_ptrs_next = 0;
 }
 
 static void
-batch_alloc_without_free_tiny() {
+batch_alloc_without_free_tiny(void) {
 	batch_alloc_without_free(TINY_BATCH);
 }
 
 static void
-item_alloc_without_free_tiny() {
+item_alloc_without_free_tiny(void) {
 	item_alloc_without_free(TINY_BATCH);
 }
 
@@ -140,12 +140,12 @@ TEST_BEGIN(test_tiny_batch_without_free) {
 TEST_END
 
 static void
-batch_alloc_with_free_tiny() {
+batch_alloc_with_free_tiny(void) {
 	batch_alloc_with_free(TINY_BATCH);
 }
 
 static void
-item_alloc_with_free_tiny() {
+item_alloc_with_free_tiny(void) {
 	item_alloc_with_free(TINY_BATCH);
 }
 
@@ -156,12 +156,12 @@ TEST_BEGIN(test_tiny_batch_with_free) {
 TEST_END
 
 static void
-batch_alloc_without_free_huge() {
+batch_alloc_without_free_huge(void) {
 	batch_alloc_without_free(HUGE_BATCH);
 }
 
 static void
-item_alloc_without_free_huge() {
+item_alloc_without_free_huge(void) {
 	item_alloc_without_free(HUGE_BATCH);
 }
 
@@ -172,12 +172,12 @@ TEST_BEGIN(test_huge_batch_without_free) {
 TEST_END
 
 static void
-batch_alloc_with_free_huge() {
+batch_alloc_with_free_huge(void) {
 	batch_alloc_with_free(HUGE_BATCH);
 }
 
 static void
-item_alloc_with_free_huge() {
+item_alloc_with_free_huge(void) {
 	item_alloc_with_free(HUGE_BATCH);
 }
 
@@ -187,12 +187,11 @@ TEST_BEGIN(test_huge_batch_with_free) {
 }
 TEST_END
 
-int main(void) {
-	assert_d_eq(mallctlnametomib("experimental.batch_alloc", mib, &miblen),
-	    0, "");
-	return test_no_reentrancy(
-	    test_tiny_batch_without_free,
-	    test_tiny_batch_with_free,
-	    test_huge_batch_without_free,
+int
+main(void) {
+	assert_d_eq(
+	    mallctlnametomib("experimental.batch_alloc", mib, &miblen), 0, "");
+	return test_no_reentrancy(test_tiny_batch_without_free,
+	    test_tiny_batch_with_free, test_huge_batch_without_free,
 	    test_huge_batch_with_free);
 }
diff --git a/test/stress/cpp/microbench.cpp b/test/stress/cpp/microbench.cpp
new file mode 100644
index 00000000..0c4697a6
--- /dev/null
+++ b/test/stress/cpp/microbench.cpp
@@ -0,0 +1,84 @@
+#include "test/jemalloc_test.h"
+#include "test/bench.h"
+
+static void
+malloc_free(void) {
+	void *p = malloc(1);
+	expect_ptr_not_null((void *)p, "Unexpected malloc failure");
+	p = no_opt_ptr(p);
+	free((void *)p);
+}
+
+static void
+new_delete(void) {
+	void *p = ::operator new(1);
+	expect_ptr_not_null((void *)p, "Unexpected new failure");
+	p = no_opt_ptr(p);
+	::operator delete((void *)p);
+}
+
+static void
+malloc_free_array(void) {
+	void *p = malloc(sizeof(int) * 8);
+	expect_ptr_not_null((void *)p, "Unexpected malloc failure");
+	p = no_opt_ptr(p);
+	free((void *)p);
+}
+
+static void
+new_delete_array(void) {
+	int *p = new int[8];
+	expect_ptr_not_null((void *)p, "Unexpected new[] failure");
+	p = (int *)no_opt_ptr((void *)p);
+	delete[] (int *)p;
+}
+
+#if __cpp_sized_deallocation >= 201309
+static void
+new_sized_delete(void) {
+	void *p = ::operator new(1);
+	expect_ptr_not_null((void *)p, "Unexpected new failure");
+	p = no_opt_ptr(p);
+	::operator delete((void *)p, 1);
+}
+
+static void
+malloc_sdallocx(void) {
+	void *p = malloc(1);
+	expect_ptr_not_null((void *)p, "Unexpected malloc failure");
+	p = no_opt_ptr(p);
+	sdallocx((void *)p, 1, 0);
+}
+#endif
+
+TEST_BEGIN(test_free_vs_delete) {
+	compare_funcs(10 * 1000 * 1000, 100 * 1000 * 1000, "malloc_free",
+	    (void *)malloc_free, "new_delete", (void *)new_delete);
+}
+TEST_END
+
+TEST_BEGIN(test_free_array_vs_delete_array) {
+	compare_funcs(10 * 1000 * 1000, 100 * 1000 * 1000, "malloc_free_array",
+	    (void *)malloc_free_array, "delete_array",
+	    (void *)new_delete_array);
+}
+TEST_END
+
+TEST_BEGIN(test_sized_delete_vs_sdallocx) {
+#if __cpp_sized_deallocation >= 201309
+	compare_funcs(10 * 1000 * 1000, 100 * 1000 * 1000, "new_size_delete",
+	    (void *)new_sized_delete, "malloc_sdallocx",
+	    (void *)malloc_sdallocx);
+#else
+	malloc_printf(
+	    "Skipping test_sized_delete_vs_sdallocx since \
+	    sized deallocation is not enabled.\n");
+#endif
+}
+TEST_END
+
+int
+main() {
+	return test_no_reentrancy(test_free_vs_delete,
+	    test_free_array_vs_delete_array, test_sized_delete_vs_sdallocx);
+}
diff --git a/test/stress/fill_flush.c b/test/stress/fill_flush.c
index a2db044d..c7b13404 100644
--- a/test/stress/fill_flush.c
+++ b/test/stress/fill_flush.c
@@ -5,6 +5,7 @@
 #define LARGE_ALLOC_SIZE SC_LARGE_MINCLASS
 #define NALLOCS 1000
 
+const char *malloc_conf = "tcache_ncached_max:8-128:1024";
 /*
  * We make this volatile so the 1-at-a-time variants can't leave the allocation
  * in a register, just to try to get the cache behavior closer.
@@ -34,9 +35,9 @@ item_alloc_dalloc_small(void) {
 }
 
 TEST_BEGIN(test_array_vs_item_small) {
-	compare_funcs(1 * 1000, 10 * 1000,
-	    "array of small allocations", array_alloc_dalloc_small,
-	    "small item allocation", item_alloc_dalloc_small);
+	compare_funcs(1 * 1000, 10 * 1000, "array of small allocations",
+	    array_alloc_dalloc_small, "small item allocation",
+	    item_alloc_dalloc_small);
 }
 TEST_END
 
@@ -63,14 +64,14 @@ item_alloc_dalloc_large(void) {
 }
 
 TEST_BEGIN(test_array_vs_item_large) {
-	compare_funcs(100, 1000,
-	    "array of large allocations", array_alloc_dalloc_large,
-	    "large item allocation", item_alloc_dalloc_large);
+	compare_funcs(100, 1000, "array of large allocations",
+	    array_alloc_dalloc_large, "large item allocation",
+	    item_alloc_dalloc_large);
 }
 TEST_END
 
-int main(void) {
+int
+main(void) {
 	return test_no_reentrancy(
-	    test_array_vs_item_small,
-	    test_array_vs_item_large);
+	    test_array_vs_item_small, test_array_vs_item_large);
 }
diff --git a/test/stress/hookbench.c b/test/stress/hookbench.c
index 97e90b0e..455e4c56 100644
--- a/test/stress/hookbench.c
+++ b/test/stress/hookbench.c
@@ -2,19 +2,16 @@
 
 static void
 noop_alloc_hook(void *extra, hook_alloc_t type, void *result,
-    uintptr_t result_raw, uintptr_t args_raw[3]) {
-}
+    uintptr_t result_raw, uintptr_t args_raw[3]) {}
 
 static void
-noop_dalloc_hook(void *extra, hook_dalloc_t type, void *address,
-    uintptr_t args_raw[3]) {
-}
+noop_dalloc_hook(
+    void *extra, hook_dalloc_t type, void *address, uintptr_t args_raw[3]) {}
 
 static void
 noop_expand_hook(void *extra, hook_expand_t type, void *address,
     size_t old_usize, size_t new_usize, uintptr_t result_raw,
-    uintptr_t args_raw[4]) {
-}
+    uintptr_t args_raw[4]) {}
 
 static void
 malloc_free_loop(int iters) {
@@ -26,23 +23,23 @@ malloc_free_loop(int iters) {
 
 static void
 test_hooked(int iters) {
-	hooks_t hooks = {&noop_alloc_hook, &noop_dalloc_hook, &noop_expand_hook,
-		NULL};
+	hooks_t hooks = {
+	    &noop_alloc_hook, &noop_dalloc_hook, &noop_expand_hook, NULL};
 
-	int err;
-	void *handles[HOOK_MAX];
+	int    err;
+	void  *handles[HOOK_MAX];
 	size_t sz = sizeof(handles[0]);
 
 	for (int i = 0; i < HOOK_MAX; i++) {
-		err = mallctl("experimental.hooks.install", &handles[i],
-		    &sz, &hooks, sizeof(hooks));
+		err = mallctl("experimental.hooks.install", &handles[i], &sz,
+		    &hooks, sizeof(hooks));
 		assert(err == 0);
 
 		timedelta_t timer;
 		timer_start(&timer);
 		malloc_free_loop(iters);
 		timer_stop(&timer);
-		malloc_printf("With %d hook%s: %"FMTu64"us\n", i + 1,
+		malloc_printf("With %d hook%s: %" FMTu64 "us\n", i + 1,
 		    i + 1 == 1 ? "" : "s", timer_usec(&timer));
 	}
 	for (int i = 0; i < HOOK_MAX; i++) {
@@ -59,7 +56,7 @@ test_unhooked(int iters) {
 	malloc_free_loop(iters);
 	timer_stop(&timer);
 
-	malloc_printf("Without hooks: %"FMTu64"us\n", timer_usec(&timer));
+	malloc_printf("Without hooks: %" FMTu64 "us\n", timer_usec(&timer));
 }
 
 int
diff --git a/test/stress/large_microbench.c b/test/stress/large_microbench.c
index c66b33a1..785ed836 100644
--- a/test/stress/large_microbench.c
+++ b/test/stress/large_microbench.c
@@ -9,6 +9,7 @@ large_mallocx_free(void) {
 	 */
 	void *p = mallocx(SC_LARGE_MINCLASS, MALLOCX_TCACHE_NONE);
 	assert_ptr_not_null(p, "mallocx shouldn't fail");
+	p = no_opt_ptr(p);
 	free(p);
 }
 
@@ -16,18 +17,17 @@ static void
 small_mallocx_free(void) {
 	void *p = mallocx(16, 0);
 	assert_ptr_not_null(p, "mallocx shouldn't fail");
+	p = no_opt_ptr(p);
 	free(p);
 }
 
 TEST_BEGIN(test_large_vs_small) {
-	compare_funcs(100*1000, 1*1000*1000, "large mallocx",
+	compare_funcs(100 * 1000, 1 * 1000 * 1000, "large mallocx",
 	    large_mallocx_free, "small mallocx", small_mallocx_free);
 }
 TEST_END
 
 int
 main(void) {
-	return test_no_reentrancy(
-	    test_large_vs_small);
+	return test_no_reentrancy(test_large_vs_small);
 }
-
diff --git a/test/stress/mallctl.c b/test/stress/mallctl.c
index d29b3118..b4c0f560 100644
--- a/test/stress/mallctl.c
+++ b/test/stress/mallctl.c
@@ -4,8 +4,8 @@
 static void
 mallctl_short(void) {
 	const char *version;
-	size_t sz = sizeof(version);
-	int err = mallctl("version", &version, &sz, NULL, 0);
+	size_t      sz = sizeof(version);
+	int         err = mallctl("version", &version, &sz, NULL, 0);
 	assert_d_eq(err, 0, "mallctl failure");
 }
 
@@ -13,19 +13,19 @@ size_t mib_short[1];
 
 static void
 mallctlbymib_short(void) {
-	size_t miblen = sizeof(mib_short)/sizeof(mib_short[0]);
+	size_t      miblen = sizeof(mib_short) / sizeof(mib_short[0]);
 	const char *version;
-	size_t sz = sizeof(version);
+	size_t      sz = sizeof(version);
 	int err = mallctlbymib(mib_short, miblen, &version, &sz, NULL, 0);
 	assert_d_eq(err, 0, "mallctlbymib failure");
 }
 
 TEST_BEGIN(test_mallctl_vs_mallctlbymib_short) {
-	size_t miblen = sizeof(mib_short)/sizeof(mib_short[0]);
+	size_t miblen = sizeof(mib_short) / sizeof(mib_short[0]);
 
 	int err = mallctlnametomib("version", mib_short, &miblen);
 	assert_d_eq(err, 0, "mallctlnametomib failure");
-	compare_funcs(10*1000*1000, 10*1000*1000, "mallctl_short",
+	compare_funcs(10 * 1000 * 1000, 10 * 1000 * 1000, "mallctl_short",
 	    mallctl_short, "mallctlbymib_short", mallctlbymib_short);
 }
 TEST_END
@@ -33,9 +33,9 @@ TEST_END
 static void
 mallctl_long(void) {
 	uint64_t nmalloc;
-	size_t sz = sizeof(nmalloc);
-	int err = mallctl("stats.arenas.0.bins.0.nmalloc", &nmalloc, &sz, NULL,
-	    0);
+	size_t   sz = sizeof(nmalloc);
+	int      err = mallctl(
+            "stats.arenas.0.bins.0.nmalloc", &nmalloc, &sz, NULL, 0);
 	assert_d_eq(err, 0, "mallctl failure");
 }
 
@@ -43,10 +43,10 @@ size_t mib_long[6];
 
 static void
 mallctlbymib_long(void) {
-	size_t miblen = sizeof(mib_long)/sizeof(mib_long[0]);
+	size_t   miblen = sizeof(mib_long) / sizeof(mib_long[0]);
 	uint64_t nmalloc;
-	size_t sz = sizeof(nmalloc);
-	int err = mallctlbymib(mib_long, miblen, &nmalloc, &sz, NULL, 0);
+	size_t   sz = sizeof(nmalloc);
+	int      err = mallctlbymib(mib_long, miblen, &nmalloc, &sz, NULL, 0);
 	assert_d_eq(err, 0, "mallctlbymib failure");
 }
 
@@ -57,18 +57,17 @@ TEST_BEGIN(test_mallctl_vs_mallctlbymib_long) {
 	 */
 	test_skip_if(!config_stats);
 
-	size_t miblen = sizeof(mib_long)/sizeof(mib_long[0]);
-	int err = mallctlnametomib("stats.arenas.0.bins.0.nmalloc", mib_long,
-	    &miblen);
+	size_t miblen = sizeof(mib_long) / sizeof(mib_long[0]);
+	int    err = mallctlnametomib(
+            "stats.arenas.0.bins.0.nmalloc", mib_long, &miblen);
 	assert_d_eq(err, 0, "mallctlnametomib failure");
-	compare_funcs(10*1000*1000, 10*1000*1000, "mallctl_long",
+	compare_funcs(10 * 1000 * 1000, 10 * 1000 * 1000, "mallctl_long",
 	    mallctl_long, "mallctlbymib_long", mallctlbymib_long);
 }
 TEST_END
 
 int
 main(void) {
-	return test_no_reentrancy(
-	    test_mallctl_vs_mallctlbymib_short,
+	return test_no_reentrancy(test_mallctl_vs_mallctlbymib_short,
 	    test_mallctl_vs_mallctlbymib_long);
 }
diff --git a/test/stress/microbench.c b/test/stress/microbench.c
index 062e32fd..3d261a92 100644
--- a/test/stress/microbench.c
+++ b/test/stress/microbench.c
@@ -9,6 +9,7 @@ malloc_free(void) {
 		test_fail("Unexpected malloc() failure");
 		return;
 	}
+	p = no_opt_ptr(p);
 	free(p);
 }
 
@@ -19,11 +20,12 @@ mallocx_free(void) {
 		test_fail("Unexpected mallocx() failure");
 		return;
 	}
+	p = no_opt_ptr(p);
 	free(p);
 }
 
 TEST_BEGIN(test_malloc_vs_mallocx) {
-	compare_funcs(10*1000*1000, 100*1000*1000, "malloc",
+	compare_funcs(10 * 1000 * 1000, 100 * 1000 * 1000, "malloc",
 	    malloc_free, "mallocx", mallocx_free);
 }
 TEST_END
@@ -35,6 +37,7 @@ malloc_dallocx(void) {
 		test_fail("Unexpected malloc() failure");
 		return;
 	}
+	p = no_opt_ptr(p);
 	dallocx(p, 0);
 }
 
@@ -45,18 +48,19 @@ malloc_sdallocx(void) {
 		test_fail("Unexpected malloc() failure");
 		return;
 	}
+	p = no_opt_ptr(p);
 	sdallocx(p, 1, 0);
 }
 
 TEST_BEGIN(test_free_vs_dallocx) {
-	compare_funcs(10*1000*1000, 100*1000*1000, "free", malloc_free,
+	compare_funcs(10 * 1000 * 1000, 100 * 1000 * 1000, "free", malloc_free,
 	    "dallocx", malloc_dallocx);
 }
 TEST_END
 
 TEST_BEGIN(test_dallocx_vs_sdallocx) {
-	compare_funcs(10*1000*1000, 100*1000*1000, "dallocx", malloc_dallocx,
-	    "sdallocx", malloc_sdallocx);
+	compare_funcs(10 * 1000 * 1000, 100 * 1000 * 1000, "dallocx",
+	    malloc_dallocx, "sdallocx", malloc_sdallocx);
 }
 TEST_END
 
@@ -82,6 +86,7 @@ malloc_sallocx_free(void) {
 		test_fail("Unexpected malloc() failure");
 		return;
 	}
+	p = no_opt_ptr(p);
 	if (sallocx(p, 0) < 1) {
 		test_fail("Unexpected sallocx() failure");
 	}
@@ -89,7 +94,7 @@ malloc_sallocx_free(void) {
 }
 
 TEST_BEGIN(test_mus_vs_sallocx) {
-	compare_funcs(10*1000*1000, 100*1000*1000, "malloc_usable_size",
+	compare_funcs(10 * 1000 * 1000, 100 * 1000 * 1000, "malloc_usable_size",
 	    malloc_mus_free, "sallocx", malloc_sallocx_free);
 }
 TEST_END
@@ -103,6 +108,7 @@ malloc_nallocx_free(void) {
 		test_fail("Unexpected malloc() failure");
 		return;
 	}
+	p = no_opt_ptr(p);
 	if (nallocx(1, 0) < 1) {
 		test_fail("Unexpected nallocx() failure");
 	}
@@ -110,17 +116,14 @@ malloc_nallocx_free(void) {
 }
 
 TEST_BEGIN(test_sallocx_vs_nallocx) {
-	compare_funcs(10*1000*1000, 100*1000*1000, "sallocx",
+	compare_funcs(10 * 1000 * 1000, 100 * 1000 * 1000, "sallocx",
 	    malloc_sallocx_free, "nallocx", malloc_nallocx_free);
 }
 TEST_END
 
 int
 main(void) {
-	return test_no_reentrancy(
-	    test_malloc_vs_mallocx,
-	    test_free_vs_dallocx,
-	    test_dallocx_vs_sdallocx,
-	    test_mus_vs_sallocx,
+	return test_no_reentrancy(test_malloc_vs_mallocx, test_free_vs_dallocx,
+	    test_dallocx_vs_sdallocx, test_mus_vs_sallocx,
 	    test_sallocx_vs_nallocx);
 }
diff --git a/test/stress/pa/.gitignore b/test/stress/pa/.gitignore
new file mode 100644
index 00000000..378ee4e0
--- /dev/null
+++ b/test/stress/pa/.gitignore
@@ -0,0 +1,23 @@
+# Ignore executable files
+pa_microbench
+pa_data_preprocessor
+
+# Ignore object files
+*.o
+*.d
+
+# Ignore temporary and backup files
+*~
+*.tmp
+*.bak
+
+# Ignore compiled output files
+*.out
+
+# Keep source files and documentation
+!*.c
+!*.cpp
+!*.h
+!*.md
+!*.sh
+!Makefile*
diff --git a/test/stress/pa/README.md b/test/stress/pa/README.md
new file mode 100644
index 00000000..4ae59bb9
--- /dev/null
+++ b/test/stress/pa/README.md
@@ -0,0 +1,118 @@
+# Page Allocator (PA) Microbenchmark Suite
+
+This directory contains a comprehensive microbenchmark suite for testing and analyzing jemalloc's Page Allocator (PA) system, including the Hugepage-aware Page Allocator (HPA) and Slab Extent Cache (SEC) components.
+
+## Overview
+
+The PA microbenchmark suite consists of two main programs designed to preprocess allocation traces and replay them against jemalloc's internal PA system to measure performance, memory usage, and allocation patterns.
+
+To summarize how to run it, assume we have a file `test/stress/pa/data/hpa.csv` collected from a real application using USDT, the simulation can be run as follows:
+```
+make tests_pa
+./test/stress/pa/pa_data_preprocessor hpa test/stress/pa/data/hpa.csv test/stress/pa/data/sample_hpa_output.csv
+./test/stress/pa/pa_microbench -p -o test/stress/pa/data/sample_hpa_stats.csv test/stress/pa/data/sample_hpa_output.csv
+```
+
+If it's sec, simply replace the first parameter passed to `pa_data_preprocessor` with sec.
+
+## Architecture
+
+### PA System Components
+
+The Page Allocator sits at the core of jemalloc's memory management hierarchy:
+
+```
+Application
+    ↓
+Arena (tcache, bins)
+    ↓
+PA (Page Allocator) ← This is what we benchmark
+    ├── HPA (Hugepage-aware Page Allocator)
+    └── SEC (Slab Extent Cache)
+    ↓
+Extent Management (emap, edata)
+    ↓
+Base Allocator
+    ↓
+OS (mmap/munmap)
+```
+
+### Microbenchmark Architecture
+
+```
+Raw Allocation Traces
+    ↓
+[pa_data_preprocessor] ← Preprocesses and filters traces
+    ↓
+CSV alloc/dalloc Files
+    ↓
+[pa_microbench] ← Replays against real PA system
+    ↓
+Performance Statistics & Analysis
+```
+
+## Programs
+
+### 1. pa_data_preprocessor
+
+A C++ data preprocessing tool that converts raw allocation traces into a standardized CSV format suitable for microbenchmarking.
+
+**Purpose:**
+- Parse and filter raw allocation trace data
+- Convert various trace formats to standardized CSV
+- Filter by process ID, thread ID, or other criteria
+- Validate and clean allocation/deallocation sequences
+
+### 2. pa_microbench
+
+A C microbenchmark that replays allocation traces against jemalloc's actual PA system to measure performance and behavior with HPA statistics collection.
+
+**Purpose:**
+- Initialize real PA infrastructure (HPA, SEC, base allocators, emaps)
+- Replay allocation/deallocation sequences from CSV traces
+- Measure allocation latency, memory usage, and fragmentation
+- Test different PA configurations (HPA-only vs HPA+SEC)
+- Generate detailed HPA internal statistics
+
+**Key Features:**
+- **Real PA Integration**: Uses jemalloc's actual PA implementation, not simulation
+- **Multi-shard Support**: Tests allocation patterns across multiple PA shards
+- **Configurable Modes**: Supports HPA-only mode (`-p`) and HPA+SEC mode (`-s`)
+- **Statistics Output**: Detailed per-shard statistics and timing data
+- **Configurable Intervals**: Customizable statistics output frequency (`-i/--interval`)
+
+## Building
+
+### Compilation
+
+```bash
+# Build both PA microbenchmark tools
+cd /path/to/jemalloc
+make tests_pa
+```
+
+This creates:
+- `test/stress/pa/pa_data_preprocessor` - Data preprocessing tool
+- `test/stress/pa/pa_microbench` - PA microbenchmark
+
+## Usage
+
+### Data Preprocessing
+
+```bash
+# Basic preprocessing
+./test/stress/pa/pa_data_preprocessor <hpa/sec> input_trace.txt output.csv
+```
+
+### Microbenchmark Execution
+
+```bash
+# Run with HPA + SEC (default mode)
+./test/stress/pa/pa_microbench -s -o stats.csv trace.csv
+
+# Run with HPA-only (no SEC)
+./test/stress/pa/pa_microbench -p -o stats.csv trace.csv
+
+# Show help
+./test/stress/pa/pa_microbench -h
+```
diff --git a/test/stress/pa/data/.gitignore b/test/stress/pa/data/.gitignore
new file mode 100644
index 00000000..3b8ddcbb
--- /dev/null
+++ b/test/stress/pa/data/.gitignore
@@ -0,0 +1,6 @@
+# Ignore data files
+*.csv
+
+# But keep example files
+!example_*.csv
+!*.md
diff --git a/test/stress/pa/pa_data_preprocessor.cpp b/test/stress/pa/pa_data_preprocessor.cpp
new file mode 100644
index 00000000..44e84e8c
--- /dev/null
+++ b/test/stress/pa/pa_data_preprocessor.cpp
@@ -0,0 +1,425 @@
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <cstdint>
+#include <cassert>
+
+/*
+ * Page Allocator Data Preprocessor (C++ Version)
+ *
+ * This tool processes real allocation traces (collected via BPF)
+ * and converts them into a format suitable for the PA simulator.
+ *
+ * Supported input formats:
+ *   HPA: shard_ind_int,addr_int,nsecs_int,probe,size_int
+ *   SEC: process_id,thread_id,thread_name,nsecs_int,_c4,sec_ptr_int,sec_shard_ptr_int,edata_ptr_int,size_int,is_frequent_reuse_int
+ *
+ * Output format (5 columns):
+ *   shard_ind_int,operation_index,size_or_alloc_index,nsecs,is_frequent
+ *   where:
+ *   - shard_ind_int: shard index as integer
+ *   - operation_index: 0=alloc, 1=dalloc
+ *   - size_or_alloc_index: for alloc operations show bytes,
+ *                          for dalloc operations show index of corresponding alloc
+ *   - nsecs: nanonosec of some monotonic clock
+ *   - is_frequent: 1 if frequent reuse allocation, 0 otherwise
+ */
+
+enum class TraceFormat { HPA, SEC };
+
+struct TraceEvent {
+	int         shard_ind;
+	uintptr_t   addr;
+	uint64_t    nsecs;
+	std::string probe;
+	size_t      size;
+	bool        is_frequent;
+};
+
+struct AllocationRecord {
+	uintptr_t addr;
+	size_t    size;
+	int       shard_ind;
+	size_t    alloc_index;
+	uint64_t  nsecs;
+};
+
+class AllocationTracker {
+      private:
+	std::unordered_map<uintptr_t, AllocationRecord> records_;
+
+      public:
+	void
+	add_allocation(uintptr_t addr, size_t size, int shard_ind,
+	    size_t alloc_index, uint64_t nsecs) {
+		records_[addr] = {addr, size, shard_ind, alloc_index, nsecs};
+	}
+
+	AllocationRecord *
+	find_allocation(uintptr_t addr) {
+		auto it = records_.find(addr);
+		return (it != records_.end()) ? &it->second : nullptr;
+	}
+
+	void
+	remove_allocation(uintptr_t addr) {
+		records_.erase(addr);
+	}
+
+	size_t
+	count() const {
+		return records_.size();
+	}
+};
+
+class ArenaMapper {
+      private:
+	std::unordered_map<uintptr_t, int> sec_ptr_to_arena_;
+	int                                next_arena_index_;
+
+      public:
+	ArenaMapper() : next_arena_index_(0) {}
+
+	int
+	get_arena_index(uintptr_t sec_ptr) {
+		if (sec_ptr == 0) {
+			/* Should not be seeing null sec pointer anywhere. Use this as a sanity check.*/
+			return 0;
+		}
+
+		auto it = sec_ptr_to_arena_.find(sec_ptr);
+		if (it != sec_ptr_to_arena_.end()) {
+			return it->second;
+		}
+
+		/* New sec_ptr, assign next available arena index */
+		int arena_index = next_arena_index_++;
+		sec_ptr_to_arena_[sec_ptr] = arena_index;
+		return arena_index;
+	}
+
+	size_t
+	arena_count() const {
+		return sec_ptr_to_arena_.size();
+	}
+};
+
+bool
+is_alloc_operation(const std::string &probe) {
+	return (probe == "hpa_alloc" || probe == "sec_alloc");
+}
+
+bool
+is_dalloc_operation(const std::string &probe) {
+	return (probe == "hpa_dalloc" || probe == "sec_dalloc");
+}
+
+bool
+parse_hpa_line(const std::string &line, TraceEvent &event) {
+	std::istringstream ss(line);
+	std::string        token;
+
+	/* Parse shard_ind_int */
+	if (!std::getline(ss, token, ',')) {
+		return true;
+	}
+	event.shard_ind = std::stoi(token);
+
+	/* Parse addr_int */
+	if (!std::getline(ss, token, ',')) {
+		return true;
+	}
+	event.addr = std::stoull(token);
+
+	/* Parse nsecs_int */
+	if (!std::getline(ss, token, ',')) {
+		return true;
+	}
+	event.nsecs = std::stoull(token);
+
+	/* Parse probe */
+	if (!std::getline(ss, token, ',')) {
+		return true;
+	}
+	event.probe = token;
+
+	/* Parse size_int */
+	if (!std::getline(ss, token, ',')) {
+		return true;
+	}
+	event.size = std::stoull(token);
+
+	/* HPA format doesn't have is_frequent field, set default */
+	event.is_frequent = true;
+
+	return false;
+}
+
+bool
+parse_sec_line(
+    const std::string &line, TraceEvent &event, ArenaMapper &arena_mapper) {
+	std::istringstream ss(line);
+	std::string        token;
+
+	/* Skip process_id */
+	if (!std::getline(ss, token, ',')) {
+		return true;
+	}
+
+	/* Skip thread_id */
+	if (!std::getline(ss, token, ',')) {
+		return true;
+	}
+
+	/* Skip thread_name */
+	if (!std::getline(ss, token, ',')) {
+		return true;
+	}
+
+	/* Parse nsecs_int */
+	if (!std::getline(ss, token, ',')) {
+		return true;
+	}
+	event.nsecs = std::stoull(token);
+
+	/* Parse operation */
+	if (!std::getline(ss, token, ',')) {
+		return true;
+	}
+
+	event.probe = token;
+
+	/* Parse sec_ptr_int (used for arena mapping) */
+	uintptr_t sec_ptr = 0;
+	if (!std::getline(ss, token, ',')) {
+		return true;
+	}
+	if (!token.empty()) {
+		sec_ptr = std::stoull(token);
+	}
+
+	/* Map sec_ptr to arena index */
+	event.shard_ind = arena_mapper.get_arena_index(sec_ptr);
+
+	/* Skip sec_shard_ptr_int */
+	if (!std::getline(ss, token, ',')) {
+		return true;
+	}
+
+	/* Parse edata_ptr_int (used as the address) */
+	if (!std::getline(ss, token, ',')) {
+		return true;
+	}
+	if (!token.empty()) {
+		event.addr = std::stoull(token);
+	} else {
+		event.addr = 0;
+	}
+
+	/* Parse size_int */
+	if (!std::getline(ss, token, ',')
+	    && !is_dalloc_operation(event.probe)) {
+		/* SEC format may not always have size for dalloc */
+		return true;
+	}
+	if (!token.empty()) {
+		event.size = std::stoull(token);
+	} else {
+		/* When no size given, this is a dalloc, size won't be used. */
+		event.size = 0;
+	}
+
+	/* Parse is_frequent_reuse_int */
+	if (!std::getline(ss, token, ',')
+	    && !is_dalloc_operation(event.probe)) {
+		return true;
+	}
+	if (!token.empty()) {
+		event.is_frequent = (std::stoi(token) != 0);
+	} else {
+		/*
+		 * When no is_frequent_reuse_int given, this is a dalloc,
+		 * is_frequent won't be used.
+		 */
+		event.is_frequent = false;
+	}
+
+	return false;
+}
+
+void
+write_output_header(std::ofstream &output) {
+	output << "shard_ind,operation,size_or_alloc_index,nsecs,is_frequent\n";
+}
+
+void
+write_output_event(std::ofstream &output, int shard_ind, int operation,
+    size_t value, uint64_t nsecs, bool is_frequent) {
+	output << shard_ind << "," << operation << "," << value << "," << nsecs
+	       << "," << (is_frequent ? 1 : 0) << "\n";
+}
+
+size_t
+process_trace_file(const std::string &input_filename,
+    const std::string &output_filename, TraceFormat format) {
+	std::ifstream input(input_filename);
+	if (!input.is_open()) {
+		std::cerr << "Failed to open input file: " << input_filename
+		          << std::endl;
+		return 0;
+	}
+
+	std::ofstream output(output_filename);
+	if (!output.is_open()) {
+		std::cerr << "Failed to open output file: " << output_filename
+		          << std::endl;
+		return 0;
+	}
+
+	AllocationTracker tracker;
+	ArenaMapper       arena_mapper; /* For SEC format arena mapping */
+
+	std::string line;
+	size_t      line_count = 0;
+	size_t      output_count = 0;
+	size_t      alloc_sequence = 0; /* Sequential index for allocations */
+	size_t      unmatched_frees = 0;
+
+	write_output_header(output);
+	std::cout << "Reading from: " << input_filename << std::endl;
+
+	/* Skip header line */
+	if (!std::getline(input, line)) {
+		std::cerr << "Error: Empty input file" << std::endl;
+		return 0;
+	}
+
+	while (std::getline(input, line)) {
+		line_count++;
+
+		/* Skip empty lines */
+		if (line.empty()) {
+			continue;
+		}
+
+		TraceEvent event;
+		bool       parse_error = false;
+
+		if (format == TraceFormat::HPA) {
+			parse_error = parse_hpa_line(line, event);
+		} else if (format == TraceFormat::SEC) {
+			parse_error = parse_sec_line(line, event, arena_mapper);
+		}
+
+		if (parse_error) {
+			continue;
+		}
+
+		if (is_alloc_operation(event.probe)) {
+			/* This is an allocation */
+			write_output_event(output, event.shard_ind, 0,
+			    event.size, event.nsecs, event.is_frequent);
+
+			/* Track this allocation with the current sequence number */
+			tracker.add_allocation(event.addr, event.size,
+			    event.shard_ind, alloc_sequence, event.nsecs);
+			alloc_sequence++;
+		} else if (is_dalloc_operation(event.probe)) {
+			/* This is a deallocation. Ignore dalloc without a corresponding alloc. */
+			AllocationRecord *record = tracker.find_allocation(
+			    event.addr);
+
+			if (record) {
+				/* Validate timing: deallocation should happen after allocation */
+				assert(event.nsecs >= record->nsecs);
+				/* Found matching allocation with valid timing */
+				write_output_event(output, event.shard_ind, 1,
+				    record->alloc_index, event.nsecs,
+				    event.is_frequent);
+				tracker.remove_allocation(event.addr);
+				output_count++; /* Count this deallocation */
+			} else {
+				unmatched_frees++;
+			}
+		} else {
+			std::cerr << "Unknown operation: " << event.probe
+			          << std::endl;
+		}
+	}
+
+	std::cout << "Processed " << line_count << " lines" << std::endl;
+	std::cout << "Unmatched frees: " << unmatched_frees << std::endl;
+	std::cout << "Extracted " << output_count << " alloc/dalloc pairs"
+	          << std::endl;
+	std::cout << "Results written to: " << output_filename << std::endl;
+
+	return output_count;
+}
+
+TraceFormat
+parse_format(const std::string &format_str) {
+	if (format_str == "hpa") {
+		return TraceFormat::HPA;
+	} else if (format_str == "sec") {
+		return TraceFormat::SEC;
+	} else {
+		throw std::invalid_argument(
+		    "Unknown format: " + format_str + ". Use 'hpa' or 'sec'");
+	}
+}
+
+int
+main(int argc, char *argv[]) {
+	if (argc < 4 || argc > 5) {
+		std::cerr << "Usage: " << argv[0]
+		          << " <format> <input_csv_file> <output_file>"
+		          << std::endl;
+		std::cerr << std::endl;
+		std::cerr << "Arguments:" << std::endl;
+		std::cerr << "  format          - Input format: 'hpa' or 'sec'"
+		          << std::endl;
+		std::cerr
+		    << "                    hpa: shard_ind_int,addr_int,nsecs_int,probe,size_int"
+		    << std::endl;
+		std::cerr
+		    << "                    sec: process_id,thread_id,thread_name,nsecs_int,_c4,sec_ptr_int,sec_shard_ptr_int,edata_ptr_int,size_int,is_frequent_reuse_int"
+		    << std::endl;
+		std::cerr << "  input_csv_file  - Input CSV trace file"
+		          << std::endl;
+		std::cerr
+		    << "  output_file     - Output file for simulator with format:"
+		    << std::endl;
+		std::cerr
+		    << "                    shard_ind,operation,size_or_alloc_index,nsecs,is_frequent"
+		    << std::endl;
+		std::cerr << std::endl;
+		std::cerr << "Output format:" << std::endl;
+		std::cerr << "  - operation: 0=alloc, 1=dalloc" << std::endl;
+		std::cerr
+		    << "  - size_or_alloc_index: bytes for alloc, alloc index for dalloc"
+		    << std::endl;
+		return 1;
+	}
+
+	try {
+		TraceFormat format = parse_format(argv[1]);
+		std::string input_file = argv[2];
+		std::string output_file = argv[3];
+
+		size_t events_generated = process_trace_file(
+		    input_file, output_file, format);
+
+		if (events_generated == 0) {
+			std::cerr
+			    << "No events generated. Check input file format and filtering criteria."
+			    << std::endl;
+			return 1;
+		}
+		return 0;
+	} catch (const std::exception &e) {
+		std::cerr << "Error: " << e.what() << std::endl;
+		return 1;
+	}
+}
diff --git a/test/stress/pa/pa_microbench.c b/test/stress/pa/pa_microbench.c
new file mode 100644
index 00000000..3e7d8aef
--- /dev/null
+++ b/test/stress/pa/pa_microbench.c
@@ -0,0 +1,706 @@
+#include "test/jemalloc_test.h"
+
+/* Additional includes for PA functionality */
+#include "jemalloc/internal/pa.h"
+#include "jemalloc/internal/tsd.h"
+#include "jemalloc/internal/sz.h"
+#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/ehooks.h"
+#include "jemalloc/internal/nstime.h"
+#include "jemalloc/internal/hpa.h"
+#include "jemalloc/internal/sec.h"
+#include "jemalloc/internal/emap.h"
+#include "jemalloc/internal/psset.h"
+
+/*
+ * PA Microbenchmark (Simplified Version)
+ *
+ * This tool reads allocation traces and simulates PA behavior
+ * for testing and understanding the allocation patterns.
+ *
+ * Features:
+ * 1. Reads CSV input file with format: shard_ind,operation,size_or_alloc_index,is_frequent
+ * 2. Simulates allocations/deallocations tracking
+ * 3. Provides basic statistics analysis
+ * 4. Validates the framework setup
+ */
+
+#define MAX_LINE_LENGTH 1024
+#define MAX_ALLOCATIONS 10000000
+#define MAX_ARENAS 128
+
+typedef enum { PA_ALLOC = 0, PA_DALLOC = 1 } pa_op_t;
+
+typedef struct {
+	int      shard_ind;
+	pa_op_t  operation;
+	size_t   size_or_alloc_index;
+	uint64_t nsecs;
+	int      is_frequent;
+} pa_event_t;
+
+typedef struct {
+	edata_t *edata;
+	size_t   size;
+	int      shard_ind;
+	bool     active;
+} allocation_record_t;
+
+/* Structure to group per-shard tracking statistics */
+typedef struct {
+	uint64_t alloc_count;     /* Number of allocations */
+	uint64_t dealloc_count;   /* Number of deallocations */
+	uint64_t bytes_allocated; /* Current bytes allocated */
+} shard_stats_t;
+
+/* Structure to group per-shard PA infrastructure */
+typedef struct {
+	base_t          *base;        /* Base allocator */
+	emap_t           emap;        /* Extent map */
+	pa_shard_t       pa_shard;    /* PA shard */
+	pa_shard_stats_t shard_stats; /* PA shard statistics */
+	malloc_mutex_t   stats_mtx;   /* Statistics mutex */
+} shard_infrastructure_t;
+
+static FILE                *g_stats_output = NULL; /* Output file for stats */
+static size_t               g_alloc_counter = 0; /* Global allocation counter */
+static allocation_record_t *g_alloc_records =
+    NULL;                     /* Global allocation tracking */
+static bool g_use_sec = true; /* Global flag for SEC vs HPA-only */
+
+/* Refactored arrays using structures */
+static shard_stats_t *g_shard_stats = NULL; /* Per-shard tracking statistics */
+static shard_infrastructure_t *g_shard_infra =
+    NULL;                         /* Per-shard PA infrastructure */
+static pa_central_t g_pa_central; /* Global PA central */
+
+/* Override for curtime */
+static hpa_hooks_t hpa_hooks_override;
+static nstime_t    cur_time_clock;
+
+void
+curtime(nstime_t *r_time, bool first_reading) {
+	if (first_reading) {
+		nstime_init_zero(r_time);
+	}
+	*r_time = cur_time_clock;
+}
+
+static void
+set_clock(uint64_t nsecs) {
+	nstime_init(&cur_time_clock, nsecs);
+}
+
+static void
+init_hpa_hooks() {
+	hpa_hooks_override = hpa_hooks_default;
+	hpa_hooks_override.curtime = curtime;
+}
+
+static void cleanup_pa_infrastructure(int num_shards);
+
+static bool
+initialize_pa_infrastructure(int num_shards) {
+	/*
+	 * Note when we call malloc, it resolves to je_malloc, while internal
+	 * functions like base_new resolve to jet_malloc.  This is because this
+	 * file is compiled with -DJEMALLOC_JET as a test.  This allows us to
+	 * completely isolate the PA infrastructure benchmark from the rest of
+	 * the jemalloc usage.
+	*/
+	void *dummy_jet = jet_malloc(16);
+	if (dummy_jet == NULL) {
+		fprintf(stderr, "Failed to initialize JET jemalloc\n");
+		return 1;
+	}
+
+	/* Force JET system to be fully initialized */
+	if (jet_mallctl("epoch", NULL, NULL, NULL, 0) != 0) {
+		fprintf(stderr, "Failed to initialize JET system fully\n");
+		jet_free(dummy_jet);
+		return 1;
+	}
+	jet_free(dummy_jet);
+
+	/* Allocate shard tracking statistics */
+	g_shard_stats = calloc(num_shards, sizeof(shard_stats_t));
+	if (g_shard_stats == NULL) {
+		printf("DEBUG: Failed to allocate shard stats\n");
+		return true;
+	}
+
+	/* Allocate shard infrastructure */
+	g_shard_infra = calloc(num_shards, sizeof(shard_infrastructure_t));
+	if (g_shard_infra == NULL) {
+		printf("DEBUG: Failed to allocate shard infrastructure\n");
+		free(g_shard_stats);
+		return true;
+	}
+
+	/* Initialize one base allocator for PA central */
+	base_t *central_base = base_new(tsd_tsdn(tsd_fetch()), 0 /* ind */,
+	    (extent_hooks_t *)&ehooks_default_extent_hooks,
+	    /* metadata_use_hooks */ true);
+	if (central_base == NULL) {
+		printf("DEBUG: Failed to create central_base\n");
+		free(g_shard_stats);
+		free(g_shard_infra);
+		return true;
+	}
+
+	/* Initialize PA central with HPA enabled */
+	init_hpa_hooks();
+	if (pa_central_init(&g_pa_central, central_base, true /* hpa */,
+	        &hpa_hooks_override)) {
+		printf("DEBUG: Failed to initialize PA central\n");
+		base_delete(tsd_tsdn(tsd_fetch()), central_base);
+		free(g_shard_stats);
+		free(g_shard_infra);
+		return true;
+	}
+
+	for (int i = 0; i < num_shards; i++) {
+		/* Create a separate base allocator for each shard */
+		g_shard_infra[i].base = base_new(tsd_tsdn(tsd_fetch()),
+		    i /* ind */, (extent_hooks_t *)&ehooks_default_extent_hooks,
+		    /* metadata_use_hooks */ true);
+		if (g_shard_infra[i].base == NULL) {
+			printf("DEBUG: Failed to create base %d\n", i);
+			/* Clean up partially initialized shards */
+			cleanup_pa_infrastructure(num_shards);
+			return true;
+		}
+
+		/* Initialize emap for this shard */
+		if (emap_init(&g_shard_infra[i].emap, g_shard_infra[i].base,
+		        /* zeroed */ false)) {
+			printf("DEBUG: Failed to initialize emap %d\n", i);
+			/* Clean up partially initialized shards */
+			cleanup_pa_infrastructure(num_shards);
+			return true;
+		}
+
+		/* Initialize stats mutex */
+		if (malloc_mutex_init(&g_shard_infra[i].stats_mtx,
+		        "pa_shard_stats", WITNESS_RANK_OMIT,
+		        malloc_mutex_rank_exclusive)) {
+			printf(
+			    "DEBUG: Failed to initialize stats mutex %d\n", i);
+			/* Clean up partially initialized shards */
+			cleanup_pa_infrastructure(num_shards);
+			return true;
+		}
+
+		/* Initialize PA shard */
+		nstime_t cur_time;
+		nstime_init_zero(&cur_time);
+
+		if (pa_shard_init(tsd_tsdn(tsd_fetch()),
+		        &g_shard_infra[i].pa_shard, &g_pa_central,
+		        &g_shard_infra[i].emap /* emap */,
+		        g_shard_infra[i].base, i /* ind */,
+		        &g_shard_infra[i].shard_stats /* stats */,
+		        &g_shard_infra[i].stats_mtx /* stats_mtx */,
+		        &cur_time /* cur_time */,
+		        SIZE_MAX /* oversize_threshold */,
+		        -1 /* dirty_decay_ms */, -1 /* muzzy_decay_ms */)) {
+			printf("DEBUG: Failed to initialize PA shard %d\n", i);
+			/* Clean up partially initialized shards */
+			cleanup_pa_infrastructure(num_shards);
+			return true;
+		}
+
+		/* Enable HPA for this shard with proper configuration */
+		hpa_shard_opts_t hpa_opts = HPA_SHARD_OPTS_DEFAULT;
+		hpa_opts.deferral_allowed =
+		    false; /* No background threads in microbench */
+
+		sec_opts_t sec_opts = SEC_OPTS_DEFAULT;
+		if (!g_use_sec) {
+			/* Disable SEC by setting nshards to 0 */
+			sec_opts.nshards = 0;
+		}
+
+		if (pa_shard_enable_hpa(tsd_tsdn(tsd_fetch()),
+		        &g_shard_infra[i].pa_shard, &hpa_opts, &sec_opts)) {
+			fprintf(
+			    stderr, "Failed to enable HPA on shard %d\n", i);
+			/* Clean up partially initialized shards */
+			cleanup_pa_infrastructure(num_shards);
+			return true;
+		}
+	}
+
+	printf("PA infrastructure configured: HPA=enabled, SEC=%s\n",
+	    g_use_sec ? "enabled" : "disabled");
+
+	return false;
+}
+
+static void
+cleanup_pa_infrastructure(int num_shards) {
+	if (g_shard_infra != NULL) {
+		for (int i = 0; i < num_shards; i++) {
+			pa_shard_destroy(
+			    tsd_tsdn(tsd_fetch()), &g_shard_infra[i].pa_shard);
+			if (g_shard_infra[i].base != NULL) {
+				base_delete(tsd_tsdn(tsd_fetch()),
+				    g_shard_infra[i].base);
+			}
+		}
+		free(g_shard_infra);
+		g_shard_infra = NULL;
+	}
+
+	if (g_shard_stats != NULL) {
+		free(g_shard_stats);
+		g_shard_stats = NULL;
+	}
+}
+
+static bool
+parse_csv_line(const char *line, pa_event_t *event) {
+	/* Expected format: shard_ind,operation,size_or_alloc_index,is_frequent */
+	int operation;
+	int fields = sscanf(line, "%d,%d,%zu,%lu,%d", &event->shard_ind,
+	    &operation, &event->size_or_alloc_index, &event->nsecs,
+	    &event->is_frequent);
+
+	if (fields < 4) { /* is_frequent is optional */
+		return false;
+	}
+
+	if (fields == 4) {
+		event->is_frequent = 0; /* Default value */
+	}
+
+	if (operation == 0) {
+		event->operation = PA_ALLOC;
+	} else if (operation == 1) {
+		event->operation = PA_DALLOC;
+	} else {
+		return false;
+	}
+
+	return true;
+}
+
+static size_t
+load_trace_file(const char *filename, pa_event_t **events, int *max_shard_id) {
+	FILE *file = fopen(filename, "r");
+	if (!file) {
+		fprintf(stderr, "Failed to open trace file: %s\n", filename);
+		return 0;
+	}
+
+	*events = malloc(MAX_ALLOCATIONS * sizeof(pa_event_t));
+	if (!*events) {
+		fclose(file);
+		return 0;
+	}
+
+	char   line[MAX_LINE_LENGTH];
+	size_t count = 0;
+	*max_shard_id = 0;
+
+	/* Skip header line */
+	if (fgets(line, sizeof(line), file) == NULL) {
+		fclose(file);
+		free(*events);
+		return 0;
+	}
+
+	while (fgets(line, sizeof(line), file) && count < MAX_ALLOCATIONS) {
+		if (parse_csv_line(line, &(*events)[count])) {
+			if ((*events)[count].shard_ind > *max_shard_id) {
+				*max_shard_id = (*events)[count].shard_ind;
+			}
+			count++;
+		}
+	}
+
+	fclose(file);
+	printf("Loaded %zu events from %s\n", count, filename);
+	printf("Maximum shard ID found: %d\n", *max_shard_id);
+	return count;
+}
+
+static void
+collect_hpa_stats(int shard_id, hpa_shard_stats_t *hpa_stats_out) {
+	/* Get tsdn for statistics collection */
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+
+	/* Clear the output structure */
+	memset(hpa_stats_out, 0, sizeof(hpa_shard_stats_t));
+
+	/* Check if this shard has HPA enabled */
+	if (!g_shard_infra[shard_id].pa_shard.ever_used_hpa) {
+		return;
+	}
+
+	/* Merge HPA statistics from the shard */
+	hpa_shard_stats_merge(
+	    tsdn, &g_shard_infra[shard_id].pa_shard.hpa_shard, hpa_stats_out);
+}
+
+static void
+print_shard_stats(int shard_id, size_t operation_count) {
+	if (!g_stats_output) {
+		return;
+	}
+
+	/* Collect HPA statistics */
+	hpa_shard_stats_t hpa_stats;
+	collect_hpa_stats(shard_id, &hpa_stats);
+	psset_stats_t *psset_stats = &hpa_stats.psset_stats;
+
+	/* Total pageslabs */
+	size_t total_pageslabs = psset_stats->merged.npageslabs;
+
+	/* Full pageslabs breakdown by hugification */
+	size_t full_pageslabs_non_huge =
+	    psset_stats->full_slabs[0].npageslabs; /* [0] = non-hugified */
+	size_t full_pageslabs_huge =
+	    psset_stats->full_slabs[1].npageslabs; /* [1] = hugified */
+	size_t full_pageslabs_total = full_pageslabs_non_huge
+	    + full_pageslabs_huge;
+
+	/* Empty pageslabs breakdown by hugification */
+	size_t empty_pageslabs_non_huge =
+	    psset_stats->empty_slabs[0].npageslabs; /* [0] = non-hugified */
+	size_t empty_pageslabs_huge =
+	    psset_stats->empty_slabs[1].npageslabs; /* [1] = hugified */
+	size_t empty_pageslabs_total = empty_pageslabs_non_huge
+	    + empty_pageslabs_huge;
+
+	/* Hugified pageslabs (full + empty + partial) */
+	size_t hugified_pageslabs = full_pageslabs_huge + empty_pageslabs_huge;
+	/* Add hugified partial slabs */
+	for (int i = 0; i < PSSET_NPSIZES; i++) {
+		hugified_pageslabs +=
+		    psset_stats->nonfull_slabs[i][1].npageslabs;
+	}
+
+	/* Dirty bytes */
+	size_t   dirty_bytes = psset_stats->merged.ndirty * PAGE;
+	uint64_t npurge_passes = hpa_stats.nonderived_stats.npurge_passes;
+	uint64_t npurges = hpa_stats.nonderived_stats.npurges;
+
+	assert(g_use_sec
+	    || psset_stats->merged.nactive * PAGE
+	        == g_shard_stats[shard_id].bytes_allocated);
+	/* Output enhanced stats with detailed breakdown */
+	fprintf(g_stats_output,
+	    "%zu,%d,%lu,%lu,%lu,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%lu,%lu,%lu"
+	    ",%lu,%lu\n",
+	    operation_count, shard_id, g_shard_stats[shard_id].alloc_count,
+	    g_shard_stats[shard_id].dealloc_count,
+	    g_shard_stats[shard_id].bytes_allocated, total_pageslabs,
+	    full_pageslabs_total, empty_pageslabs_total, hugified_pageslabs,
+	    full_pageslabs_non_huge, full_pageslabs_huge,
+	    empty_pageslabs_non_huge, empty_pageslabs_huge, dirty_bytes,
+	    hpa_stats.nonderived_stats.nhugifies,
+	    hpa_stats.nonderived_stats.nhugify_failures,
+	    hpa_stats.nonderived_stats.ndehugifies, npurge_passes, npurges);
+	fflush(g_stats_output);
+}
+
+static void
+simulate_trace(
+    int num_shards, pa_event_t *events, size_t count, size_t stats_interval) {
+	uint64_t total_allocs = 0, total_deallocs = 0;
+	uint64_t total_allocated_bytes = 0;
+
+	printf("Starting simulation with %zu events across %d shards...\n",
+	    count, num_shards);
+
+	for (size_t i = 0; i < count; i++) {
+		pa_event_t *event = &events[i];
+
+		/* Validate shard index */
+		if (event->shard_ind >= num_shards) {
+			fprintf(stderr,
+			    "Warning: Invalid shard index %d (max %d)\n",
+			    event->shard_ind, num_shards - 1);
+			continue;
+		}
+
+		set_clock(event->nsecs);
+		switch (event->operation) {
+		case PA_ALLOC: {
+			size_t size = event->size_or_alloc_index;
+
+			/* Get tsdn and calculate parameters for PA */
+			tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+			szind_t szind = sz_size2index(size);
+			bool    slab =
+			    event
+			        ->is_frequent; /* Use frequent_reuse for slab */
+			bool deferred_work_generated = false;
+
+			/* Allocate using PA allocator */
+			edata_t *edata = pa_alloc(tsdn,
+			    &g_shard_infra[event->shard_ind].pa_shard, size,
+			    PAGE /* alignment */, slab, szind, false /* zero */,
+			    false /* guarded */, &deferred_work_generated);
+
+			if (edata != NULL) {
+				/* Store allocation record */
+				g_alloc_records[g_alloc_counter].edata = edata;
+				g_alloc_records[g_alloc_counter].size = size;
+				g_alloc_records[g_alloc_counter].shard_ind =
+				    event->shard_ind;
+				g_alloc_records[g_alloc_counter].active = true;
+				g_alloc_counter++;
+
+				/* Update shard-specific stats */
+				g_shard_stats[event->shard_ind].alloc_count++;
+				g_shard_stats[event->shard_ind]
+				    .bytes_allocated += size;
+
+				total_allocs++;
+				total_allocated_bytes += size;
+			}
+			break;
+		}
+		case PA_DALLOC: {
+			size_t alloc_index = event->size_or_alloc_index;
+			if (alloc_index < g_alloc_counter
+			    && g_alloc_records[alloc_index].active
+			    && g_alloc_records[alloc_index].shard_ind
+			        == event->shard_ind) {
+				/* Get tsdn for PA */
+				tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+				bool    deferred_work_generated = false;
+
+				/* Deallocate using PA allocator */
+				pa_dalloc(tsdn,
+				    &g_shard_infra[event->shard_ind].pa_shard,
+				    g_alloc_records[alloc_index].edata,
+				    &deferred_work_generated);
+
+				/* Update shard-specific stats */
+				g_shard_stats[event->shard_ind].dealloc_count++;
+				g_shard_stats[event->shard_ind]
+				    .bytes_allocated -=
+				    g_alloc_records[alloc_index].size;
+
+				g_alloc_records[alloc_index].active = false;
+				total_deallocs++;
+			}
+			break;
+		}
+		}
+
+		/* Periodic stats output and progress reporting */
+		if (stats_interval > 0 && (i + 1) % stats_interval == 0) {
+			/* Print stats for all shards */
+			for (int j = 0; j < num_shards; j++) {
+				print_shard_stats(j, i + 1);
+			}
+		}
+	}
+
+	printf("\nSimulation completed:\n");
+	printf("  Total allocations: %lu\n", total_allocs);
+	printf("  Total deallocations: %lu\n", total_deallocs);
+	printf("  Total allocated: %lu bytes\n", total_allocated_bytes);
+	printf("  Active allocations: %lu\n", g_alloc_counter - total_deallocs);
+
+	/* Print final stats for all shards */
+	printf("\nFinal shard statistics:\n");
+	for (int i = 0; i < num_shards; i++) {
+		printf(
+		    "  Shard %d: Allocs=%lu, Deallocs=%lu, Active Bytes=%lu\n",
+		    i, g_shard_stats[i].alloc_count,
+		    g_shard_stats[i].dealloc_count,
+		    g_shard_stats[i].bytes_allocated);
+
+		/* Final stats to file */
+		print_shard_stats(i, count);
+	}
+}
+
+static void
+cleanup_remaining_allocations(int num_shards) {
+	size_t cleaned_up = 0;
+
+	printf("Cleaning up remaining allocations...\n");
+
+	for (size_t i = 0; i < g_alloc_counter; i++) {
+		if (g_alloc_records[i].active) {
+			int shard_ind = g_alloc_records[i].shard_ind;
+			if (shard_ind < num_shards) {
+				tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+				bool    deferred_work_generated = false;
+
+				pa_dalloc(tsdn,
+				    &g_shard_infra[shard_ind].pa_shard,
+				    g_alloc_records[i].edata,
+				    &deferred_work_generated);
+
+				g_alloc_records[i].active = false;
+				cleaned_up++;
+			}
+		}
+	}
+
+	printf("Cleaned up %zu remaining allocations\n", cleaned_up);
+}
+
+static void
+print_usage(const char *program) {
+	printf("Usage: %s [options] <trace_file.csv>\n", program);
+	printf("Options:\n");
+	printf("  -h, --help           Show this help message\n");
+	printf(
+	    "  -o, --output FILE    Output file for statistics (default: stdout)\n");
+	printf("  -s, --sec            Use SEC (default)\n");
+	printf("  -p, --hpa-only       Use HPA only (no SEC)\n");
+	printf(
+	    "  -i, --interval N     Stats print interval (default: 100000, 0=disable)\n");
+	printf(
+	    "\nTrace file format: shard_ind,operation,size_or_alloc_index,is_frequent\n");
+	printf("  - operation: 0=alloc, 1=dealloc\n");
+	printf("  - is_frequent: optional column\n");
+}
+
+int
+main(int argc, char *argv[]) {
+	const char *trace_file = NULL;
+	const char *stats_output_file = NULL;
+	size_t      stats_interval = 100000; /* Default stats print interval */
+	/* Parse command line arguments */
+	for (int i = 1; i < argc; i++) {
+		if (strcmp(argv[i], "-h") == 0
+		    || strcmp(argv[i], "--help") == 0) {
+			print_usage(argv[0]);
+			return 0;
+		} else if (strcmp(argv[i], "-o") == 0
+		    || strcmp(argv[i], "--output") == 0) {
+			if (i + 1 >= argc) {
+				fprintf(stderr,
+				    "Error: %s requires an argument\n",
+				    argv[i]);
+				return 1;
+			}
+			stats_output_file = argv[++i];
+		} else if (strcmp(argv[i], "-s") == 0
+		    || strcmp(argv[i], "--sec") == 0) {
+			g_use_sec = true;
+		} else if (strcmp(argv[i], "-p") == 0
+		    || strcmp(argv[i], "--hpa-only") == 0) {
+			g_use_sec = false;
+		} else if (strcmp(argv[i], "-i") == 0
+		    || strcmp(argv[i], "--interval") == 0) {
+			if (i + 1 >= argc) {
+				fprintf(stderr,
+				    "Error: %s requires an argument\n",
+				    argv[i]);
+				return 1;
+			}
+			stats_interval = (size_t)atol(argv[++i]);
+		} else if (argv[i][0] != '-') {
+			trace_file = argv[i];
+		} else {
+			fprintf(stderr, "Unknown option: %s\n", argv[i]);
+			print_usage(argv[0]);
+			return 1;
+		}
+	}
+
+	if (!trace_file) {
+		fprintf(stderr, "Error: No trace file specified\n");
+		print_usage(argv[0]);
+		return 1;
+	}
+
+	printf("Trace file: %s\n", trace_file);
+	printf("Mode: %s\n", g_use_sec ? "PA with SEC" : "HPA only");
+
+	/* Open stats output file */
+	if (stats_output_file) {
+		g_stats_output = fopen(stats_output_file, "w");
+		if (!g_stats_output) {
+			fprintf(stderr,
+			    "Failed to open stats output file: %s\n",
+			    stats_output_file);
+			return 1;
+		}
+		printf("Stats output: %s\n", stats_output_file);
+
+		/* Write CSV header */
+		fprintf(g_stats_output,
+		    "operation_count,shard_id,alloc_count,dealloc_count,active_bytes,"
+		    "total_pageslabs,full_pageslabs_total,empty_pageslabs_total,hugified_pageslabs,"
+		    "full_pageslabs_non_huge,full_pageslabs_huge,"
+		    "empty_pageslabs_non_huge,empty_pageslabs_huge,"
+		    "dirty_bytes,nhugifies,nhugify_failures,ndehugifies,"
+		    "npurge_passes,npurges\n");
+	}
+
+	/* Load trace data and determine max number of arenas */
+	pa_event_t *events;
+	int         max_shard_id;
+	size_t      event_count = load_trace_file(
+            trace_file, &events, &max_shard_id);
+	if (event_count == 0) {
+		if (g_stats_output)
+			fclose(g_stats_output);
+		return 1;
+	}
+
+	int num_shards = max_shard_id + 1; /* shard IDs are 0-based */
+	if (num_shards > MAX_ARENAS) {
+		fprintf(stderr, "Error: Too many arenas required (%d > %d)\n",
+		    num_shards, MAX_ARENAS);
+		free(events);
+		if (g_stats_output)
+			fclose(g_stats_output);
+		return 1;
+	}
+
+	/* Allocate allocation tracking array */
+	g_alloc_records = malloc(event_count * sizeof(allocation_record_t));
+
+	if (!g_alloc_records) {
+		fprintf(
+		    stderr, "Failed to allocate allocation tracking array\n");
+		free(events);
+		if (g_stats_output) {
+			fclose(g_stats_output);
+		}
+		return 1;
+	}
+
+	/* Initialize PA infrastructure */
+	if (initialize_pa_infrastructure(num_shards)) {
+		fprintf(stderr, "Failed to initialize PA infrastructure\n");
+		free(events);
+		free(g_alloc_records);
+		if (g_stats_output) {
+			fclose(g_stats_output);
+		}
+		return 1;
+	}
+
+	/* Run simulation */
+	simulate_trace(num_shards, events, event_count, stats_interval);
+
+	/* Clean up remaining allocations */
+	cleanup_remaining_allocations(num_shards);
+
+	/* Cleanup PA infrastructure */
+	cleanup_pa_infrastructure(num_shards);
+
+	/* Cleanup */
+	free(g_alloc_records);
+	free(events);
+
+	if (g_stats_output) {
+		fclose(g_stats_output);
+		printf("Statistics written to: %s\n", stats_output_file);
+	}
+
+	return 0;
+}
diff --git a/test/test.sh.in b/test/test.sh.in
index 39302fff..dc13bc28 100644
--- a/test/test.sh.in
+++ b/test/test.sh.in
@@ -43,6 +43,7 @@ for t in $@; do
     # per test shell script to ignore the @JEMALLOC_CPREFIX@ detail).
     enable_fill=@enable_fill@ \
     enable_prof=@enable_prof@ \
+    disable_large_size_classes=@disable_large_size_classes@ \
     . @srcroot@${t}.sh && \
     export_malloc_conf && \
     $JEMALLOC_TEST_PREFIX ${t}@exe@ @abs_srcroot@ @abs_objroot@
@@ -63,8 +64,14 @@ for t in $@; do
       fail_count=$((fail_count+1))
       ;;
     *)
-      echo "Test harness error: ${t} w/ MALLOC_CONF=\"${MALLOC_CONF}\"" 1>&2
-      echo "Use prefix to debug, e.g. JEMALLOC_TEST_PREFIX=\"gdb --args\" sh test/test.sh ${t}" 1>&2
+      color_start=''
+      color_end=''
+      if [ -t 2 ] && tput colors >/dev/null 2>&1; then
+        color_start='\033[31m'
+        color_end='\033[0m'
+      fi
+      printf "${color_start}Test harness error: %s w/ MALLOC_CONF=\"%s\"${color_end}\n" "${t}" "${MALLOC_CONF}" 1>&2
+      printf "${color_start}Use prefix to debug, e.g. JEMALLOC_TEST_PREFIX=\"gdb --args\" sh test/test.sh %s${color_end}\n" "${t}" 1>&2
       exit 1
   esac
 done
diff --git a/test/unit/SFMT.c b/test/unit/SFMT.c
index b9f85dd9..8dbb61ed 100644
--- a/test/unit/SFMT.c
+++ b/test/unit/SFMT.c
@@ -40,1424 +40,1343 @@
 #define COUNT_1 1000
 #define COUNT_2 700
 
-static const uint32_t init_gen_rand_32_expected[] = {
-	3440181298U, 1564997079U, 1510669302U, 2930277156U, 1452439940U,
-	3796268453U,  423124208U, 2143818589U, 3827219408U, 2987036003U,
-	2674978610U, 1536842514U, 2027035537U, 2534897563U, 1686527725U,
-	 545368292U, 1489013321U, 1370534252U, 4231012796U, 3994803019U,
-	1764869045U,  824597505U,  862581900U, 2469764249U,  812862514U,
-	 359318673U,  116957936U, 3367389672U, 2327178354U, 1898245200U,
-	3206507879U, 2378925033U, 1040214787U, 2524778605U, 3088428700U,
-	1417665896U,  964324147U, 2282797708U, 2456269299U,  313400376U,
-	2245093271U, 1015729427U, 2694465011U, 3246975184U, 1992793635U,
-	 463679346U, 3721104591U, 3475064196U,  856141236U, 1499559719U,
-	3522818941U, 3721533109U, 1954826617U, 1282044024U, 1543279136U,
-	1301863085U, 2669145051U, 4221477354U, 3896016841U, 3392740262U,
-	 462466863U, 1037679449U, 1228140306U,  922298197U, 1205109853U,
-	1872938061U, 3102547608U, 2742766808U, 1888626088U, 4028039414U,
-	 157593879U, 1136901695U, 4038377686U, 3572517236U, 4231706728U,
-	2997311961U, 1189931652U, 3981543765U, 2826166703U,   87159245U,
-	1721379072U, 3897926942U, 1790395498U, 2569178939U, 1047368729U,
-	2340259131U, 3144212906U, 2301169789U, 2442885464U, 3034046771U,
-	3667880593U, 3935928400U, 2372805237U, 1666397115U, 2460584504U,
-	 513866770U, 3810869743U, 2147400037U, 2792078025U, 2941761810U,
-	3212265810U,  984692259U,  346590253U, 1804179199U, 3298543443U,
-	 750108141U, 2880257022U,  243310542U, 1869036465U, 1588062513U,
-	2983949551U, 1931450364U, 4034505847U, 2735030199U, 1628461061U,
-	2539522841U,  127965585U, 3992448871U,  913388237U,  559130076U,
-	1202933193U, 4087643167U, 2590021067U, 2256240196U, 1746697293U,
-	1013913783U, 1155864921U, 2715773730U,  915061862U, 1948766573U,
-	2322882854U, 3761119102U, 1343405684U, 3078711943U, 3067431651U,
-	3245156316U, 3588354584U, 3484623306U, 3899621563U, 4156689741U,
-	3237090058U, 3880063844U,  862416318U, 4039923869U, 2303788317U,
-	3073590536U,  701653667U, 2131530884U, 3169309950U, 2028486980U,
-	 747196777U, 3620218225U,  432016035U, 1449580595U, 2772266392U,
-	 444224948U, 1662832057U, 3184055582U, 3028331792U, 1861686254U,
-	1104864179U,  342430307U, 1350510923U, 3024656237U, 1028417492U,
-	2870772950U,  290847558U, 3675663500U,  508431529U, 4264340390U,
-	2263569913U, 1669302976U,  519511383U, 2706411211U, 3764615828U,
-	3883162495U, 4051445305U, 2412729798U, 3299405164U, 3991911166U,
-	2348767304U, 2664054906U, 3763609282U,  593943581U, 3757090046U,
-	2075338894U, 2020550814U, 4287452920U, 4290140003U, 1422957317U,
-	2512716667U, 2003485045U, 2307520103U, 2288472169U, 3940751663U,
-	4204638664U, 2892583423U, 1710068300U, 3904755993U, 2363243951U,
-	3038334120U,  547099465U,  771105860U, 3199983734U, 4282046461U,
-	2298388363U,  934810218U, 2837827901U, 3952500708U, 2095130248U,
-	3083335297U,   26885281U, 3932155283U, 1531751116U, 1425227133U,
-	 495654159U, 3279634176U, 3855562207U, 3957195338U, 4159985527U,
-	 893375062U, 1875515536U, 1327247422U, 3754140693U, 1028923197U,
-	1729880440U,  805571298U,  448971099U, 2726757106U, 2749436461U,
-	2485987104U,  175337042U, 3235477922U, 3882114302U, 2020970972U,
-	 943926109U, 2762587195U, 1904195558U, 3452650564U,  108432281U,
-	3893463573U, 3977583081U, 2636504348U, 1110673525U, 3548479841U,
-	4258854744U,  980047703U, 4057175418U, 3890008292U,  145653646U,
-	3141868989U, 3293216228U, 1194331837U, 1254570642U, 3049934521U,
-	2868313360U, 2886032750U, 1110873820U,  279553524U, 3007258565U,
-	1104807822U, 3186961098U,  315764646U, 2163680838U, 3574508994U,
-	3099755655U,  191957684U, 3642656737U, 3317946149U, 3522087636U,
-	 444526410U,  779157624U, 1088229627U, 1092460223U, 1856013765U,
-	3659877367U,  368270451U,  503570716U, 3000984671U, 2742789647U,
-	 928097709U, 2914109539U,  308843566U, 2816161253U, 3667192079U,
-	2762679057U, 3395240989U, 2928925038U, 1491465914U, 3458702834U,
-	3787782576U, 2894104823U, 1296880455U, 1253636503U,  989959407U,
-	2291560361U, 2776790436U, 1913178042U, 1584677829U,  689637520U,
-	1898406878U,  688391508U, 3385234998U,  845493284U, 1943591856U,
-	2720472050U,  222695101U, 1653320868U, 2904632120U, 4084936008U,
-	1080720688U, 3938032556U,  387896427U, 2650839632U,   99042991U,
-	1720913794U, 1047186003U, 1877048040U, 2090457659U,  517087501U,
-	4172014665U, 2129713163U, 2413533132U, 2760285054U, 4129272496U,
-	1317737175U, 2309566414U, 2228873332U, 3889671280U, 1110864630U,
-	3576797776U, 2074552772U,  832002644U, 3097122623U, 2464859298U,
-	2679603822U, 1667489885U, 3237652716U, 1478413938U, 1719340335U,
-	2306631119U,  639727358U, 3369698270U,  226902796U, 2099920751U,
-	1892289957U, 2201594097U, 3508197013U, 3495811856U, 3900381493U,
-	 841660320U, 3974501451U, 3360949056U, 1676829340U,  728899254U,
-	2047809627U, 2390948962U,  670165943U, 3412951831U, 4189320049U,
-	1911595255U, 2055363086U,  507170575U,  418219594U, 4141495280U,
-	2692088692U, 4203630654U, 3540093932U,  791986533U, 2237921051U,
-	2526864324U, 2956616642U, 1394958700U, 1983768223U, 1893373266U,
-	 591653646U,  228432437U, 1611046598U, 3007736357U, 1040040725U,
-	2726180733U, 2789804360U, 4263568405U,  829098158U, 3847722805U,
-	1123578029U, 1804276347U,  997971319U, 4203797076U, 4185199713U,
-	2811733626U, 2343642194U, 2985262313U, 1417930827U, 3759587724U,
-	1967077982U, 1585223204U, 1097475516U, 1903944948U,  740382444U,
-	1114142065U, 1541796065U, 1718384172U, 1544076191U, 1134682254U,
-	3519754455U, 2866243923U,  341865437U,  645498576U, 2690735853U,
-	1046963033U, 2493178460U, 1187604696U, 1619577821U,  488503634U,
-	3255768161U, 2306666149U, 1630514044U, 2377698367U, 2751503746U,
-	3794467088U, 1796415981U, 3657173746U,  409136296U, 1387122342U,
-	1297726519U,  219544855U, 4270285558U,  437578827U, 1444698679U,
-	2258519491U,  963109892U, 3982244073U, 3351535275U,  385328496U,
-	1804784013U,  698059346U, 3920535147U,  708331212U,  784338163U,
-	 785678147U, 1238376158U, 1557298846U, 2037809321U,  271576218U,
-	4145155269U, 1913481602U, 2763691931U,  588981080U, 1201098051U,
-	3717640232U, 1509206239U,  662536967U, 3180523616U, 1133105435U,
-	2963500837U, 2253971215U, 3153642623U, 1066925709U, 2582781958U,
-	3034720222U, 1090798544U, 2942170004U, 4036187520U,  686972531U,
-	2610990302U, 2641437026U, 1837562420U,  722096247U, 1315333033U,
-	2102231203U, 3402389208U, 3403698140U, 1312402831U, 2898426558U,
-	 814384596U,  385649582U, 1916643285U, 1924625106U, 2512905582U,
-	2501170304U, 4275223366U, 2841225246U, 1467663688U, 3563567847U,
-	2969208552U,  884750901U,  102992576U,  227844301U, 3681442994U,
-	3502881894U, 4034693299U, 1166727018U, 1697460687U, 1737778332U,
-	1787161139U, 1053003655U, 1215024478U, 2791616766U, 2525841204U,
-	1629323443U,    3233815U, 2003823032U, 3083834263U, 2379264872U,
-	3752392312U, 1287475550U, 3770904171U, 3004244617U, 1502117784U,
-	 918698423U, 2419857538U, 3864502062U, 1751322107U, 2188775056U,
-	4018728324U,  983712955U,  440071928U, 3710838677U, 2001027698U,
-	3994702151U,   22493119U, 3584400918U, 3446253670U, 4254789085U,
-	1405447860U, 1240245579U, 1800644159U, 1661363424U, 3278326132U,
-	3403623451U,   67092802U, 2609352193U, 3914150340U, 1814842761U,
-	3610830847U,  591531412U, 3880232807U, 1673505890U, 2585326991U,
-	1678544474U, 3148435887U, 3457217359U, 1193226330U, 2816576908U,
-	 154025329U,  121678860U, 1164915738U,  973873761U,  269116100U,
-	  52087970U,  744015362U,  498556057U,   94298882U, 1563271621U,
-	2383059628U, 4197367290U, 3958472990U, 2592083636U, 2906408439U,
-	1097742433U, 3924840517U,  264557272U, 2292287003U, 3203307984U,
-	4047038857U, 3820609705U, 2333416067U, 1839206046U, 3600944252U,
-	3412254904U,  583538222U, 2390557166U, 4140459427U, 2810357445U,
-	 226777499U, 2496151295U, 2207301712U, 3283683112U,  611630281U,
-	1933218215U, 3315610954U, 3889441987U, 3719454256U, 3957190521U,
-	1313998161U, 2365383016U, 3146941060U, 1801206260U,  796124080U,
-	2076248581U, 1747472464U, 3254365145U,  595543130U, 3573909503U,
-	3758250204U, 2020768540U, 2439254210U,   93368951U, 3155792250U,
-	2600232980U, 3709198295U, 3894900440U, 2971850836U, 1578909644U,
-	1443493395U, 2581621665U, 3086506297U, 2443465861U,  558107211U,
-	1519367835U,  249149686U,  908102264U, 2588765675U, 1232743965U,
-	1001330373U, 3561331654U, 2259301289U, 1564977624U, 3835077093U,
-	 727244906U, 4255738067U, 1214133513U, 2570786021U, 3899704621U,
-	1633861986U, 1636979509U, 1438500431U,   58463278U, 2823485629U,
-	2297430187U, 2926781924U, 3371352948U, 1864009023U, 2722267973U,
-	1444292075U,  437703973U, 1060414512U,  189705863U,  910018135U,
-	4077357964U,  884213423U, 2644986052U, 3973488374U, 1187906116U,
-	2331207875U,  780463700U, 3713351662U, 3854611290U,  412805574U,
-	2978462572U, 2176222820U,  829424696U, 2790788332U, 2750819108U,
-	1594611657U, 3899878394U, 3032870364U, 1702887682U, 1948167778U,
-	  14130042U,  192292500U,  947227076U,   90719497U, 3854230320U,
-	 784028434U, 2142399787U, 1563449646U, 2844400217U,  819143172U,
-	2883302356U, 2328055304U, 1328532246U, 2603885363U, 3375188924U,
-	 933941291U, 3627039714U, 2129697284U, 2167253953U, 2506905438U,
-	1412424497U, 2981395985U, 1418359660U, 2925902456U,   52752784U,
-	3713667988U, 3924669405U,  648975707U, 1145520213U, 4018650664U,
-	3805915440U, 2380542088U, 2013260958U, 3262572197U, 2465078101U,
-	1114540067U, 3728768081U, 2396958768U,  590672271U,  904818725U,
-	4263660715U,  700754408U, 1042601829U, 4094111823U, 4274838909U,
-	2512692617U, 2774300207U, 2057306915U, 3470942453U,   99333088U,
-	1142661026U, 2889931380U,   14316674U, 2201179167U,  415289459U,
-	 448265759U, 3515142743U, 3254903683U,  246633281U, 1184307224U,
-	2418347830U, 2092967314U, 2682072314U, 2558750234U, 2000352263U,
-	1544150531U,  399010405U, 1513946097U,  499682937U,  461167460U,
-	3045570638U, 1633669705U,  851492362U, 4052801922U, 2055266765U,
-	 635556996U,  368266356U, 2385737383U, 3218202352U, 2603772408U,
-	 349178792U,  226482567U, 3102426060U, 3575998268U, 2103001871U,
-	3243137071U,  225500688U, 1634718593U, 4283311431U, 4292122923U,
-	3842802787U,  811735523U,  105712518U,  663434053U, 1855889273U,
-	2847972595U, 1196355421U, 2552150115U, 4254510614U, 3752181265U,
-	3430721819U, 3828705396U, 3436287905U, 3441964937U, 4123670631U,
-	 353001539U,  459496439U, 3799690868U, 1293777660U, 2761079737U,
-	 498096339U, 3398433374U, 4080378380U, 2304691596U, 2995729055U,
-	4134660419U, 3903444024U, 3576494993U,  203682175U, 3321164857U,
-	2747963611U,   79749085U, 2992890370U, 1240278549U, 1772175713U,
-	2111331972U, 2655023449U, 1683896345U, 2836027212U, 3482868021U,
-	2489884874U,  756853961U, 2298874501U, 4013448667U, 4143996022U,
-	2948306858U, 4132920035U, 1283299272U,  995592228U, 3450508595U,
-	1027845759U, 1766942720U, 3861411826U, 1446861231U,   95974993U,
-	3502263554U, 1487532194U,  601502472U, 4129619129U,  250131773U,
-	2050079547U, 3198903947U, 3105589778U, 4066481316U, 3026383978U,
-	2276901713U,  365637751U, 2260718426U, 1394775634U, 1791172338U,
-	2690503163U, 2952737846U, 1568710462U,  732623190U, 2980358000U,
-	1053631832U, 1432426951U, 3229149635U, 1854113985U, 3719733532U,
-	3204031934U,  735775531U,  107468620U, 3734611984U,  631009402U,
-	3083622457U, 4109580626U,  159373458U, 1301970201U, 4132389302U,
-	1293255004U,  847182752U, 4170022737U,   96712900U, 2641406755U,
-	1381727755U,  405608287U, 4287919625U, 1703554290U, 3589580244U,
-	2911403488U,    2166565U, 2647306451U, 2330535117U, 1200815358U,
-	1165916754U,  245060911U, 4040679071U, 3684908771U, 2452834126U,
-	2486872773U, 2318678365U, 2940627908U, 1837837240U, 3447897409U,
-	4270484676U, 1495388728U, 3754288477U, 4204167884U, 1386977705U,
-	2692224733U, 3076249689U, 4109568048U, 4170955115U, 4167531356U,
-	4020189950U, 4261855038U, 3036907575U, 3410399885U, 3076395737U,
-	1046178638U,  144496770U,  230725846U, 3349637149U,   17065717U,
-	2809932048U, 2054581785U, 3608424964U, 3259628808U,  134897388U,
-	3743067463U,  257685904U, 3795656590U, 1562468719U, 3589103904U,
-	3120404710U,  254684547U, 2653661580U, 3663904795U, 2631942758U,
-	1063234347U, 2609732900U, 2332080715U, 3521125233U, 1180599599U,
-	1935868586U, 4110970440U,  296706371U, 2128666368U, 1319875791U,
-	1570900197U, 3096025483U, 1799882517U, 1928302007U, 1163707758U,
-	1244491489U, 3533770203U,  567496053U, 2757924305U, 2781639343U,
-	2818420107U,  560404889U, 2619609724U, 4176035430U, 2511289753U,
-	2521842019U, 3910553502U, 2926149387U, 3302078172U, 4237118867U,
-	 330725126U,  367400677U,  888239854U,  545570454U, 4259590525U,
-	 134343617U, 1102169784U, 1647463719U, 3260979784U, 1518840883U,
-	3631537963U, 3342671457U, 1301549147U, 2083739356U,  146593792U,
-	3217959080U,  652755743U, 2032187193U, 3898758414U, 1021358093U,
-	4037409230U, 2176407931U, 3427391950U, 2883553603U,  985613827U,
-	3105265092U, 3423168427U, 3387507672U,  467170288U, 2141266163U,
-	3723870208U,  916410914U, 1293987799U, 2652584950U,  769160137U,
-	3205292896U, 1561287359U, 1684510084U, 3136055621U, 3765171391U,
-	 639683232U, 2639569327U, 1218546948U, 4263586685U, 3058215773U,
-	2352279820U,  401870217U, 2625822463U, 1529125296U, 2981801895U,
-	1191285226U, 4027725437U, 3432700217U, 4098835661U,  971182783U,
-	2443861173U, 3881457123U, 3874386651U,  457276199U, 2638294160U,
-	4002809368U,  421169044U, 1112642589U, 3076213779U, 3387033971U,
-	2499610950U, 3057240914U, 1662679783U,  461224431U, 1168395933U
-};
-static const uint32_t init_by_array_32_expected[] = {
-	2920711183U, 3885745737U, 3501893680U,  856470934U, 1421864068U,
-	 277361036U, 1518638004U, 2328404353U, 3355513634U,   64329189U,
-	1624587673U, 3508467182U, 2481792141U, 3706480799U, 1925859037U,
-	2913275699U,  882658412U,  384641219U,  422202002U, 1873384891U,
-	2006084383U, 3924929912U, 1636718106U, 3108838742U, 1245465724U,
-	4195470535U,  779207191U, 1577721373U, 1390469554U, 2928648150U,
-	 121399709U, 3170839019U, 4044347501U,  953953814U, 3821710850U,
-	3085591323U, 3666535579U, 3577837737U, 2012008410U, 3565417471U,
-	4044408017U,  433600965U, 1637785608U, 1798509764U,  860770589U,
-	3081466273U, 3982393409U, 2451928325U, 3437124742U, 4093828739U,
-	3357389386U, 2154596123U,  496568176U, 2650035164U, 2472361850U,
-	   3438299U, 2150366101U, 1577256676U, 3802546413U, 1787774626U,
-	4078331588U, 3706103141U,  170391138U, 3806085154U, 1680970100U,
-	1961637521U, 3316029766U,  890610272U, 1453751581U, 1430283664U,
-	3051057411U, 3597003186U,  542563954U, 3796490244U, 1690016688U,
-	3448752238U,  440702173U,  347290497U, 1121336647U, 2540588620U,
-	 280881896U, 2495136428U,  213707396U,   15104824U, 2946180358U,
-	 659000016U,  566379385U, 2614030979U, 2855760170U,  334526548U,
-	2315569495U, 2729518615U,  564745877U, 1263517638U, 3157185798U,
-	1604852056U, 1011639885U, 2950579535U, 2524219188U,  312951012U,
-	1528896652U, 1327861054U, 2846910138U, 3966855905U, 2536721582U,
-	 855353911U, 1685434729U, 3303978929U, 1624872055U, 4020329649U,
-	3164802143U, 1642802700U, 1957727869U, 1792352426U, 3334618929U,
-	2631577923U, 3027156164U,  842334259U, 3353446843U, 1226432104U,
-	1742801369U, 3552852535U, 3471698828U, 1653910186U, 3380330939U,
-	2313782701U, 3351007196U, 2129839995U, 1800682418U, 4085884420U,
-	1625156629U, 3669701987U,  615211810U, 3294791649U, 4131143784U,
-	2590843588U, 3207422808U, 3275066464U,  561592872U, 3957205738U,
-	3396578098U,   48410678U, 3505556445U, 1005764855U, 3920606528U,
-	2936980473U, 2378918600U, 2404449845U, 1649515163U,  701203563U,
-	3705256349U,   83714199U, 3586854132U,  922978446U, 2863406304U,
-	3523398907U, 2606864832U, 2385399361U, 3171757816U, 4262841009U,
-	3645837721U, 1169579486U, 3666433897U, 3174689479U, 1457866976U,
-	3803895110U, 3346639145U, 1907224409U, 1978473712U, 1036712794U,
-	 980754888U, 1302782359U, 1765252468U,  459245755U, 3728923860U,
-	1512894209U, 2046491914U,  207860527U,  514188684U, 2288713615U,
-	1597354672U, 3349636117U, 2357291114U, 3995796221U,  945364213U,
-	1893326518U, 3770814016U, 1691552714U, 2397527410U,  967486361U,
-	 776416472U, 4197661421U,  951150819U, 1852770983U, 4044624181U,
-	1399439738U, 4194455275U, 2284037669U, 1550734958U, 3321078108U,
-	1865235926U, 2912129961U, 2664980877U, 1357572033U, 2600196436U,
-	2486728200U, 2372668724U, 1567316966U, 2374111491U, 1839843570U,
-	  20815612U, 3727008608U, 3871996229U,  824061249U, 1932503978U,
-	3404541726U,  758428924U, 2609331364U, 1223966026U, 1299179808U,
-	 648499352U, 2180134401U,  880821170U, 3781130950U,  113491270U,
-	1032413764U, 4185884695U, 2490396037U, 1201932817U, 4060951446U,
-	4165586898U, 1629813212U, 2887821158U,  415045333U,  628926856U,
-	2193466079U, 3391843445U, 2227540681U, 1907099846U, 2848448395U,
-	1717828221U, 1372704537U, 1707549841U, 2294058813U, 2101214437U,
-	2052479531U, 1695809164U, 3176587306U, 2632770465U,   81634404U,
-	1603220563U,  644238487U,  302857763U,  897352968U, 2613146653U,
-	1391730149U, 4245717312U, 4191828749U, 1948492526U, 2618174230U,
-	3992984522U, 2178852787U, 3596044509U, 3445573503U, 2026614616U,
-	 915763564U, 3415689334U, 2532153403U, 3879661562U, 2215027417U,
-	3111154986U, 2929478371U,  668346391U, 1152241381U, 2632029711U,
-	3004150659U, 2135025926U,  948690501U, 2799119116U, 4228829406U,
-	1981197489U, 4209064138U,  684318751U, 3459397845U,  201790843U,
-	4022541136U, 3043635877U,  492509624U, 3263466772U, 1509148086U,
-	 921459029U, 3198857146U,  705479721U, 3835966910U, 3603356465U,
-	 576159741U, 1742849431U,  594214882U, 2055294343U, 3634861861U,
-	 449571793U, 3246390646U, 3868232151U, 1479156585U, 2900125656U,
-	2464815318U, 3960178104U, 1784261920U,   18311476U, 3627135050U,
-	 644609697U,  424968996U,  919890700U, 2986824110U,  816423214U,
-	4003562844U, 1392714305U, 1757384428U, 2569030598U,  995949559U,
-	3875659880U, 2933807823U, 2752536860U, 2993858466U, 4030558899U,
-	2770783427U, 2775406005U, 2777781742U, 1931292655U,  472147933U,
-	3865853827U, 2726470545U, 2668412860U, 2887008249U,  408979190U,
-	3578063323U, 3242082049U, 1778193530U,   27981909U, 2362826515U,
-	 389875677U, 1043878156U,  581653903U, 3830568952U,  389535942U,
-	3713523185U, 2768373359U, 2526101582U, 1998618197U, 1160859704U,
-	3951172488U, 1098005003U,  906275699U, 3446228002U, 2220677963U,
-	2059306445U,  132199571U,  476838790U, 1868039399U, 3097344807U,
-	 857300945U,  396345050U, 2835919916U, 1782168828U, 1419519470U,
-	4288137521U,  819087232U,  596301494U,  872823172U, 1526888217U,
-	 805161465U, 1116186205U, 2829002754U, 2352620120U,  620121516U,
-	 354159268U, 3601949785U,  209568138U, 1352371732U, 2145977349U,
-	4236871834U, 1539414078U, 3558126206U, 3224857093U, 4164166682U,
-	3817553440U, 3301780278U, 2682696837U, 3734994768U, 1370950260U,
-	1477421202U, 2521315749U, 1330148125U, 1261554731U, 2769143688U,
-	3554756293U, 4235882678U, 3254686059U, 3530579953U, 1215452615U,
-	3574970923U, 4057131421U,  589224178U, 1000098193U,  171190718U,
-	2521852045U, 2351447494U, 2284441580U, 2646685513U, 3486933563U,
-	3789864960U, 1190528160U, 1702536782U, 1534105589U, 4262946827U,
-	2726686826U, 3584544841U, 2348270128U, 2145092281U, 2502718509U,
-	1027832411U, 3571171153U, 1287361161U, 4011474411U, 3241215351U,
-	2419700818U,  971242709U, 1361975763U, 1096842482U, 3271045537U,
-	  81165449U,  612438025U, 3912966678U, 1356929810U,  733545735U,
-	 537003843U, 1282953084U,  884458241U,  588930090U, 3930269801U,
-	2961472450U, 1219535534U, 3632251943U,  268183903U, 1441240533U,
-	3653903360U, 3854473319U, 2259087390U, 2548293048U, 2022641195U,
-	2105543911U, 1764085217U, 3246183186U,  482438805U,  888317895U,
-	2628314765U, 2466219854U,  717546004U, 2322237039U,  416725234U,
-	1544049923U, 1797944973U, 3398652364U, 3111909456U,  485742908U,
-	2277491072U, 1056355088U, 3181001278U,  129695079U, 2693624550U,
-	1764438564U, 3797785470U,  195503713U, 3266519725U, 2053389444U,
-	1961527818U, 3400226523U, 3777903038U, 2597274307U, 4235851091U,
-	4094406648U, 2171410785U, 1781151386U, 1378577117U,  654643266U,
-	3424024173U, 3385813322U,  679385799U,  479380913U,  681715441U,
-	3096225905U,  276813409U, 3854398070U, 2721105350U,  831263315U,
-	3276280337U, 2628301522U, 3984868494U, 1466099834U, 2104922114U,
-	1412672743U,  820330404U, 3491501010U,  942735832U,  710652807U,
-	3972652090U,  679881088U,   40577009U, 3705286397U, 2815423480U,
-	3566262429U,  663396513U, 3777887429U, 4016670678U,  404539370U,
-	1142712925U, 1140173408U, 2913248352U, 2872321286U,  263751841U,
-	3175196073U, 3162557581U, 2878996619U,   75498548U, 3836833140U,
-	3284664959U, 1157523805U,  112847376U,  207855609U, 1337979698U,
-	1222578451U,  157107174U,  901174378U, 3883717063U, 1618632639U,
-	1767889440U, 4264698824U, 1582999313U,  884471997U, 2508825098U,
-	3756370771U, 2457213553U, 3565776881U, 3709583214U,  915609601U,
-	 460833524U, 1091049576U,   85522880U,    2553251U,  132102809U,
-	2429882442U, 2562084610U, 1386507633U, 4112471229U,   21965213U,
-	1981516006U, 2418435617U, 3054872091U, 4251511224U, 2025783543U,
-	1916911512U, 2454491136U, 3938440891U, 3825869115U, 1121698605U,
-	3463052265U,  802340101U, 1912886800U, 4031997367U, 3550640406U,
-	1596096923U,  610150600U,  431464457U, 2541325046U,  486478003U,
-	 739704936U, 2862696430U, 3037903166U, 1129749694U, 2611481261U,
-	1228993498U,  510075548U, 3424962587U, 2458689681U,  818934833U,
-	4233309125U, 1608196251U, 3419476016U, 1858543939U, 2682166524U,
-	3317854285U,  631986188U, 3008214764U,  613826412U, 3567358221U,
-	3512343882U, 1552467474U, 3316162670U, 1275841024U, 4142173454U,
-	 565267881U,  768644821U,  198310105U, 2396688616U, 1837659011U,
-	 203429334U,  854539004U, 4235811518U, 3338304926U, 3730418692U,
-	3852254981U, 3032046452U, 2329811860U, 2303590566U, 2696092212U,
-	3894665932U,  145835667U,  249563655U, 1932210840U, 2431696407U,
-	3312636759U,  214962629U, 2092026914U, 3020145527U, 4073039873U,
-	2739105705U, 1308336752U,  855104522U, 2391715321U,   67448785U,
-	 547989482U,  854411802U, 3608633740U,  431731530U,  537375589U,
-	3888005760U,  696099141U,  397343236U, 1864511780U,   44029739U,
-	1729526891U, 1993398655U, 2010173426U, 2591546756U,  275223291U,
-	1503900299U, 4217765081U, 2185635252U, 1122436015U, 3550155364U,
-	 681707194U, 3260479338U,  933579397U, 2983029282U, 2505504587U,
-	2667410393U, 2962684490U, 4139721708U, 2658172284U, 2452602383U,
-	2607631612U, 1344296217U, 3075398709U, 2949785295U, 1049956168U,
-	3917185129U, 2155660174U, 3280524475U, 1503827867U,  674380765U,
-	1918468193U, 3843983676U,  634358221U, 2538335643U, 1873351298U,
-	3368723763U, 2129144130U, 3203528633U, 3087174986U, 2691698871U,
-	2516284287U,   24437745U, 1118381474U, 2816314867U, 2448576035U,
-	4281989654U,  217287825U,  165872888U, 2628995722U, 3533525116U,
-	2721669106U,  872340568U, 3429930655U, 3309047304U, 3916704967U,
-	3270160355U, 1348884255U, 1634797670U,  881214967U, 4259633554U,
-	 174613027U, 1103974314U, 1625224232U, 2678368291U, 1133866707U,
-	3853082619U, 4073196549U, 1189620777U,  637238656U,  930241537U,
-	4042750792U, 3842136042U, 2417007212U, 2524907510U, 1243036827U,
-	1282059441U, 3764588774U, 1394459615U, 2323620015U, 1166152231U,
-	3307479609U, 3849322257U, 3507445699U, 4247696636U,  758393720U,
-	 967665141U, 1095244571U, 1319812152U,  407678762U, 2640605208U,
-	2170766134U, 3663594275U, 4039329364U, 2512175520U,  725523154U,
-	2249807004U, 3312617979U, 2414634172U, 1278482215U,  349206484U,
-	1573063308U, 1196429124U, 3873264116U, 2400067801U,  268795167U,
-	 226175489U, 2961367263U, 1968719665U,   42656370U, 1010790699U,
-	 561600615U, 2422453992U, 3082197735U, 1636700484U, 3977715296U,
-	3125350482U, 3478021514U, 2227819446U, 1540868045U, 3061908980U,
-	1087362407U, 3625200291U,  361937537U,  580441897U, 1520043666U,
-	2270875402U, 1009161260U, 2502355842U, 4278769785U,  473902412U,
-	1057239083U, 1905829039U, 1483781177U, 2080011417U, 1207494246U,
-	1806991954U, 2194674403U, 3455972205U,  807207678U, 3655655687U,
-	 674112918U,  195425752U, 3917890095U, 1874364234U, 1837892715U,
-	3663478166U, 1548892014U, 2570748714U, 2049929836U, 2167029704U,
-	 697543767U, 3499545023U, 3342496315U, 1725251190U, 3561387469U,
-	2905606616U, 1580182447U, 3934525927U, 4103172792U, 1365672522U,
-	1534795737U, 3308667416U, 2841911405U, 3943182730U, 4072020313U,
-	3494770452U, 3332626671U,   55327267U,  478030603U,  411080625U,
-	3419529010U, 1604767823U, 3513468014U,  570668510U,  913790824U,
-	2283967995U,  695159462U, 3825542932U, 4150698144U, 1829758699U,
-	 202895590U, 1609122645U, 1267651008U, 2910315509U, 2511475445U,
-	2477423819U, 3932081579U,  900879979U, 2145588390U, 2670007504U,
-	 580819444U, 1864996828U, 2526325979U, 1019124258U,  815508628U,
-	2765933989U, 1277301341U, 3006021786U,  855540956U,  288025710U,
-	1919594237U, 2331223864U,  177452412U, 2475870369U, 2689291749U,
-	 865194284U,  253432152U, 2628531804U, 2861208555U, 2361597573U,
-	1653952120U, 1039661024U, 2159959078U, 3709040440U, 3564718533U,
-	2596878672U, 2041442161U,   31164696U, 2662962485U, 3665637339U,
-	1678115244U, 2699839832U, 3651968520U, 3521595541U,  458433303U,
-	2423096824U,   21831741U,  380011703U, 2498168716U,  861806087U,
-	1673574843U, 4188794405U, 2520563651U, 2632279153U, 2170465525U,
-	4171949898U, 3886039621U, 1661344005U, 3424285243U,  992588372U,
-	2500984144U, 2993248497U, 3590193895U, 1535327365U,  515645636U,
-	 131633450U, 3729760261U, 1613045101U, 3254194278U,   15889678U,
-	1493590689U,  244148718U, 2991472662U, 1401629333U,  777349878U,
-	2501401703U, 4285518317U, 3794656178U,  955526526U, 3442142820U,
-	3970298374U,  736025417U, 2737370764U, 1271509744U,  440570731U,
-	 136141826U, 1596189518U,  923399175U,  257541519U, 3505774281U,
-	2194358432U, 2518162991U, 1379893637U, 2667767062U, 3748146247U,
-	1821712620U, 3923161384U, 1947811444U, 2392527197U, 4127419685U,
-	1423694998U, 4156576871U, 1382885582U, 3420127279U, 3617499534U,
-	2994377493U, 4038063986U, 1918458672U, 2983166794U, 4200449033U,
-	 353294540U, 1609232588U,  243926648U, 2332803291U,  507996832U,
-	2392838793U, 4075145196U, 2060984340U, 4287475136U,   88232602U,
-	2491531140U, 4159725633U, 2272075455U,  759298618U,  201384554U,
-	 838356250U, 1416268324U,  674476934U,   90795364U,  141672229U,
-	3660399588U, 4196417251U, 3249270244U, 3774530247U,   59587265U,
-	3683164208U,   19392575U, 1463123697U, 1882205379U,  293780489U,
-	2553160622U, 2933904694U,  675638239U, 2851336944U, 1435238743U,
-	2448730183U,  804436302U, 2119845972U,  322560608U, 4097732704U,
-	2987802540U,  641492617U, 2575442710U, 4217822703U, 3271835300U,
-	2836418300U, 3739921620U, 2138378768U, 2879771855U, 4294903423U,
-	3121097946U, 2603440486U, 2560820391U, 1012930944U, 2313499967U,
-	 584489368U, 3431165766U,  897384869U, 2062537737U, 2847889234U,
-	3742362450U, 2951174585U, 4204621084U, 1109373893U, 3668075775U,
-	2750138839U, 3518055702U,  733072558U, 4169325400U,  788493625U
-};
-static const uint64_t init_gen_rand_64_expected[] = {
-	KQU(16924766246869039260), KQU( 8201438687333352714),
-	KQU( 2265290287015001750), KQU(18397264611805473832),
-	KQU( 3375255223302384358), KQU( 6345559975416828796),
-	KQU(18229739242790328073), KQU( 7596792742098800905),
-	KQU(  255338647169685981), KQU( 2052747240048610300),
-	KQU(18328151576097299343), KQU(12472905421133796567),
-	KQU(11315245349717600863), KQU(16594110197775871209),
-	KQU(15708751964632456450), KQU(10452031272054632535),
-	KQU(11097646720811454386), KQU( 4556090668445745441),
-	KQU(17116187693090663106), KQU(14931526836144510645),
-	KQU( 9190752218020552591), KQU( 9625800285771901401),
-	KQU(13995141077659972832), KQU( 5194209094927829625),
-	KQU( 4156788379151063303), KQU( 8523452593770139494),
-	KQU(14082382103049296727), KQU( 2462601863986088483),
-	KQU( 3030583461592840678), KQU( 5221622077872827681),
-	KQU( 3084210671228981236), KQU(13956758381389953823),
-	KQU(13503889856213423831), KQU(15696904024189836170),
-	KQU( 4612584152877036206), KQU( 6231135538447867881),
-	KQU(10172457294158869468), KQU( 6452258628466708150),
-	KQU(14044432824917330221), KQU(  370168364480044279),
-	KQU(10102144686427193359), KQU(  667870489994776076),
-	KQU( 2732271956925885858), KQU(18027788905977284151),
-	KQU(15009842788582923859), KQU( 7136357960180199542),
-	KQU(15901736243475578127), KQU(16951293785352615701),
-	KQU(10551492125243691632), KQU(17668869969146434804),
-	KQU(13646002971174390445), KQU( 9804471050759613248),
-	KQU( 5511670439655935493), KQU(18103342091070400926),
-	KQU(17224512747665137533), KQU(15534627482992618168),
-	KQU( 1423813266186582647), KQU(15821176807932930024),
-	KQU(   30323369733607156), KQU(11599382494723479403),
-	KQU(  653856076586810062), KQU( 3176437395144899659),
-	KQU(14028076268147963917), KQU(16156398271809666195),
-	KQU( 3166955484848201676), KQU( 5746805620136919390),
-	KQU(17297845208891256593), KQU(11691653183226428483),
-	KQU(17900026146506981577), KQU(15387382115755971042),
-	KQU(16923567681040845943), KQU( 8039057517199388606),
-	KQU(11748409241468629263), KQU(  794358245539076095),
-	KQU(13438501964693401242), KQU(14036803236515618962),
-	KQU( 5252311215205424721), KQU(17806589612915509081),
-	KQU( 6802767092397596006), KQU(14212120431184557140),
-	KQU( 1072951366761385712), KQU(13098491780722836296),
-	KQU( 9466676828710797353), KQU(12673056849042830081),
-	KQU(12763726623645357580), KQU(16468961652999309493),
-	KQU(15305979875636438926), KQU(17444713151223449734),
-	KQU( 5692214267627883674), KQU(13049589139196151505),
-	KQU(  880115207831670745), KQU( 1776529075789695498),
-	KQU(16695225897801466485), KQU(10666901778795346845),
-	KQU( 6164389346722833869), KQU( 2863817793264300475),
-	KQU( 9464049921886304754), KQU( 3993566636740015468),
-	KQU( 9983749692528514136), KQU(16375286075057755211),
-	KQU(16042643417005440820), KQU(11445419662923489877),
-	KQU( 7999038846885158836), KQU( 6721913661721511535),
-	KQU( 5363052654139357320), KQU( 1817788761173584205),
-	KQU(13290974386445856444), KQU( 4650350818937984680),
-	KQU( 8219183528102484836), KQU( 1569862923500819899),
-	KQU( 4189359732136641860), KQU(14202822961683148583),
-	KQU( 4457498315309429058), KQU(13089067387019074834),
-	KQU(11075517153328927293), KQU(10277016248336668389),
-	KQU( 7070509725324401122), KQU(17808892017780289380),
-	KQU(13143367339909287349), KQU( 1377743745360085151),
-	KQU( 5749341807421286485), KQU(14832814616770931325),
-	KQU( 7688820635324359492), KQU(10960474011539770045),
-	KQU(   81970066653179790), KQU(12619476072607878022),
-	KQU( 4419566616271201744), KQU(15147917311750568503),
-	KQU( 5549739182852706345), KQU( 7308198397975204770),
-	KQU(13580425496671289278), KQU(17070764785210130301),
-	KQU( 8202832846285604405), KQU( 6873046287640887249),
-	KQU( 6927424434308206114), KQU( 6139014645937224874),
-	KQU(10290373645978487639), KQU(15904261291701523804),
-	KQU( 9628743442057826883), KQU(18383429096255546714),
-	KQU( 4977413265753686967), KQU( 7714317492425012869),
-	KQU( 9025232586309926193), KQU(14627338359776709107),
-	KQU(14759849896467790763), KQU(10931129435864423252),
-	KQU( 4588456988775014359), KQU(10699388531797056724),
-	KQU(  468652268869238792), KQU( 5755943035328078086),
-	KQU( 2102437379988580216), KQU( 9986312786506674028),
-	KQU( 2654207180040945604), KQU( 8726634790559960062),
-	KQU(  100497234871808137), KQU( 2800137176951425819),
-	KQU( 6076627612918553487), KQU( 5780186919186152796),
-	KQU( 8179183595769929098), KQU( 6009426283716221169),
-	KQU( 2796662551397449358), KQU( 1756961367041986764),
-	KQU( 6972897917355606205), KQU(14524774345368968243),
-	KQU( 2773529684745706940), KQU( 4853632376213075959),
-	KQU( 4198177923731358102), KQU( 8271224913084139776),
-	KQU( 2741753121611092226), KQU(16782366145996731181),
-	KQU(15426125238972640790), KQU(13595497100671260342),
-	KQU( 3173531022836259898), KQU( 6573264560319511662),
-	KQU(18041111951511157441), KQU( 2351433581833135952),
-	KQU( 3113255578908173487), KQU( 1739371330877858784),
-	KQU(16046126562789165480), KQU( 8072101652214192925),
-	KQU(15267091584090664910), KQU( 9309579200403648940),
-	KQU( 5218892439752408722), KQU(14492477246004337115),
-	KQU(17431037586679770619), KQU( 7385248135963250480),
-	KQU( 9580144956565560660), KQU( 4919546228040008720),
-	KQU(15261542469145035584), KQU(18233297270822253102),
-	KQU( 5453248417992302857), KQU( 9309519155931460285),
-	KQU(10342813012345291756), KQU(15676085186784762381),
-	KQU(15912092950691300645), KQU( 9371053121499003195),
-	KQU( 9897186478226866746), KQU(14061858287188196327),
-	KQU(  122575971620788119), KQU(12146750969116317754),
-	KQU( 4438317272813245201), KQU( 8332576791009527119),
-	KQU(13907785691786542057), KQU(10374194887283287467),
-	KQU( 2098798755649059566), KQU( 3416235197748288894),
-	KQU( 8688269957320773484), KQU( 7503964602397371571),
-	KQU(16724977015147478236), KQU( 9461512855439858184),
-	KQU(13259049744534534727), KQU( 3583094952542899294),
-	KQU( 8764245731305528292), KQU(13240823595462088985),
-	KQU(13716141617617910448), KQU(18114969519935960955),
-	KQU( 2297553615798302206), KQU( 4585521442944663362),
-	KQU(17776858680630198686), KQU( 4685873229192163363),
-	KQU(  152558080671135627), KQU(15424900540842670088),
-	KQU(13229630297130024108), KQU(17530268788245718717),
-	KQU(16675633913065714144), KQU( 3158912717897568068),
-	KQU(15399132185380087288), KQU( 7401418744515677872),
-	KQU(13135412922344398535), KQU( 6385314346100509511),
-	KQU(13962867001134161139), KQU(10272780155442671999),
-	KQU(12894856086597769142), KQU(13340877795287554994),
-	KQU(12913630602094607396), KQU(12543167911119793857),
-	KQU(17343570372251873096), KQU(10959487764494150545),
-	KQU( 6966737953093821128), KQU(13780699135496988601),
-	KQU( 4405070719380142046), KQU(14923788365607284982),
-	KQU( 2869487678905148380), KQU( 6416272754197188403),
-	KQU(15017380475943612591), KQU( 1995636220918429487),
-	KQU( 3402016804620122716), KQU(15800188663407057080),
-	KQU(11362369990390932882), KQU(15262183501637986147),
-	KQU(10239175385387371494), KQU( 9352042420365748334),
-	KQU( 1682457034285119875), KQU( 1724710651376289644),
-	KQU( 2038157098893817966), KQU( 9897825558324608773),
-	KQU( 1477666236519164736), KQU(16835397314511233640),
-	KQU(10370866327005346508), KQU(10157504370660621982),
-	KQU(12113904045335882069), KQU(13326444439742783008),
-	KQU(11302769043000765804), KQU(13594979923955228484),
-	KQU(11779351762613475968), KQU( 3786101619539298383),
-	KQU( 8021122969180846063), KQU(15745904401162500495),
-	KQU(10762168465993897267), KQU(13552058957896319026),
-	KQU(11200228655252462013), KQU( 5035370357337441226),
-	KQU( 7593918984545500013), KQU( 5418554918361528700),
-	KQU( 4858270799405446371), KQU( 9974659566876282544),
-	KQU(18227595922273957859), KQU( 2772778443635656220),
-	KQU(14285143053182085385), KQU( 9939700992429600469),
-	KQU(12756185904545598068), KQU( 2020783375367345262),
-	KQU(   57026775058331227), KQU(  950827867930065454),
-	KQU( 6602279670145371217), KQU( 2291171535443566929),
-	KQU( 5832380724425010313), KQU( 1220343904715982285),
-	KQU(17045542598598037633), KQU(15460481779702820971),
-	KQU(13948388779949365130), KQU(13975040175430829518),
-	KQU(17477538238425541763), KQU(11104663041851745725),
-	KQU(15860992957141157587), KQU(14529434633012950138),
-	KQU( 2504838019075394203), KQU( 7512113882611121886),
-	KQU( 4859973559980886617), KQU( 1258601555703250219),
-	KQU(15594548157514316394), KQU( 4516730171963773048),
-	KQU(11380103193905031983), KQU( 6809282239982353344),
-	KQU(18045256930420065002), KQU( 2453702683108791859),
-	KQU(  977214582986981460), KQU( 2006410402232713466),
-	KQU( 6192236267216378358), KQU( 3429468402195675253),
-	KQU(18146933153017348921), KQU(17369978576367231139),
-	KQU( 1246940717230386603), KQU(11335758870083327110),
-	KQU(14166488801730353682), KQU( 9008573127269635732),
-	KQU(10776025389820643815), KQU(15087605441903942962),
-	KQU( 1359542462712147922), KQU(13898874411226454206),
-	KQU(17911176066536804411), KQU( 9435590428600085274),
-	KQU(  294488509967864007), KQU( 8890111397567922046),
-	KQU( 7987823476034328778), KQU(13263827582440967651),
-	KQU( 7503774813106751573), KQU(14974747296185646837),
-	KQU( 8504765037032103375), KQU(17340303357444536213),
-	KQU( 7704610912964485743), KQU( 8107533670327205061),
-	KQU( 9062969835083315985), KQU(16968963142126734184),
-	KQU(12958041214190810180), KQU( 2720170147759570200),
-	KQU( 2986358963942189566), KQU(14884226322219356580),
-	KQU(  286224325144368520), KQU(11313800433154279797),
-	KQU(18366849528439673248), KQU(17899725929482368789),
-	KQU( 3730004284609106799), KQU( 1654474302052767205),
-	KQU( 5006698007047077032), KQU( 8196893913601182838),
-	KQU(15214541774425211640), KQU(17391346045606626073),
-	KQU( 8369003584076969089), KQU( 3939046733368550293),
-	KQU(10178639720308707785), KQU( 2180248669304388697),
-	KQU(   62894391300126322), KQU( 9205708961736223191),
-	KQU( 6837431058165360438), KQU( 3150743890848308214),
-	KQU(17849330658111464583), KQU(12214815643135450865),
-	KQU(13410713840519603402), KQU( 3200778126692046802),
-	KQU(13354780043041779313), KQU(  800850022756886036),
-	KQU(15660052933953067433), KQU( 6572823544154375676),
-	KQU(11030281857015819266), KQU(12682241941471433835),
-	KQU(11654136407300274693), KQU( 4517795492388641109),
-	KQU( 9757017371504524244), KQU(17833043400781889277),
-	KQU(12685085201747792227), KQU(10408057728835019573),
-	KQU(   98370418513455221), KQU( 6732663555696848598),
-	KQU(13248530959948529780), KQU( 3530441401230622826),
-	KQU(18188251992895660615), KQU( 1847918354186383756),
-	KQU( 1127392190402660921), KQU(11293734643143819463),
-	KQU( 3015506344578682982), KQU(13852645444071153329),
-	KQU( 2121359659091349142), KQU( 1294604376116677694),
-	KQU( 5616576231286352318), KQU( 7112502442954235625),
-	KQU(11676228199551561689), KQU(12925182803007305359),
-	KQU( 7852375518160493082), KQU( 1136513130539296154),
-	KQU( 5636923900916593195), KQU( 3221077517612607747),
-	KQU(17784790465798152513), KQU( 3554210049056995938),
-	KQU(17476839685878225874), KQU( 3206836372585575732),
-	KQU( 2765333945644823430), KQU(10080070903718799528),
-	KQU( 5412370818878286353), KQU( 9689685887726257728),
-	KQU( 8236117509123533998), KQU( 1951139137165040214),
-	KQU( 4492205209227980349), KQU(16541291230861602967),
-	KQU( 1424371548301437940), KQU( 9117562079669206794),
-	KQU(14374681563251691625), KQU(13873164030199921303),
-	KQU( 6680317946770936731), KQU(15586334026918276214),
-	KQU(10896213950976109802), KQU( 9506261949596413689),
-	KQU( 9903949574308040616), KQU( 6038397344557204470),
-	KQU(  174601465422373648), KQU(15946141191338238030),
-	KQU(17142225620992044937), KQU( 7552030283784477064),
-	KQU( 2947372384532947997), KQU(  510797021688197711),
-	KQU( 4962499439249363461), KQU(   23770320158385357),
-	KQU(  959774499105138124), KQU( 1468396011518788276),
-	KQU( 2015698006852312308), KQU( 4149400718489980136),
-	KQU( 5992916099522371188), KQU(10819182935265531076),
-	KQU(16189787999192351131), KQU(  342833961790261950),
-	KQU(12470830319550495336), KQU(18128495041912812501),
-	KQU( 1193600899723524337), KQU( 9056793666590079770),
-	KQU( 2154021227041669041), KQU( 4963570213951235735),
-	KQU( 4865075960209211409), KQU( 2097724599039942963),
-	KQU( 2024080278583179845), KQU(11527054549196576736),
-	KQU(10650256084182390252), KQU( 4808408648695766755),
-	KQU( 1642839215013788844), KQU(10607187948250398390),
-	KQU( 7076868166085913508), KQU(  730522571106887032),
-	KQU(12500579240208524895), KQU( 4484390097311355324),
-	KQU(15145801330700623870), KQU( 8055827661392944028),
-	KQU( 5865092976832712268), KQU(15159212508053625143),
-	KQU( 3560964582876483341), KQU( 4070052741344438280),
-	KQU( 6032585709886855634), KQU(15643262320904604873),
-	KQU( 2565119772293371111), KQU(  318314293065348260),
-	KQU(15047458749141511872), KQU( 7772788389811528730),
-	KQU( 7081187494343801976), KQU( 6465136009467253947),
-	KQU(10425940692543362069), KQU(  554608190318339115),
-	KQU(14796699860302125214), KQU( 1638153134431111443),
-	KQU(10336967447052276248), KQU( 8412308070396592958),
-	KQU( 4004557277152051226), KQU( 8143598997278774834),
-	KQU(16413323996508783221), KQU(13139418758033994949),
-	KQU( 9772709138335006667), KQU( 2818167159287157659),
-	KQU(17091740573832523669), KQU(14629199013130751608),
-	KQU(18268322711500338185), KQU( 8290963415675493063),
-	KQU( 8830864907452542588), KQU( 1614839084637494849),
-	KQU(14855358500870422231), KQU( 3472996748392519937),
-	KQU(15317151166268877716), KQU( 5825895018698400362),
-	KQU(16730208429367544129), KQU(10481156578141202800),
-	KQU( 4746166512382823750), KQU(12720876014472464998),
-	KQU( 8825177124486735972), KQU(13733447296837467838),
-	KQU( 6412293741681359625), KQU( 8313213138756135033),
-	KQU(11421481194803712517), KQU( 7997007691544174032),
-	KQU( 6812963847917605930), KQU( 9683091901227558641),
-	KQU(14703594165860324713), KQU( 1775476144519618309),
-	KQU( 2724283288516469519), KQU(  717642555185856868),
-	KQU( 8736402192215092346), KQU(11878800336431381021),
-	KQU( 4348816066017061293), KQU( 6115112756583631307),
-	KQU( 9176597239667142976), KQU(12615622714894259204),
-	KQU(10283406711301385987), KQU( 5111762509485379420),
-	KQU( 3118290051198688449), KQU( 7345123071632232145),
-	KQU( 9176423451688682359), KQU( 4843865456157868971),
-	KQU(12008036363752566088), KQU(12058837181919397720),
-	KQU( 2145073958457347366), KQU( 1526504881672818067),
-	KQU( 3488830105567134848), KQU(13208362960674805143),
-	KQU( 4077549672899572192), KQU( 7770995684693818365),
-	KQU( 1398532341546313593), KQU(12711859908703927840),
-	KQU( 1417561172594446813), KQU(17045191024194170604),
-	KQU( 4101933177604931713), KQU(14708428834203480320),
-	KQU(17447509264469407724), KQU(14314821973983434255),
-	KQU(17990472271061617265), KQU( 5087756685841673942),
-	KQU(12797820586893859939), KQU( 1778128952671092879),
-	KQU( 3535918530508665898), KQU( 9035729701042481301),
-	KQU(14808661568277079962), KQU(14587345077537747914),
-	KQU(11920080002323122708), KQU( 6426515805197278753),
-	KQU( 3295612216725984831), KQU(11040722532100876120),
-	KQU(12305952936387598754), KQU(16097391899742004253),
-	KQU( 4908537335606182208), KQU(12446674552196795504),
-	KQU(16010497855816895177), KQU( 9194378874788615551),
-	KQU( 3382957529567613384), KQU( 5154647600754974077),
-	KQU( 9801822865328396141), KQU( 9023662173919288143),
-	KQU(17623115353825147868), KQU( 8238115767443015816),
-	KQU(15811444159859002560), KQU( 9085612528904059661),
-	KQU( 6888601089398614254), KQU(  258252992894160189),
-	KQU( 6704363880792428622), KQU( 6114966032147235763),
-	KQU(11075393882690261875), KQU( 8797664238933620407),
-	KQU( 5901892006476726920), KQU( 5309780159285518958),
-	KQU(14940808387240817367), KQU(14642032021449656698),
-	KQU( 9808256672068504139), KQU( 3670135111380607658),
-	KQU(11211211097845960152), KQU( 1474304506716695808),
-	KQU(15843166204506876239), KQU( 7661051252471780561),
-	KQU(10170905502249418476), KQU( 7801416045582028589),
-	KQU( 2763981484737053050), KQU( 9491377905499253054),
-	KQU(16201395896336915095), KQU( 9256513756442782198),
-	KQU( 5411283157972456034), KQU( 5059433122288321676),
-	KQU( 4327408006721123357), KQU( 9278544078834433377),
-	KQU( 7601527110882281612), KQU(11848295896975505251),
-	KQU(12096998801094735560), KQU(14773480339823506413),
-	KQU(15586227433895802149), KQU(12786541257830242872),
-	KQU( 6904692985140503067), KQU( 5309011515263103959),
-	KQU(12105257191179371066), KQU(14654380212442225037),
-	KQU( 2556774974190695009), KQU( 4461297399927600261),
-	KQU(14888225660915118646), KQU(14915459341148291824),
-	KQU( 2738802166252327631), KQU( 6047155789239131512),
-	KQU(12920545353217010338), KQU(10697617257007840205),
-	KQU( 2751585253158203504), KQU(13252729159780047496),
-	KQU(14700326134672815469), KQU(14082527904374600529),
-	KQU(16852962273496542070), KQU(17446675504235853907),
-	KQU(15019600398527572311), KQU(12312781346344081551),
-	KQU(14524667935039810450), KQU( 5634005663377195738),
-	KQU(11375574739525000569), KQU( 2423665396433260040),
-	KQU( 5222836914796015410), KQU( 4397666386492647387),
-	KQU( 4619294441691707638), KQU(  665088602354770716),
-	KQU(13246495665281593610), KQU( 6564144270549729409),
-	KQU(10223216188145661688), KQU( 3961556907299230585),
-	KQU(11543262515492439914), KQU(16118031437285993790),
-	KQU( 7143417964520166465), KQU(13295053515909486772),
-	KQU(   40434666004899675), KQU(17127804194038347164),
-	KQU( 8599165966560586269), KQU( 8214016749011284903),
-	KQU(13725130352140465239), KQU( 5467254474431726291),
-	KQU( 7748584297438219877), KQU(16933551114829772472),
-	KQU( 2169618439506799400), KQU( 2169787627665113463),
-	KQU(17314493571267943764), KQU(18053575102911354912),
-	KQU(11928303275378476973), KQU(11593850925061715550),
-	KQU(17782269923473589362), KQU( 3280235307704747039),
-	KQU( 6145343578598685149), KQU(17080117031114086090),
-	KQU(18066839902983594755), KQU( 6517508430331020706),
-	KQU( 8092908893950411541), KQU(12558378233386153732),
-	KQU( 4476532167973132976), KQU(16081642430367025016),
-	KQU( 4233154094369139361), KQU( 8693630486693161027),
-	KQU(11244959343027742285), KQU(12273503967768513508),
-	KQU(14108978636385284876), KQU( 7242414665378826984),
-	KQU( 6561316938846562432), KQU( 8601038474994665795),
-	KQU(17532942353612365904), KQU(17940076637020912186),
-	KQU( 7340260368823171304), KQU( 7061807613916067905),
-	KQU(10561734935039519326), KQU(17990796503724650862),
-	KQU( 6208732943911827159), KQU(  359077562804090617),
-	KQU(14177751537784403113), KQU(10659599444915362902),
-	KQU(15081727220615085833), KQU(13417573895659757486),
-	KQU(15513842342017811524), KQU(11814141516204288231),
-	KQU( 1827312513875101814), KQU( 2804611699894603103),
-	KQU(17116500469975602763), KQU(12270191815211952087),
-	KQU(12256358467786024988), KQU(18435021722453971267),
-	KQU(  671330264390865618), KQU(  476504300460286050),
-	KQU(16465470901027093441), KQU( 4047724406247136402),
-	KQU( 1322305451411883346), KQU( 1388308688834322280),
-	KQU( 7303989085269758176), KQU( 9323792664765233642),
-	KQU( 4542762575316368936), KQU(17342696132794337618),
-	KQU( 4588025054768498379), KQU(13415475057390330804),
-	KQU(17880279491733405570), KQU(10610553400618620353),
-	KQU( 3180842072658960139), KQU(13002966655454270120),
-	KQU( 1665301181064982826), KQU( 7083673946791258979),
-	KQU(  190522247122496820), KQU(17388280237250677740),
-	KQU( 8430770379923642945), KQU(12987180971921668584),
-	KQU( 2311086108365390642), KQU( 2870984383579822345),
-	KQU(14014682609164653318), KQU(14467187293062251484),
-	KQU(  192186361147413298), KQU(15171951713531796524),
-	KQU( 9900305495015948728), KQU(17958004775615466344),
-	KQU(14346380954498606514), KQU(18040047357617407096),
-	KQU( 5035237584833424532), KQU(15089555460613972287),
-	KQU( 4131411873749729831), KQU( 1329013581168250330),
-	KQU(10095353333051193949), KQU(10749518561022462716),
-	KQU( 9050611429810755847), KQU(15022028840236655649),
-	KQU( 8775554279239748298), KQU(13105754025489230502),
-	KQU(15471300118574167585), KQU(   89864764002355628),
-	KQU( 8776416323420466637), KQU( 5280258630612040891),
-	KQU( 2719174488591862912), KQU( 7599309137399661994),
-	KQU(15012887256778039979), KQU(14062981725630928925),
-	KQU(12038536286991689603), KQU( 7089756544681775245),
-	KQU(10376661532744718039), KQU( 1265198725901533130),
-	KQU(13807996727081142408), KQU( 2935019626765036403),
-	KQU( 7651672460680700141), KQU( 3644093016200370795),
-	KQU( 2840982578090080674), KQU(17956262740157449201),
-	KQU(18267979450492880548), KQU(11799503659796848070),
-	KQU( 9942537025669672388), KQU(11886606816406990297),
-	KQU( 5488594946437447576), KQU( 7226714353282744302),
-	KQU( 3784851653123877043), KQU(  878018453244803041),
-	KQU(12110022586268616085), KQU(  734072179404675123),
-	KQU(11869573627998248542), KQU(  469150421297783998),
-	KQU(  260151124912803804), KQU(11639179410120968649),
-	KQU( 9318165193840846253), KQU(12795671722734758075),
-	KQU(15318410297267253933), KQU(  691524703570062620),
-	KQU( 5837129010576994601), KQU(15045963859726941052),
-	KQU( 5850056944932238169), KQU(12017434144750943807),
-	KQU( 7447139064928956574), KQU( 3101711812658245019),
-	KQU(16052940704474982954), KQU(18195745945986994042),
-	KQU( 8932252132785575659), KQU(13390817488106794834),
-	KQU(11582771836502517453), KQU( 4964411326683611686),
-	KQU( 2195093981702694011), KQU(14145229538389675669),
-	KQU(16459605532062271798), KQU(  866316924816482864),
-	KQU( 4593041209937286377), KQU( 8415491391910972138),
-	KQU( 4171236715600528969), KQU(16637569303336782889),
-	KQU( 2002011073439212680), KQU(17695124661097601411),
-	KQU( 4627687053598611702), KQU( 7895831936020190403),
-	KQU( 8455951300917267802), KQU( 2923861649108534854),
-	KQU( 8344557563927786255), KQU( 6408671940373352556),
-	KQU(12210227354536675772), KQU(14294804157294222295),
-	KQU(10103022425071085127), KQU(10092959489504123771),
-	KQU( 6554774405376736268), KQU(12629917718410641774),
-	KQU( 6260933257596067126), KQU( 2460827021439369673),
-	KQU( 2541962996717103668), KQU(  597377203127351475),
-	KQU( 5316984203117315309), KQU( 4811211393563241961),
-	KQU(13119698597255811641), KQU( 8048691512862388981),
-	KQU(10216818971194073842), KQU( 4612229970165291764),
-	KQU(10000980798419974770), KQU( 6877640812402540687),
-	KQU( 1488727563290436992), KQU( 2227774069895697318),
-	KQU(11237754507523316593), KQU(13478948605382290972),
-	KQU( 1963583846976858124), KQU( 5512309205269276457),
-	KQU( 3972770164717652347), KQU( 3841751276198975037),
-	KQU(10283343042181903117), KQU( 8564001259792872199),
-	KQU(16472187244722489221), KQU( 8953493499268945921),
-	KQU( 3518747340357279580), KQU( 4003157546223963073),
-	KQU( 3270305958289814590), KQU( 3966704458129482496),
-	KQU( 8122141865926661939), KQU(14627734748099506653),
-	KQU(13064426990862560568), KQU( 2414079187889870829),
-	KQU( 5378461209354225306), KQU(10841985740128255566),
-	KQU(  538582442885401738), KQU( 7535089183482905946),
-	KQU(16117559957598879095), KQU( 8477890721414539741),
-	KQU( 1459127491209533386), KQU(17035126360733620462),
-	KQU( 8517668552872379126), KQU(10292151468337355014),
-	KQU(17081267732745344157), KQU(13751455337946087178),
-	KQU(14026945459523832966), KQU( 6653278775061723516),
-	KQU(10619085543856390441), KQU( 2196343631481122885),
-	KQU(10045966074702826136), KQU(10082317330452718282),
-	KQU( 5920859259504831242), KQU( 9951879073426540617),
-	KQU( 7074696649151414158), KQU(15808193543879464318),
-	KQU( 7385247772746953374), KQU( 3192003544283864292),
-	KQU(18153684490917593847), KQU(12423498260668568905),
-	KQU(10957758099756378169), KQU(11488762179911016040),
-	KQU( 2099931186465333782), KQU(11180979581250294432),
-	KQU( 8098916250668367933), KQU( 3529200436790763465),
-	KQU(12988418908674681745), KQU( 6147567275954808580),
-	KQU( 3207503344604030989), KQU(10761592604898615360),
-	KQU(  229854861031893504), KQU( 8809853962667144291),
-	KQU(13957364469005693860), KQU( 7634287665224495886),
-	KQU(12353487366976556874), KQU( 1134423796317152034),
-	KQU( 2088992471334107068), KQU( 7393372127190799698),
-	KQU( 1845367839871058391), KQU(  207922563987322884),
-	KQU(11960870813159944976), KQU(12182120053317317363),
-	KQU(17307358132571709283), KQU(13871081155552824936),
-	KQU(18304446751741566262), KQU( 7178705220184302849),
-	KQU(10929605677758824425), KQU(16446976977835806844),
-	KQU(13723874412159769044), KQU( 6942854352100915216),
-	KQU( 1726308474365729390), KQU( 2150078766445323155),
-	KQU(15345558947919656626), KQU(12145453828874527201),
-	KQU( 2054448620739726849), KQU( 2740102003352628137),
-	KQU(11294462163577610655), KQU(  756164283387413743),
-	KQU(17841144758438810880), KQU(10802406021185415861),
-	KQU( 8716455530476737846), KQU( 6321788834517649606),
-	KQU(14681322910577468426), KQU(17330043563884336387),
-	KQU(12701802180050071614), KQU(14695105111079727151),
-	KQU( 5112098511654172830), KQU( 4957505496794139973),
-	KQU( 8270979451952045982), KQU(12307685939199120969),
-	KQU(12425799408953443032), KQU( 8376410143634796588),
-	KQU(16621778679680060464), KQU( 3580497854566660073),
-	KQU( 1122515747803382416), KQU(  857664980960597599),
-	KQU( 6343640119895925918), KQU(12878473260854462891),
-	KQU(10036813920765722626), KQU(14451335468363173812),
-	KQU( 5476809692401102807), KQU(16442255173514366342),
-	KQU(13060203194757167104), KQU(14354124071243177715),
-	KQU(15961249405696125227), KQU(13703893649690872584),
-	KQU(  363907326340340064), KQU( 6247455540491754842),
-	KQU(12242249332757832361), KQU(  156065475679796717),
-	KQU( 9351116235749732355), KQU( 4590350628677701405),
-	KQU( 1671195940982350389), KQU(13501398458898451905),
-	KQU( 6526341991225002255), KQU( 1689782913778157592),
-	KQU( 7439222350869010334), KQU(13975150263226478308),
-	KQU(11411961169932682710), KQU(17204271834833847277),
-	KQU(  541534742544435367), KQU( 6591191931218949684),
-	KQU( 2645454775478232486), KQU( 4322857481256485321),
-	KQU( 8477416487553065110), KQU(12902505428548435048),
-	KQU(  971445777981341415), KQU(14995104682744976712),
-	KQU( 4243341648807158063), KQU( 8695061252721927661),
-	KQU( 5028202003270177222), KQU( 2289257340915567840),
-	KQU(13870416345121866007), KQU(13994481698072092233),
-	KQU( 6912785400753196481), KQU( 2278309315841980139),
-	KQU( 4329765449648304839), KQU( 5963108095785485298),
-	KQU( 4880024847478722478), KQU(16015608779890240947),
-	KQU( 1866679034261393544), KQU(  914821179919731519),
-	KQU( 9643404035648760131), KQU( 2418114953615593915),
-	KQU(  944756836073702374), KQU(15186388048737296834),
-	KQU( 7723355336128442206), KQU( 7500747479679599691),
-	KQU(18013961306453293634), KQU( 2315274808095756456),
-	KQU(13655308255424029566), KQU(17203800273561677098),
-	KQU( 1382158694422087756), KQU( 5090390250309588976),
-	KQU(  517170818384213989), KQU( 1612709252627729621),
-	KQU( 1330118955572449606), KQU(  300922478056709885),
-	KQU(18115693291289091987), KQU(13491407109725238321),
-	KQU(15293714633593827320), KQU( 5151539373053314504),
-	KQU( 5951523243743139207), KQU(14459112015249527975),
-	KQU( 5456113959000700739), KQU( 3877918438464873016),
-	KQU(12534071654260163555), KQU(15871678376893555041),
-	KQU(11005484805712025549), KQU(16353066973143374252),
-	KQU( 4358331472063256685), KQU( 8268349332210859288),
-	KQU(12485161590939658075), KQU(13955993592854471343),
-	KQU( 5911446886848367039), KQU(14925834086813706974),
-	KQU( 6590362597857994805), KQU( 1280544923533661875),
-	KQU( 1637756018947988164), KQU( 4734090064512686329),
-	KQU(16693705263131485912), KQU( 6834882340494360958),
-	KQU( 8120732176159658505), KQU( 2244371958905329346),
-	KQU(10447499707729734021), KQU( 7318742361446942194),
-	KQU( 8032857516355555296), KQU(14023605983059313116),
-	KQU( 1032336061815461376), KQU( 9840995337876562612),
-	KQU( 9869256223029203587), KQU(12227975697177267636),
-	KQU(12728115115844186033), KQU( 7752058479783205470),
-	KQU(  729733219713393087), KQU(12954017801239007622)
-};
-static const uint64_t init_by_array_64_expected[] = {
-	KQU( 2100341266307895239), KQU( 8344256300489757943),
-	KQU(15687933285484243894), KQU( 8268620370277076319),
-	KQU(12371852309826545459), KQU( 8800491541730110238),
-	KQU(18113268950100835773), KQU( 2886823658884438119),
-	KQU( 3293667307248180724), KQU( 9307928143300172731),
-	KQU( 7688082017574293629), KQU(  900986224735166665),
-	KQU( 9977972710722265039), KQU( 6008205004994830552),
-	KQU(  546909104521689292), KQU( 7428471521869107594),
-	KQU(14777563419314721179), KQU(16116143076567350053),
-	KQU( 5322685342003142329), KQU( 4200427048445863473),
-	KQU( 4693092150132559146), KQU(13671425863759338582),
-	KQU( 6747117460737639916), KQU( 4732666080236551150),
-	KQU( 5912839950611941263), KQU( 3903717554504704909),
-	KQU( 2615667650256786818), KQU(10844129913887006352),
-	KQU(13786467861810997820), KQU(14267853002994021570),
-	KQU(13767807302847237439), KQU(16407963253707224617),
-	KQU( 4802498363698583497), KQU( 2523802839317209764),
-	KQU( 3822579397797475589), KQU( 8950320572212130610),
-	KQU( 3745623504978342534), KQU(16092609066068482806),
-	KQU( 9817016950274642398), KQU(10591660660323829098),
-	KQU(11751606650792815920), KQU( 5122873818577122211),
-	KQU(17209553764913936624), KQU( 6249057709284380343),
-	KQU(15088791264695071830), KQU(15344673071709851930),
-	KQU( 4345751415293646084), KQU( 2542865750703067928),
-	KQU(13520525127852368784), KQU(18294188662880997241),
-	KQU( 3871781938044881523), KQU( 2873487268122812184),
-	KQU(15099676759482679005), KQU(15442599127239350490),
-	KQU( 6311893274367710888), KQU( 3286118760484672933),
-	KQU( 4146067961333542189), KQU(13303942567897208770),
-	KQU( 8196013722255630418), KQU( 4437815439340979989),
-	KQU(15433791533450605135), KQU( 4254828956815687049),
-	KQU( 1310903207708286015), KQU(10529182764462398549),
-	KQU(14900231311660638810), KQU( 9727017277104609793),
-	KQU( 1821308310948199033), KQU(11628861435066772084),
-	KQU( 9469019138491546924), KQU( 3145812670532604988),
-	KQU( 9938468915045491919), KQU( 1562447430672662142),
-	KQU(13963995266697989134), KQU( 3356884357625028695),
-	KQU( 4499850304584309747), KQU( 8456825817023658122),
-	KQU(10859039922814285279), KQU( 8099512337972526555),
-	KQU(  348006375109672149), KQU(11919893998241688603),
-	KQU( 1104199577402948826), KQU(16689191854356060289),
-	KQU(10992552041730168078), KQU( 7243733172705465836),
-	KQU( 5668075606180319560), KQU(18182847037333286970),
-	KQU( 4290215357664631322), KQU( 4061414220791828613),
-	KQU(13006291061652989604), KQU( 7140491178917128798),
-	KQU(12703446217663283481), KQU( 5500220597564558267),
-	KQU(10330551509971296358), KQU(15958554768648714492),
-	KQU( 5174555954515360045), KQU( 1731318837687577735),
-	KQU( 3557700801048354857), KQU(13764012341928616198),
-	KQU(13115166194379119043), KQU( 7989321021560255519),
-	KQU( 2103584280905877040), KQU( 9230788662155228488),
-	KQU(16396629323325547654), KQU(  657926409811318051),
-	KQU(15046700264391400727), KQU( 5120132858771880830),
-	KQU( 7934160097989028561), KQU( 6963121488531976245),
-	KQU(17412329602621742089), KQU(15144843053931774092),
-	KQU(17204176651763054532), KQU(13166595387554065870),
-	KQU( 8590377810513960213), KQU( 5834365135373991938),
-	KQU( 7640913007182226243), KQU( 3479394703859418425),
-	KQU(16402784452644521040), KQU( 4993979809687083980),
-	KQU(13254522168097688865), KQU(15643659095244365219),
-	KQU( 5881437660538424982), KQU(11174892200618987379),
-	KQU(  254409966159711077), KQU(17158413043140549909),
-	KQU( 3638048789290376272), KQU( 1376816930299489190),
-	KQU( 4622462095217761923), KQU(15086407973010263515),
-	KQU(13253971772784692238), KQU( 5270549043541649236),
-	KQU(11182714186805411604), KQU(12283846437495577140),
-	KQU( 5297647149908953219), KQU(10047451738316836654),
-	KQU( 4938228100367874746), KQU(12328523025304077923),
-	KQU( 3601049438595312361), KQU( 9313624118352733770),
-	KQU(13322966086117661798), KQU(16660005705644029394),
-	KQU(11337677526988872373), KQU(13869299102574417795),
-	KQU(15642043183045645437), KQU( 3021755569085880019),
-	KQU( 4979741767761188161), KQU(13679979092079279587),
-	KQU( 3344685842861071743), KQU(13947960059899588104),
-	KQU(  305806934293368007), KQU( 5749173929201650029),
-	KQU(11123724852118844098), KQU(15128987688788879802),
-	KQU(15251651211024665009), KQU( 7689925933816577776),
-	KQU(16732804392695859449), KQU(17087345401014078468),
-	KQU(14315108589159048871), KQU( 4820700266619778917),
-	KQU(16709637539357958441), KQU( 4936227875177351374),
-	KQU( 2137907697912987247), KQU(11628565601408395420),
-	KQU( 2333250549241556786), KQU( 5711200379577778637),
-	KQU( 5170680131529031729), KQU(12620392043061335164),
-	KQU(   95363390101096078), KQU( 5487981914081709462),
-	KQU( 1763109823981838620), KQU( 3395861271473224396),
-	KQU( 1300496844282213595), KQU( 6894316212820232902),
-	KQU(10673859651135576674), KQU( 5911839658857903252),
-	KQU(17407110743387299102), KQU( 8257427154623140385),
-	KQU(11389003026741800267), KQU( 4070043211095013717),
-	KQU(11663806997145259025), KQU(15265598950648798210),
-	KQU(  630585789434030934), KQU( 3524446529213587334),
-	KQU( 7186424168495184211), KQU(10806585451386379021),
-	KQU(11120017753500499273), KQU( 1586837651387701301),
-	KQU(17530454400954415544), KQU( 9991670045077880430),
-	KQU( 7550997268990730180), KQU( 8640249196597379304),
-	KQU( 3522203892786893823), KQU(10401116549878854788),
-	KQU(13690285544733124852), KQU( 8295785675455774586),
-	KQU(15535716172155117603), KQU( 3112108583723722511),
-	KQU(17633179955339271113), KQU(18154208056063759375),
-	KQU( 1866409236285815666), KQU(13326075895396412882),
-	KQU( 8756261842948020025), KQU( 6281852999868439131),
-	KQU(15087653361275292858), KQU(10333923911152949397),
-	KQU( 5265567645757408500), KQU(12728041843210352184),
-	KQU( 6347959327507828759), KQU(  154112802625564758),
-	KQU(18235228308679780218), KQU( 3253805274673352418),
-	KQU( 4849171610689031197), KQU(17948529398340432518),
-	KQU(13803510475637409167), KQU(13506570190409883095),
-	KQU(15870801273282960805), KQU( 8451286481299170773),
-	KQU( 9562190620034457541), KQU( 8518905387449138364),
-	KQU(12681306401363385655), KQU( 3788073690559762558),
-	KQU( 5256820289573487769), KQU( 2752021372314875467),
-	KQU( 6354035166862520716), KQU( 4328956378309739069),
-	KQU(  449087441228269600), KQU( 5533508742653090868),
-	KQU( 1260389420404746988), KQU(18175394473289055097),
-	KQU( 1535467109660399420), KQU( 8818894282874061442),
-	KQU(12140873243824811213), KQU(15031386653823014946),
-	KQU( 1286028221456149232), KQU( 6329608889367858784),
-	KQU( 9419654354945132725), KQU( 6094576547061672379),
-	KQU(17706217251847450255), KQU( 1733495073065878126),
-	KQU(16918923754607552663), KQU( 8881949849954945044),
-	KQU(12938977706896313891), KQU(14043628638299793407),
-	KQU(18393874581723718233), KQU( 6886318534846892044),
-	KQU(14577870878038334081), KQU(13541558383439414119),
-	KQU(13570472158807588273), KQU(18300760537910283361),
-	KQU(  818368572800609205), KQU( 1417000585112573219),
-	KQU(12337533143867683655), KQU(12433180994702314480),
-	KQU(  778190005829189083), KQU(13667356216206524711),
-	KQU( 9866149895295225230), KQU(11043240490417111999),
-	KQU( 1123933826541378598), KQU( 6469631933605123610),
-	KQU(14508554074431980040), KQU(13918931242962026714),
-	KQU( 2870785929342348285), KQU(14786362626740736974),
-	KQU(13176680060902695786), KQU( 9591778613541679456),
-	KQU( 9097662885117436706), KQU(  749262234240924947),
-	KQU( 1944844067793307093), KQU( 4339214904577487742),
-	KQU( 8009584152961946551), KQU(16073159501225501777),
-	KQU( 3335870590499306217), KQU(17088312653151202847),
-	KQU( 3108893142681931848), KQU(16636841767202792021),
-	KQU(10423316431118400637), KQU( 8008357368674443506),
-	KQU(11340015231914677875), KQU(17687896501594936090),
-	KQU(15173627921763199958), KQU(  542569482243721959),
-	KQU(15071714982769812975), KQU( 4466624872151386956),
-	KQU( 1901780715602332461), KQU( 9822227742154351098),
-	KQU( 1479332892928648780), KQU( 6981611948382474400),
-	KQU( 7620824924456077376), KQU(14095973329429406782),
-	KQU( 7902744005696185404), KQU(15830577219375036920),
-	KQU(10287076667317764416), KQU(12334872764071724025),
-	KQU( 4419302088133544331), KQU(14455842851266090520),
-	KQU(12488077416504654222), KQU( 7953892017701886766),
-	KQU( 6331484925529519007), KQU( 4902145853785030022),
-	KQU(17010159216096443073), KQU(11945354668653886087),
-	KQU(15112022728645230829), KQU(17363484484522986742),
-	KQU( 4423497825896692887), KQU( 8155489510809067471),
-	KQU(  258966605622576285), KQU( 5462958075742020534),
-	KQU( 6763710214913276228), KQU( 2368935183451109054),
-	KQU(14209506165246453811), KQU( 2646257040978514881),
-	KQU( 3776001911922207672), KQU( 1419304601390147631),
-	KQU(14987366598022458284), KQU( 3977770701065815721),
-	KQU(  730820417451838898), KQU( 3982991703612885327),
-	KQU( 2803544519671388477), KQU(17067667221114424649),
-	KQU( 2922555119737867166), KQU( 1989477584121460932),
-	KQU(15020387605892337354), KQU( 9293277796427533547),
-	KQU(10722181424063557247), KQU(16704542332047511651),
-	KQU( 5008286236142089514), KQU(16174732308747382540),
-	KQU(17597019485798338402), KQU(13081745199110622093),
-	KQU( 8850305883842258115), KQU(12723629125624589005),
-	KQU( 8140566453402805978), KQU(15356684607680935061),
-	KQU(14222190387342648650), KQU(11134610460665975178),
-	KQU( 1259799058620984266), KQU(13281656268025610041),
-	KQU(  298262561068153992), KQU(12277871700239212922),
-	KQU(13911297774719779438), KQU(16556727962761474934),
-	KQU(17903010316654728010), KQU( 9682617699648434744),
-	KQU(14757681836838592850), KQU( 1327242446558524473),
-	KQU(11126645098780572792), KQU( 1883602329313221774),
-	KQU( 2543897783922776873), KQU(15029168513767772842),
-	KQU(12710270651039129878), KQU(16118202956069604504),
-	KQU(15010759372168680524), KQU( 2296827082251923948),
-	KQU(10793729742623518101), KQU(13829764151845413046),
-	KQU(17769301223184451213), KQU( 3118268169210783372),
-	KQU(17626204544105123127), KQU( 7416718488974352644),
-	KQU(10450751996212925994), KQU( 9352529519128770586),
-	KQU(  259347569641110140), KQU( 8048588892269692697),
-	KQU( 1774414152306494058), KQU(10669548347214355622),
-	KQU(13061992253816795081), KQU(18432677803063861659),
-	KQU( 8879191055593984333), KQU(12433753195199268041),
-	KQU(14919392415439730602), KQU( 6612848378595332963),
-	KQU( 6320986812036143628), KQU(10465592420226092859),
-	KQU( 4196009278962570808), KQU( 3747816564473572224),
-	KQU(17941203486133732898), KQU( 2350310037040505198),
-	KQU( 5811779859134370113), KQU(10492109599506195126),
-	KQU( 7699650690179541274), KQU( 1954338494306022961),
-	KQU(14095816969027231152), KQU( 5841346919964852061),
-	KQU(14945969510148214735), KQU( 3680200305887550992),
-	KQU( 6218047466131695792), KQU( 8242165745175775096),
-	KQU(11021371934053307357), KQU( 1265099502753169797),
-	KQU( 4644347436111321718), KQU( 3609296916782832859),
-	KQU( 8109807992218521571), KQU(18387884215648662020),
-	KQU(14656324896296392902), KQU(17386819091238216751),
-	KQU(17788300878582317152), KQU( 7919446259742399591),
-	KQU( 4466613134576358004), KQU(12928181023667938509),
-	KQU(13147446154454932030), KQU(16552129038252734620),
-	KQU( 8395299403738822450), KQU(11313817655275361164),
-	KQU(  434258809499511718), KQU( 2074882104954788676),
-	KQU( 7929892178759395518), KQU( 9006461629105745388),
-	KQU( 5176475650000323086), KQU(11128357033468341069),
-	KQU(12026158851559118955), KQU(14699716249471156500),
-	KQU(  448982497120206757), KQU( 4156475356685519900),
-	KQU( 6063816103417215727), KQU(10073289387954971479),
-	KQU( 8174466846138590962), KQU( 2675777452363449006),
-	KQU( 9090685420572474281), KQU( 6659652652765562060),
-	KQU(12923120304018106621), KQU(11117480560334526775),
-	KQU(  937910473424587511), KQU( 1838692113502346645),
-	KQU(11133914074648726180), KQU( 7922600945143884053),
-	KQU(13435287702700959550), KQU( 5287964921251123332),
-	KQU(11354875374575318947), KQU(17955724760748238133),
-	KQU(13728617396297106512), KQU( 4107449660118101255),
-	KQU( 1210269794886589623), KQU(11408687205733456282),
-	KQU( 4538354710392677887), KQU(13566803319341319267),
-	KQU(17870798107734050771), KQU( 3354318982568089135),
-	KQU( 9034450839405133651), KQU(13087431795753424314),
-	KQU(  950333102820688239), KQU( 1968360654535604116),
-	KQU(16840551645563314995), KQU( 8867501803892924995),
-	KQU(11395388644490626845), KQU( 1529815836300732204),
-	KQU(13330848522996608842), KQU( 1813432878817504265),
-	KQU( 2336867432693429560), KQU(15192805445973385902),
-	KQU( 2528593071076407877), KQU(  128459777936689248),
-	KQU( 9976345382867214866), KQU( 6208885766767996043),
-	KQU(14982349522273141706), KQU( 3099654362410737822),
-	KQU(13776700761947297661), KQU( 8806185470684925550),
-	KQU( 8151717890410585321), KQU(  640860591588072925),
-	KQU(14592096303937307465), KQU( 9056472419613564846),
-	KQU(14861544647742266352), KQU(12703771500398470216),
-	KQU( 3142372800384138465), KQU( 6201105606917248196),
-	KQU(18337516409359270184), KQU(15042268695665115339),
-	KQU(15188246541383283846), KQU(12800028693090114519),
-	KQU( 5992859621101493472), KQU(18278043971816803521),
-	KQU( 9002773075219424560), KQU( 7325707116943598353),
-	KQU( 7930571931248040822), KQU( 5645275869617023448),
-	KQU( 7266107455295958487), KQU( 4363664528273524411),
-	KQU(14313875763787479809), KQU(17059695613553486802),
-	KQU( 9247761425889940932), KQU(13704726459237593128),
-	KQU( 2701312427328909832), KQU(17235532008287243115),
-	KQU(14093147761491729538), KQU( 6247352273768386516),
-	KQU( 8268710048153268415), KQU( 7985295214477182083),
-	KQU(15624495190888896807), KQU( 3772753430045262788),
-	KQU( 9133991620474991698), KQU( 5665791943316256028),
-	KQU( 7551996832462193473), KQU(13163729206798953877),
-	KQU( 9263532074153846374), KQU( 1015460703698618353),
-	KQU(17929874696989519390), KQU(18257884721466153847),
-	KQU(16271867543011222991), KQU( 3905971519021791941),
-	KQU(16814488397137052085), KQU( 1321197685504621613),
-	KQU( 2870359191894002181), KQU(14317282970323395450),
-	KQU(13663920845511074366), KQU( 2052463995796539594),
-	KQU(14126345686431444337), KQU( 1727572121947022534),
-	KQU(17793552254485594241), KQU( 6738857418849205750),
-	KQU( 1282987123157442952), KQU(16655480021581159251),
-	KQU( 6784587032080183866), KQU(14726758805359965162),
-	KQU( 7577995933961987349), KQU(12539609320311114036),
-	KQU(10789773033385439494), KQU( 8517001497411158227),
-	KQU(10075543932136339710), KQU(14838152340938811081),
-	KQU( 9560840631794044194), KQU(17445736541454117475),
-	KQU(10633026464336393186), KQU(15705729708242246293),
-	KQU( 1117517596891411098), KQU( 4305657943415886942),
-	KQU( 4948856840533979263), KQU(16071681989041789593),
-	KQU(13723031429272486527), KQU( 7639567622306509462),
-	KQU(12670424537483090390), KQU( 9715223453097197134),
-	KQU( 5457173389992686394), KQU(  289857129276135145),
-	KQU(17048610270521972512), KQU(  692768013309835485),
-	KQU(14823232360546632057), KQU(18218002361317895936),
-	KQU( 3281724260212650204), KQU(16453957266549513795),
-	KQU( 8592711109774511881), KQU(  929825123473369579),
-	KQU(15966784769764367791), KQU( 9627344291450607588),
-	KQU(10849555504977813287), KQU( 9234566913936339275),
-	KQU( 6413807690366911210), KQU(10862389016184219267),
-	KQU(13842504799335374048), KQU( 1531994113376881174),
-	KQU( 2081314867544364459), KQU(16430628791616959932),
-	KQU( 8314714038654394368), KQU( 9155473892098431813),
-	KQU(12577843786670475704), KQU( 4399161106452401017),
-	KQU( 1668083091682623186), KQU( 1741383777203714216),
-	KQU( 2162597285417794374), KQU(15841980159165218736),
-	KQU( 1971354603551467079), KQU( 1206714764913205968),
-	KQU( 4790860439591272330), KQU(14699375615594055799),
-	KQU( 8374423871657449988), KQU(10950685736472937738),
-	KQU(  697344331343267176), KQU(10084998763118059810),
-	KQU(12897369539795983124), KQU(12351260292144383605),
-	KQU( 1268810970176811234), KQU( 7406287800414582768),
-	KQU(  516169557043807831), KQU( 5077568278710520380),
-	KQU( 3828791738309039304), KQU( 7721974069946943610),
-	KQU( 3534670260981096460), KQU( 4865792189600584891),
-	KQU(16892578493734337298), KQU( 9161499464278042590),
-	KQU(11976149624067055931), KQU(13219479887277343990),
-	KQU(14161556738111500680), KQU(14670715255011223056),
-	KQU( 4671205678403576558), KQU(12633022931454259781),
-	KQU(14821376219869187646), KQU(  751181776484317028),
-	KQU( 2192211308839047070), KQU(11787306362361245189),
-	KQU(10672375120744095707), KQU( 4601972328345244467),
-	KQU(15457217788831125879), KQU( 8464345256775460809),
-	KQU(10191938789487159478), KQU( 6184348739615197613),
-	KQU(11425436778806882100), KQU( 2739227089124319793),
-	KQU(  461464518456000551), KQU( 4689850170029177442),
-	KQU( 6120307814374078625), KQU(11153579230681708671),
-	KQU( 7891721473905347926), KQU(10281646937824872400),
-	KQU( 3026099648191332248), KQU( 8666750296953273818),
-	KQU(14978499698844363232), KQU(13303395102890132065),
-	KQU( 8182358205292864080), KQU(10560547713972971291),
-	KQU(11981635489418959093), KQU( 3134621354935288409),
-	KQU(11580681977404383968), KQU(14205530317404088650),
-	KQU( 5997789011854923157), KQU(13659151593432238041),
-	KQU(11664332114338865086), KQU( 7490351383220929386),
-	KQU( 7189290499881530378), KQU(15039262734271020220),
-	KQU( 2057217285976980055), KQU(  555570804905355739),
-	KQU(11235311968348555110), KQU(13824557146269603217),
-	KQU(16906788840653099693), KQU( 7222878245455661677),
-	KQU( 5245139444332423756), KQU( 4723748462805674292),
-	KQU(12216509815698568612), KQU(17402362976648951187),
-	KQU(17389614836810366768), KQU( 4880936484146667711),
-	KQU( 9085007839292639880), KQU(13837353458498535449),
-	KQU(11914419854360366677), KQU(16595890135313864103),
-	KQU( 6313969847197627222), KQU(18296909792163910431),
-	KQU(10041780113382084042), KQU( 2499478551172884794),
-	KQU(11057894246241189489), KQU( 9742243032389068555),
-	KQU(12838934582673196228), KQU(13437023235248490367),
-	KQU(13372420669446163240), KQU( 6752564244716909224),
-	KQU( 7157333073400313737), KQU(12230281516370654308),
-	KQU( 1182884552219419117), KQU( 2955125381312499218),
-	KQU(10308827097079443249), KQU( 1337648572986534958),
-	KQU(16378788590020343939), KQU(  108619126514420935),
-	KQU( 3990981009621629188), KQU( 5460953070230946410),
-	KQU( 9703328329366531883), KQU(13166631489188077236),
-	KQU( 1104768831213675170), KQU( 3447930458553877908),
-	KQU( 8067172487769945676), KQU( 5445802098190775347),
-	KQU( 3244840981648973873), KQU(17314668322981950060),
-	KQU( 5006812527827763807), KQU(18158695070225526260),
-	KQU( 2824536478852417853), KQU(13974775809127519886),
-	KQU( 9814362769074067392), KQU(17276205156374862128),
-	KQU(11361680725379306967), KQU( 3422581970382012542),
-	KQU(11003189603753241266), KQU(11194292945277862261),
-	KQU( 6839623313908521348), KQU(11935326462707324634),
-	KQU( 1611456788685878444), KQU(13112620989475558907),
-	KQU(  517659108904450427), KQU(13558114318574407624),
-	KQU(15699089742731633077), KQU( 4988979278862685458),
-	KQU( 8111373583056521297), KQU( 3891258746615399627),
-	KQU( 8137298251469718086), KQU(12748663295624701649),
-	KQU( 4389835683495292062), KQU( 5775217872128831729),
-	KQU( 9462091896405534927), KQU( 8498124108820263989),
-	KQU( 8059131278842839525), KQU(10503167994254090892),
-	KQU(11613153541070396656), KQU(18069248738504647790),
-	KQU(  570657419109768508), KQU( 3950574167771159665),
-	KQU( 5514655599604313077), KQU( 2908460854428484165),
-	KQU(10777722615935663114), KQU(12007363304839279486),
-	KQU( 9800646187569484767), KQU( 8795423564889864287),
-	KQU(14257396680131028419), KQU( 6405465117315096498),
-	KQU( 7939411072208774878), KQU(17577572378528990006),
-	KQU(14785873806715994850), KQU(16770572680854747390),
-	KQU(18127549474419396481), KQU(11637013449455757750),
-	KQU(14371851933996761086), KQU( 3601181063650110280),
-	KQU( 4126442845019316144), KQU(10198287239244320669),
-	KQU(18000169628555379659), KQU(18392482400739978269),
-	KQU( 6219919037686919957), KQU( 3610085377719446052),
-	KQU( 2513925039981776336), KQU(16679413537926716955),
-	KQU(12903302131714909434), KQU( 5581145789762985009),
-	KQU(12325955044293303233), KQU(17216111180742141204),
-	KQU( 6321919595276545740), KQU( 3507521147216174501),
-	KQU( 9659194593319481840), KQU(11473976005975358326),
-	KQU(14742730101435987026), KQU(  492845897709954780),
-	KQU(16976371186162599676), KQU(17712703422837648655),
-	KQU( 9881254778587061697), KQU( 8413223156302299551),
-	KQU( 1563841828254089168), KQU( 9996032758786671975),
-	KQU(  138877700583772667), KQU(13003043368574995989),
-	KQU( 4390573668650456587), KQU( 8610287390568126755),
-	KQU(15126904974266642199), KQU( 6703637238986057662),
-	KQU( 2873075592956810157), KQU( 6035080933946049418),
-	KQU(13382846581202353014), KQU( 7303971031814642463),
-	KQU(18418024405307444267), KQU( 5847096731675404647),
-	KQU( 4035880699639842500), KQU(11525348625112218478),
-	KQU( 3041162365459574102), KQU( 2604734487727986558),
-	KQU(15526341771636983145), KQU(14556052310697370254),
-	KQU(12997787077930808155), KQU( 9601806501755554499),
-	KQU(11349677952521423389), KQU(14956777807644899350),
-	KQU(16559736957742852721), KQU(12360828274778140726),
-	KQU( 6685373272009662513), KQU(16932258748055324130),
-	KQU(15918051131954158508), KQU( 1692312913140790144),
-	KQU(  546653826801637367), KQU( 5341587076045986652),
-	KQU(14975057236342585662), KQU(12374976357340622412),
-	KQU(10328833995181940552), KQU(12831807101710443149),
-	KQU(10548514914382545716), KQU( 2217806727199715993),
-	KQU(12627067369242845138), KQU( 4598965364035438158),
-	KQU(  150923352751318171), KQU(14274109544442257283),
-	KQU( 4696661475093863031), KQU( 1505764114384654516),
-	KQU(10699185831891495147), KQU( 2392353847713620519),
-	KQU( 3652870166711788383), KQU( 8640653276221911108),
-	KQU( 3894077592275889704), KQU( 4918592872135964845),
-	KQU(16379121273281400789), KQU(12058465483591683656),
-	KQU(11250106829302924945), KQU( 1147537556296983005),
-	KQU( 6376342756004613268), KQU(14967128191709280506),
-	KQU(18007449949790627628), KQU( 9497178279316537841),
-	KQU( 7920174844809394893), KQU(10037752595255719907),
-	KQU(15875342784985217697), KQU(15311615921712850696),
-	KQU( 9552902652110992950), KQU(14054979450099721140),
-	KQU( 5998709773566417349), KQU(18027910339276320187),
-	KQU( 8223099053868585554), KQU( 7842270354824999767),
-	KQU( 4896315688770080292), KQU(12969320296569787895),
-	KQU( 2674321489185759961), KQU( 4053615936864718439),
-	KQU(11349775270588617578), KQU( 4743019256284553975),
-	KQU( 5602100217469723769), KQU(14398995691411527813),
-	KQU( 7412170493796825470), KQU(  836262406131744846),
-	KQU( 8231086633845153022), KQU( 5161377920438552287),
-	KQU( 8828731196169924949), KQU(16211142246465502680),
-	KQU( 3307990879253687818), KQU( 5193405406899782022),
-	KQU( 8510842117467566693), KQU( 6070955181022405365),
-	KQU(14482950231361409799), KQU(12585159371331138077),
-	KQU( 3511537678933588148), KQU( 2041849474531116417),
-	KQU(10944936685095345792), KQU(18303116923079107729),
-	KQU( 2720566371239725320), KQU( 4958672473562397622),
-	KQU( 3032326668253243412), KQU(13689418691726908338),
-	KQU( 1895205511728843996), KQU( 8146303515271990527),
-	KQU(16507343500056113480), KQU(  473996939105902919),
-	KQU( 9897686885246881481), KQU(14606433762712790575),
-	KQU( 6732796251605566368), KQU( 1399778120855368916),
-	KQU(  935023885182833777), KQU(16066282816186753477),
-	KQU( 7291270991820612055), KQU(17530230393129853844),
-	KQU(10223493623477451366), KQU(15841725630495676683),
-	KQU(17379567246435515824), KQU( 8588251429375561971),
-	KQU(18339511210887206423), KQU(17349587430725976100),
-	KQU(12244876521394838088), KQU( 6382187714147161259),
-	KQU(12335807181848950831), KQU(16948885622305460665),
-	KQU(13755097796371520506), KQU(14806740373324947801),
-	KQU( 4828699633859287703), KQU( 8209879281452301604),
-	KQU(12435716669553736437), KQU(13970976859588452131),
-	KQU( 6233960842566773148), KQU(12507096267900505759),
-	KQU( 1198713114381279421), KQU(14989862731124149015),
-	KQU(15932189508707978949), KQU( 2526406641432708722),
-	KQU(   29187427817271982), KQU( 1499802773054556353),
-	KQU(10816638187021897173), KQU( 5436139270839738132),
-	KQU( 6659882287036010082), KQU( 2154048955317173697),
-	KQU(10887317019333757642), KQU(16281091802634424955),
-	KQU(10754549879915384901), KQU(10760611745769249815),
-	KQU( 2161505946972504002), KQU( 5243132808986265107),
-	KQU(10129852179873415416), KQU(  710339480008649081),
-	KQU( 7802129453068808528), KQU(17967213567178907213),
-	KQU(15730859124668605599), KQU(13058356168962376502),
-	KQU( 3701224985413645909), KQU(14464065869149109264),
-	KQU( 9959272418844311646), KQU(10157426099515958752),
-	KQU(14013736814538268528), KQU(17797456992065653951),
-	KQU(17418878140257344806), KQU(15457429073540561521),
-	KQU( 2184426881360949378), KQU( 2062193041154712416),
-	KQU( 8553463347406931661), KQU( 4913057625202871854),
-	KQU( 2668943682126618425), KQU(17064444737891172288),
-	KQU( 4997115903913298637), KQU(12019402608892327416),
-	KQU(17603584559765897352), KQU(11367529582073647975),
-	KQU( 8211476043518436050), KQU( 8676849804070323674),
-	KQU(18431829230394475730), KQU(10490177861361247904),
-	KQU( 9508720602025651349), KQU( 7409627448555722700),
-	KQU( 5804047018862729008), KQU(11943858176893142594),
-	KQU(11908095418933847092), KQU( 5415449345715887652),
-	KQU( 1554022699166156407), KQU( 9073322106406017161),
-	KQU( 7080630967969047082), KQU(18049736940860732943),
-	KQU(12748714242594196794), KQU( 1226992415735156741),
-	KQU(17900981019609531193), KQU(11720739744008710999),
-	KQU( 3006400683394775434), KQU(11347974011751996028),
-	KQU( 3316999628257954608), KQU( 8384484563557639101),
-	KQU(18117794685961729767), KQU( 1900145025596618194),
-	KQU(17459527840632892676), KQU( 5634784101865710994),
-	KQU( 7918619300292897158), KQU( 3146577625026301350),
-	KQU( 9955212856499068767), KQU( 1873995843681746975),
-	KQU( 1561487759967972194), KQU( 8322718804375878474),
-	KQU(11300284215327028366), KQU( 4667391032508998982),
-	KQU( 9820104494306625580), KQU(17922397968599970610),
-	KQU( 1784690461886786712), KQU(14940365084341346821),
-	KQU( 5348719575594186181), KQU(10720419084507855261),
-	KQU(14210394354145143274), KQU( 2426468692164000131),
-	KQU(16271062114607059202), KQU(14851904092357070247),
-	KQU( 6524493015693121897), KQU( 9825473835127138531),
-	KQU(14222500616268569578), KQU(15521484052007487468),
-	KQU(14462579404124614699), KQU(11012375590820665520),
-	KQU(11625327350536084927), KQU(14452017765243785417),
-	KQU( 9989342263518766305), KQU( 3640105471101803790),
-	KQU( 4749866455897513242), KQU(13963064946736312044),
-	KQU(10007416591973223791), KQU(18314132234717431115),
-	KQU( 3286596588617483450), KQU( 7726163455370818765),
-	KQU( 7575454721115379328), KQU( 5308331576437663422),
-	KQU(18288821894903530934), KQU( 8028405805410554106),
-	KQU(15744019832103296628), KQU(  149765559630932100),
-	KQU( 6137705557200071977), KQU(14513416315434803615),
-	KQU(11665702820128984473), KQU(  218926670505601386),
-	KQU( 6868675028717769519), KQU(15282016569441512302),
-	KQU( 5707000497782960236), KQU( 6671120586555079567),
-	KQU( 2194098052618985448), KQU(16849577895477330978),
-	KQU(12957148471017466283), KQU( 1997805535404859393),
-	KQU( 1180721060263860490), KQU(13206391310193756958),
-	KQU(12980208674461861797), KQU( 3825967775058875366),
-	KQU(17543433670782042631), KQU( 1518339070120322730),
-	KQU(16344584340890991669), KQU( 2611327165318529819),
-	KQU(11265022723283422529), KQU( 4001552800373196817),
-	KQU(14509595890079346161), KQU( 3528717165416234562),
-	KQU(18153222571501914072), KQU( 9387182977209744425),
-	KQU(10064342315985580021), KQU(11373678413215253977),
-	KQU( 2308457853228798099), KQU( 9729042942839545302),
-	KQU( 7833785471140127746), KQU( 6351049900319844436),
-	KQU(14454610627133496067), KQU(12533175683634819111),
-	KQU(15570163926716513029), KQU(13356980519185762498)
-};
+static const uint32_t init_gen_rand_32_expected[] = {3440181298U, 1564997079U,
+    1510669302U, 2930277156U, 1452439940U, 3796268453U, 423124208U, 2143818589U,
+    3827219408U, 2987036003U, 2674978610U, 1536842514U, 2027035537U,
+    2534897563U, 1686527725U, 545368292U, 1489013321U, 1370534252U, 4231012796U,
+    3994803019U, 1764869045U, 824597505U, 862581900U, 2469764249U, 812862514U,
+    359318673U, 116957936U, 3367389672U, 2327178354U, 1898245200U, 3206507879U,
+    2378925033U, 1040214787U, 2524778605U, 3088428700U, 1417665896U, 964324147U,
+    2282797708U, 2456269299U, 313400376U, 2245093271U, 1015729427U, 2694465011U,
+    3246975184U, 1992793635U, 463679346U, 3721104591U, 3475064196U, 856141236U,
+    1499559719U, 3522818941U, 3721533109U, 1954826617U, 1282044024U,
+    1543279136U, 1301863085U, 2669145051U, 4221477354U, 3896016841U,
+    3392740262U, 462466863U, 1037679449U, 1228140306U, 922298197U, 1205109853U,
+    1872938061U, 3102547608U, 2742766808U, 1888626088U, 4028039414U, 157593879U,
+    1136901695U, 4038377686U, 3572517236U, 4231706728U, 2997311961U,
+    1189931652U, 3981543765U, 2826166703U, 87159245U, 1721379072U, 3897926942U,
+    1790395498U, 2569178939U, 1047368729U, 2340259131U, 3144212906U,
+    2301169789U, 2442885464U, 3034046771U, 3667880593U, 3935928400U,
+    2372805237U, 1666397115U, 2460584504U, 513866770U, 3810869743U, 2147400037U,
+    2792078025U, 2941761810U, 3212265810U, 984692259U, 346590253U, 1804179199U,
+    3298543443U, 750108141U, 2880257022U, 243310542U, 1869036465U, 1588062513U,
+    2983949551U, 1931450364U, 4034505847U, 2735030199U, 1628461061U,
+    2539522841U, 127965585U, 3992448871U, 913388237U, 559130076U, 1202933193U,
+    4087643167U, 2590021067U, 2256240196U, 1746697293U, 1013913783U,
+    1155864921U, 2715773730U, 915061862U, 1948766573U, 2322882854U, 3761119102U,
+    1343405684U, 3078711943U, 3067431651U, 3245156316U, 3588354584U,
+    3484623306U, 3899621563U, 4156689741U, 3237090058U, 3880063844U, 862416318U,
+    4039923869U, 2303788317U, 3073590536U, 701653667U, 2131530884U, 3169309950U,
+    2028486980U, 747196777U, 3620218225U, 432016035U, 1449580595U, 2772266392U,
+    444224948U, 1662832057U, 3184055582U, 3028331792U, 1861686254U, 1104864179U,
+    342430307U, 1350510923U, 3024656237U, 1028417492U, 2870772950U, 290847558U,
+    3675663500U, 508431529U, 4264340390U, 2263569913U, 1669302976U, 519511383U,
+    2706411211U, 3764615828U, 3883162495U, 4051445305U, 2412729798U,
+    3299405164U, 3991911166U, 2348767304U, 2664054906U, 3763609282U, 593943581U,
+    3757090046U, 2075338894U, 2020550814U, 4287452920U, 4290140003U,
+    1422957317U, 2512716667U, 2003485045U, 2307520103U, 2288472169U,
+    3940751663U, 4204638664U, 2892583423U, 1710068300U, 3904755993U,
+    2363243951U, 3038334120U, 547099465U, 771105860U, 3199983734U, 4282046461U,
+    2298388363U, 934810218U, 2837827901U, 3952500708U, 2095130248U, 3083335297U,
+    26885281U, 3932155283U, 1531751116U, 1425227133U, 495654159U, 3279634176U,
+    3855562207U, 3957195338U, 4159985527U, 893375062U, 1875515536U, 1327247422U,
+    3754140693U, 1028923197U, 1729880440U, 805571298U, 448971099U, 2726757106U,
+    2749436461U, 2485987104U, 175337042U, 3235477922U, 3882114302U, 2020970972U,
+    943926109U, 2762587195U, 1904195558U, 3452650564U, 108432281U, 3893463573U,
+    3977583081U, 2636504348U, 1110673525U, 3548479841U, 4258854744U, 980047703U,
+    4057175418U, 3890008292U, 145653646U, 3141868989U, 3293216228U, 1194331837U,
+    1254570642U, 3049934521U, 2868313360U, 2886032750U, 1110873820U, 279553524U,
+    3007258565U, 1104807822U, 3186961098U, 315764646U, 2163680838U, 3574508994U,
+    3099755655U, 191957684U, 3642656737U, 3317946149U, 3522087636U, 444526410U,
+    779157624U, 1088229627U, 1092460223U, 1856013765U, 3659877367U, 368270451U,
+    503570716U, 3000984671U, 2742789647U, 928097709U, 2914109539U, 308843566U,
+    2816161253U, 3667192079U, 2762679057U, 3395240989U, 2928925038U,
+    1491465914U, 3458702834U, 3787782576U, 2894104823U, 1296880455U,
+    1253636503U, 989959407U, 2291560361U, 2776790436U, 1913178042U, 1584677829U,
+    689637520U, 1898406878U, 688391508U, 3385234998U, 845493284U, 1943591856U,
+    2720472050U, 222695101U, 1653320868U, 2904632120U, 4084936008U, 1080720688U,
+    3938032556U, 387896427U, 2650839632U, 99042991U, 1720913794U, 1047186003U,
+    1877048040U, 2090457659U, 517087501U, 4172014665U, 2129713163U, 2413533132U,
+    2760285054U, 4129272496U, 1317737175U, 2309566414U, 2228873332U,
+    3889671280U, 1110864630U, 3576797776U, 2074552772U, 832002644U, 3097122623U,
+    2464859298U, 2679603822U, 1667489885U, 3237652716U, 1478413938U,
+    1719340335U, 2306631119U, 639727358U, 3369698270U, 226902796U, 2099920751U,
+    1892289957U, 2201594097U, 3508197013U, 3495811856U, 3900381493U, 841660320U,
+    3974501451U, 3360949056U, 1676829340U, 728899254U, 2047809627U, 2390948962U,
+    670165943U, 3412951831U, 4189320049U, 1911595255U, 2055363086U, 507170575U,
+    418219594U, 4141495280U, 2692088692U, 4203630654U, 3540093932U, 791986533U,
+    2237921051U, 2526864324U, 2956616642U, 1394958700U, 1983768223U,
+    1893373266U, 591653646U, 228432437U, 1611046598U, 3007736357U, 1040040725U,
+    2726180733U, 2789804360U, 4263568405U, 829098158U, 3847722805U, 1123578029U,
+    1804276347U, 997971319U, 4203797076U, 4185199713U, 2811733626U, 2343642194U,
+    2985262313U, 1417930827U, 3759587724U, 1967077982U, 1585223204U,
+    1097475516U, 1903944948U, 740382444U, 1114142065U, 1541796065U, 1718384172U,
+    1544076191U, 1134682254U, 3519754455U, 2866243923U, 341865437U, 645498576U,
+    2690735853U, 1046963033U, 2493178460U, 1187604696U, 1619577821U, 488503634U,
+    3255768161U, 2306666149U, 1630514044U, 2377698367U, 2751503746U,
+    3794467088U, 1796415981U, 3657173746U, 409136296U, 1387122342U, 1297726519U,
+    219544855U, 4270285558U, 437578827U, 1444698679U, 2258519491U, 963109892U,
+    3982244073U, 3351535275U, 385328496U, 1804784013U, 698059346U, 3920535147U,
+    708331212U, 784338163U, 785678147U, 1238376158U, 1557298846U, 2037809321U,
+    271576218U, 4145155269U, 1913481602U, 2763691931U, 588981080U, 1201098051U,
+    3717640232U, 1509206239U, 662536967U, 3180523616U, 1133105435U, 2963500837U,
+    2253971215U, 3153642623U, 1066925709U, 2582781958U, 3034720222U,
+    1090798544U, 2942170004U, 4036187520U, 686972531U, 2610990302U, 2641437026U,
+    1837562420U, 722096247U, 1315333033U, 2102231203U, 3402389208U, 3403698140U,
+    1312402831U, 2898426558U, 814384596U, 385649582U, 1916643285U, 1924625106U,
+    2512905582U, 2501170304U, 4275223366U, 2841225246U, 1467663688U,
+    3563567847U, 2969208552U, 884750901U, 102992576U, 227844301U, 3681442994U,
+    3502881894U, 4034693299U, 1166727018U, 1697460687U, 1737778332U,
+    1787161139U, 1053003655U, 1215024478U, 2791616766U, 2525841204U,
+    1629323443U, 3233815U, 2003823032U, 3083834263U, 2379264872U, 3752392312U,
+    1287475550U, 3770904171U, 3004244617U, 1502117784U, 918698423U, 2419857538U,
+    3864502062U, 1751322107U, 2188775056U, 4018728324U, 983712955U, 440071928U,
+    3710838677U, 2001027698U, 3994702151U, 22493119U, 3584400918U, 3446253670U,
+    4254789085U, 1405447860U, 1240245579U, 1800644159U, 1661363424U,
+    3278326132U, 3403623451U, 67092802U, 2609352193U, 3914150340U, 1814842761U,
+    3610830847U, 591531412U, 3880232807U, 1673505890U, 2585326991U, 1678544474U,
+    3148435887U, 3457217359U, 1193226330U, 2816576908U, 154025329U, 121678860U,
+    1164915738U, 973873761U, 269116100U, 52087970U, 744015362U, 498556057U,
+    94298882U, 1563271621U, 2383059628U, 4197367290U, 3958472990U, 2592083636U,
+    2906408439U, 1097742433U, 3924840517U, 264557272U, 2292287003U, 3203307984U,
+    4047038857U, 3820609705U, 2333416067U, 1839206046U, 3600944252U,
+    3412254904U, 583538222U, 2390557166U, 4140459427U, 2810357445U, 226777499U,
+    2496151295U, 2207301712U, 3283683112U, 611630281U, 1933218215U, 3315610954U,
+    3889441987U, 3719454256U, 3957190521U, 1313998161U, 2365383016U,
+    3146941060U, 1801206260U, 796124080U, 2076248581U, 1747472464U, 3254365145U,
+    595543130U, 3573909503U, 3758250204U, 2020768540U, 2439254210U, 93368951U,
+    3155792250U, 2600232980U, 3709198295U, 3894900440U, 2971850836U,
+    1578909644U, 1443493395U, 2581621665U, 3086506297U, 2443465861U, 558107211U,
+    1519367835U, 249149686U, 908102264U, 2588765675U, 1232743965U, 1001330373U,
+    3561331654U, 2259301289U, 1564977624U, 3835077093U, 727244906U, 4255738067U,
+    1214133513U, 2570786021U, 3899704621U, 1633861986U, 1636979509U,
+    1438500431U, 58463278U, 2823485629U, 2297430187U, 2926781924U, 3371352948U,
+    1864009023U, 2722267973U, 1444292075U, 437703973U, 1060414512U, 189705863U,
+    910018135U, 4077357964U, 884213423U, 2644986052U, 3973488374U, 1187906116U,
+    2331207875U, 780463700U, 3713351662U, 3854611290U, 412805574U, 2978462572U,
+    2176222820U, 829424696U, 2790788332U, 2750819108U, 1594611657U, 3899878394U,
+    3032870364U, 1702887682U, 1948167778U, 14130042U, 192292500U, 947227076U,
+    90719497U, 3854230320U, 784028434U, 2142399787U, 1563449646U, 2844400217U,
+    819143172U, 2883302356U, 2328055304U, 1328532246U, 2603885363U, 3375188924U,
+    933941291U, 3627039714U, 2129697284U, 2167253953U, 2506905438U, 1412424497U,
+    2981395985U, 1418359660U, 2925902456U, 52752784U, 3713667988U, 3924669405U,
+    648975707U, 1145520213U, 4018650664U, 3805915440U, 2380542088U, 2013260958U,
+    3262572197U, 2465078101U, 1114540067U, 3728768081U, 2396958768U, 590672271U,
+    904818725U, 4263660715U, 700754408U, 1042601829U, 4094111823U, 4274838909U,
+    2512692617U, 2774300207U, 2057306915U, 3470942453U, 99333088U, 1142661026U,
+    2889931380U, 14316674U, 2201179167U, 415289459U, 448265759U, 3515142743U,
+    3254903683U, 246633281U, 1184307224U, 2418347830U, 2092967314U, 2682072314U,
+    2558750234U, 2000352263U, 1544150531U, 399010405U, 1513946097U, 499682937U,
+    461167460U, 3045570638U, 1633669705U, 851492362U, 4052801922U, 2055266765U,
+    635556996U, 368266356U, 2385737383U, 3218202352U, 2603772408U, 349178792U,
+    226482567U, 3102426060U, 3575998268U, 2103001871U, 3243137071U, 225500688U,
+    1634718593U, 4283311431U, 4292122923U, 3842802787U, 811735523U, 105712518U,
+    663434053U, 1855889273U, 2847972595U, 1196355421U, 2552150115U, 4254510614U,
+    3752181265U, 3430721819U, 3828705396U, 3436287905U, 3441964937U,
+    4123670631U, 353001539U, 459496439U, 3799690868U, 1293777660U, 2761079737U,
+    498096339U, 3398433374U, 4080378380U, 2304691596U, 2995729055U, 4134660419U,
+    3903444024U, 3576494993U, 203682175U, 3321164857U, 2747963611U, 79749085U,
+    2992890370U, 1240278549U, 1772175713U, 2111331972U, 2655023449U,
+    1683896345U, 2836027212U, 3482868021U, 2489884874U, 756853961U, 2298874501U,
+    4013448667U, 4143996022U, 2948306858U, 4132920035U, 1283299272U, 995592228U,
+    3450508595U, 1027845759U, 1766942720U, 3861411826U, 1446861231U, 95974993U,
+    3502263554U, 1487532194U, 601502472U, 4129619129U, 250131773U, 2050079547U,
+    3198903947U, 3105589778U, 4066481316U, 3026383978U, 2276901713U, 365637751U,
+    2260718426U, 1394775634U, 1791172338U, 2690503163U, 2952737846U,
+    1568710462U, 732623190U, 2980358000U, 1053631832U, 1432426951U, 3229149635U,
+    1854113985U, 3719733532U, 3204031934U, 735775531U, 107468620U, 3734611984U,
+    631009402U, 3083622457U, 4109580626U, 159373458U, 1301970201U, 4132389302U,
+    1293255004U, 847182752U, 4170022737U, 96712900U, 2641406755U, 1381727755U,
+    405608287U, 4287919625U, 1703554290U, 3589580244U, 2911403488U, 2166565U,
+    2647306451U, 2330535117U, 1200815358U, 1165916754U, 245060911U, 4040679071U,
+    3684908771U, 2452834126U, 2486872773U, 2318678365U, 2940627908U,
+    1837837240U, 3447897409U, 4270484676U, 1495388728U, 3754288477U,
+    4204167884U, 1386977705U, 2692224733U, 3076249689U, 4109568048U,
+    4170955115U, 4167531356U, 4020189950U, 4261855038U, 3036907575U,
+    3410399885U, 3076395737U, 1046178638U, 144496770U, 230725846U, 3349637149U,
+    17065717U, 2809932048U, 2054581785U, 3608424964U, 3259628808U, 134897388U,
+    3743067463U, 257685904U, 3795656590U, 1562468719U, 3589103904U, 3120404710U,
+    254684547U, 2653661580U, 3663904795U, 2631942758U, 1063234347U, 2609732900U,
+    2332080715U, 3521125233U, 1180599599U, 1935868586U, 4110970440U, 296706371U,
+    2128666368U, 1319875791U, 1570900197U, 3096025483U, 1799882517U,
+    1928302007U, 1163707758U, 1244491489U, 3533770203U, 567496053U, 2757924305U,
+    2781639343U, 2818420107U, 560404889U, 2619609724U, 4176035430U, 2511289753U,
+    2521842019U, 3910553502U, 2926149387U, 3302078172U, 4237118867U, 330725126U,
+    367400677U, 888239854U, 545570454U, 4259590525U, 134343617U, 1102169784U,
+    1647463719U, 3260979784U, 1518840883U, 3631537963U, 3342671457U,
+    1301549147U, 2083739356U, 146593792U, 3217959080U, 652755743U, 2032187193U,
+    3898758414U, 1021358093U, 4037409230U, 2176407931U, 3427391950U,
+    2883553603U, 985613827U, 3105265092U, 3423168427U, 3387507672U, 467170288U,
+    2141266163U, 3723870208U, 916410914U, 1293987799U, 2652584950U, 769160137U,
+    3205292896U, 1561287359U, 1684510084U, 3136055621U, 3765171391U, 639683232U,
+    2639569327U, 1218546948U, 4263586685U, 3058215773U, 2352279820U, 401870217U,
+    2625822463U, 1529125296U, 2981801895U, 1191285226U, 4027725437U,
+    3432700217U, 4098835661U, 971182783U, 2443861173U, 3881457123U, 3874386651U,
+    457276199U, 2638294160U, 4002809368U, 421169044U, 1112642589U, 3076213779U,
+    3387033971U, 2499610950U, 3057240914U, 1662679783U, 461224431U,
+    1168395933U};
+static const uint32_t init_by_array_32_expected[] = {2920711183U, 3885745737U,
+    3501893680U, 856470934U, 1421864068U, 277361036U, 1518638004U, 2328404353U,
+    3355513634U, 64329189U, 1624587673U, 3508467182U, 2481792141U, 3706480799U,
+    1925859037U, 2913275699U, 882658412U, 384641219U, 422202002U, 1873384891U,
+    2006084383U, 3924929912U, 1636718106U, 3108838742U, 1245465724U,
+    4195470535U, 779207191U, 1577721373U, 1390469554U, 2928648150U, 121399709U,
+    3170839019U, 4044347501U, 953953814U, 3821710850U, 3085591323U, 3666535579U,
+    3577837737U, 2012008410U, 3565417471U, 4044408017U, 433600965U, 1637785608U,
+    1798509764U, 860770589U, 3081466273U, 3982393409U, 2451928325U, 3437124742U,
+    4093828739U, 3357389386U, 2154596123U, 496568176U, 2650035164U, 2472361850U,
+    3438299U, 2150366101U, 1577256676U, 3802546413U, 1787774626U, 4078331588U,
+    3706103141U, 170391138U, 3806085154U, 1680970100U, 1961637521U, 3316029766U,
+    890610272U, 1453751581U, 1430283664U, 3051057411U, 3597003186U, 542563954U,
+    3796490244U, 1690016688U, 3448752238U, 440702173U, 347290497U, 1121336647U,
+    2540588620U, 280881896U, 2495136428U, 213707396U, 15104824U, 2946180358U,
+    659000016U, 566379385U, 2614030979U, 2855760170U, 334526548U, 2315569495U,
+    2729518615U, 564745877U, 1263517638U, 3157185798U, 1604852056U, 1011639885U,
+    2950579535U, 2524219188U, 312951012U, 1528896652U, 1327861054U, 2846910138U,
+    3966855905U, 2536721582U, 855353911U, 1685434729U, 3303978929U, 1624872055U,
+    4020329649U, 3164802143U, 1642802700U, 1957727869U, 1792352426U,
+    3334618929U, 2631577923U, 3027156164U, 842334259U, 3353446843U, 1226432104U,
+    1742801369U, 3552852535U, 3471698828U, 1653910186U, 3380330939U,
+    2313782701U, 3351007196U, 2129839995U, 1800682418U, 4085884420U,
+    1625156629U, 3669701987U, 615211810U, 3294791649U, 4131143784U, 2590843588U,
+    3207422808U, 3275066464U, 561592872U, 3957205738U, 3396578098U, 48410678U,
+    3505556445U, 1005764855U, 3920606528U, 2936980473U, 2378918600U,
+    2404449845U, 1649515163U, 701203563U, 3705256349U, 83714199U, 3586854132U,
+    922978446U, 2863406304U, 3523398907U, 2606864832U, 2385399361U, 3171757816U,
+    4262841009U, 3645837721U, 1169579486U, 3666433897U, 3174689479U,
+    1457866976U, 3803895110U, 3346639145U, 1907224409U, 1978473712U,
+    1036712794U, 980754888U, 1302782359U, 1765252468U, 459245755U, 3728923860U,
+    1512894209U, 2046491914U, 207860527U, 514188684U, 2288713615U, 1597354672U,
+    3349636117U, 2357291114U, 3995796221U, 945364213U, 1893326518U, 3770814016U,
+    1691552714U, 2397527410U, 967486361U, 776416472U, 4197661421U, 951150819U,
+    1852770983U, 4044624181U, 1399439738U, 4194455275U, 2284037669U,
+    1550734958U, 3321078108U, 1865235926U, 2912129961U, 2664980877U,
+    1357572033U, 2600196436U, 2486728200U, 2372668724U, 1567316966U,
+    2374111491U, 1839843570U, 20815612U, 3727008608U, 3871996229U, 824061249U,
+    1932503978U, 3404541726U, 758428924U, 2609331364U, 1223966026U, 1299179808U,
+    648499352U, 2180134401U, 880821170U, 3781130950U, 113491270U, 1032413764U,
+    4185884695U, 2490396037U, 1201932817U, 4060951446U, 4165586898U,
+    1629813212U, 2887821158U, 415045333U, 628926856U, 2193466079U, 3391843445U,
+    2227540681U, 1907099846U, 2848448395U, 1717828221U, 1372704537U,
+    1707549841U, 2294058813U, 2101214437U, 2052479531U, 1695809164U,
+    3176587306U, 2632770465U, 81634404U, 1603220563U, 644238487U, 302857763U,
+    897352968U, 2613146653U, 1391730149U, 4245717312U, 4191828749U, 1948492526U,
+    2618174230U, 3992984522U, 2178852787U, 3596044509U, 3445573503U,
+    2026614616U, 915763564U, 3415689334U, 2532153403U, 3879661562U, 2215027417U,
+    3111154986U, 2929478371U, 668346391U, 1152241381U, 2632029711U, 3004150659U,
+    2135025926U, 948690501U, 2799119116U, 4228829406U, 1981197489U, 4209064138U,
+    684318751U, 3459397845U, 201790843U, 4022541136U, 3043635877U, 492509624U,
+    3263466772U, 1509148086U, 921459029U, 3198857146U, 705479721U, 3835966910U,
+    3603356465U, 576159741U, 1742849431U, 594214882U, 2055294343U, 3634861861U,
+    449571793U, 3246390646U, 3868232151U, 1479156585U, 2900125656U, 2464815318U,
+    3960178104U, 1784261920U, 18311476U, 3627135050U, 644609697U, 424968996U,
+    919890700U, 2986824110U, 816423214U, 4003562844U, 1392714305U, 1757384428U,
+    2569030598U, 995949559U, 3875659880U, 2933807823U, 2752536860U, 2993858466U,
+    4030558899U, 2770783427U, 2775406005U, 2777781742U, 1931292655U, 472147933U,
+    3865853827U, 2726470545U, 2668412860U, 2887008249U, 408979190U, 3578063323U,
+    3242082049U, 1778193530U, 27981909U, 2362826515U, 389875677U, 1043878156U,
+    581653903U, 3830568952U, 389535942U, 3713523185U, 2768373359U, 2526101582U,
+    1998618197U, 1160859704U, 3951172488U, 1098005003U, 906275699U, 3446228002U,
+    2220677963U, 2059306445U, 132199571U, 476838790U, 1868039399U, 3097344807U,
+    857300945U, 396345050U, 2835919916U, 1782168828U, 1419519470U, 4288137521U,
+    819087232U, 596301494U, 872823172U, 1526888217U, 805161465U, 1116186205U,
+    2829002754U, 2352620120U, 620121516U, 354159268U, 3601949785U, 209568138U,
+    1352371732U, 2145977349U, 4236871834U, 1539414078U, 3558126206U,
+    3224857093U, 4164166682U, 3817553440U, 3301780278U, 2682696837U,
+    3734994768U, 1370950260U, 1477421202U, 2521315749U, 1330148125U,
+    1261554731U, 2769143688U, 3554756293U, 4235882678U, 3254686059U,
+    3530579953U, 1215452615U, 3574970923U, 4057131421U, 589224178U, 1000098193U,
+    171190718U, 2521852045U, 2351447494U, 2284441580U, 2646685513U, 3486933563U,
+    3789864960U, 1190528160U, 1702536782U, 1534105589U, 4262946827U,
+    2726686826U, 3584544841U, 2348270128U, 2145092281U, 2502718509U,
+    1027832411U, 3571171153U, 1287361161U, 4011474411U, 3241215351U,
+    2419700818U, 971242709U, 1361975763U, 1096842482U, 3271045537U, 81165449U,
+    612438025U, 3912966678U, 1356929810U, 733545735U, 537003843U, 1282953084U,
+    884458241U, 588930090U, 3930269801U, 2961472450U, 1219535534U, 3632251943U,
+    268183903U, 1441240533U, 3653903360U, 3854473319U, 2259087390U, 2548293048U,
+    2022641195U, 2105543911U, 1764085217U, 3246183186U, 482438805U, 888317895U,
+    2628314765U, 2466219854U, 717546004U, 2322237039U, 416725234U, 1544049923U,
+    1797944973U, 3398652364U, 3111909456U, 485742908U, 2277491072U, 1056355088U,
+    3181001278U, 129695079U, 2693624550U, 1764438564U, 3797785470U, 195503713U,
+    3266519725U, 2053389444U, 1961527818U, 3400226523U, 3777903038U,
+    2597274307U, 4235851091U, 4094406648U, 2171410785U, 1781151386U,
+    1378577117U, 654643266U, 3424024173U, 3385813322U, 679385799U, 479380913U,
+    681715441U, 3096225905U, 276813409U, 3854398070U, 2721105350U, 831263315U,
+    3276280337U, 2628301522U, 3984868494U, 1466099834U, 2104922114U,
+    1412672743U, 820330404U, 3491501010U, 942735832U, 710652807U, 3972652090U,
+    679881088U, 40577009U, 3705286397U, 2815423480U, 3566262429U, 663396513U,
+    3777887429U, 4016670678U, 404539370U, 1142712925U, 1140173408U, 2913248352U,
+    2872321286U, 263751841U, 3175196073U, 3162557581U, 2878996619U, 75498548U,
+    3836833140U, 3284664959U, 1157523805U, 112847376U, 207855609U, 1337979698U,
+    1222578451U, 157107174U, 901174378U, 3883717063U, 1618632639U, 1767889440U,
+    4264698824U, 1582999313U, 884471997U, 2508825098U, 3756370771U, 2457213553U,
+    3565776881U, 3709583214U, 915609601U, 460833524U, 1091049576U, 85522880U,
+    2553251U, 132102809U, 2429882442U, 2562084610U, 1386507633U, 4112471229U,
+    21965213U, 1981516006U, 2418435617U, 3054872091U, 4251511224U, 2025783543U,
+    1916911512U, 2454491136U, 3938440891U, 3825869115U, 1121698605U,
+    3463052265U, 802340101U, 1912886800U, 4031997367U, 3550640406U, 1596096923U,
+    610150600U, 431464457U, 2541325046U, 486478003U, 739704936U, 2862696430U,
+    3037903166U, 1129749694U, 2611481261U, 1228993498U, 510075548U, 3424962587U,
+    2458689681U, 818934833U, 4233309125U, 1608196251U, 3419476016U, 1858543939U,
+    2682166524U, 3317854285U, 631986188U, 3008214764U, 613826412U, 3567358221U,
+    3512343882U, 1552467474U, 3316162670U, 1275841024U, 4142173454U, 565267881U,
+    768644821U, 198310105U, 2396688616U, 1837659011U, 203429334U, 854539004U,
+    4235811518U, 3338304926U, 3730418692U, 3852254981U, 3032046452U,
+    2329811860U, 2303590566U, 2696092212U, 3894665932U, 145835667U, 249563655U,
+    1932210840U, 2431696407U, 3312636759U, 214962629U, 2092026914U, 3020145527U,
+    4073039873U, 2739105705U, 1308336752U, 855104522U, 2391715321U, 67448785U,
+    547989482U, 854411802U, 3608633740U, 431731530U, 537375589U, 3888005760U,
+    696099141U, 397343236U, 1864511780U, 44029739U, 1729526891U, 1993398655U,
+    2010173426U, 2591546756U, 275223291U, 1503900299U, 4217765081U, 2185635252U,
+    1122436015U, 3550155364U, 681707194U, 3260479338U, 933579397U, 2983029282U,
+    2505504587U, 2667410393U, 2962684490U, 4139721708U, 2658172284U,
+    2452602383U, 2607631612U, 1344296217U, 3075398709U, 2949785295U,
+    1049956168U, 3917185129U, 2155660174U, 3280524475U, 1503827867U, 674380765U,
+    1918468193U, 3843983676U, 634358221U, 2538335643U, 1873351298U, 3368723763U,
+    2129144130U, 3203528633U, 3087174986U, 2691698871U, 2516284287U, 24437745U,
+    1118381474U, 2816314867U, 2448576035U, 4281989654U, 217287825U, 165872888U,
+    2628995722U, 3533525116U, 2721669106U, 872340568U, 3429930655U, 3309047304U,
+    3916704967U, 3270160355U, 1348884255U, 1634797670U, 881214967U, 4259633554U,
+    174613027U, 1103974314U, 1625224232U, 2678368291U, 1133866707U, 3853082619U,
+    4073196549U, 1189620777U, 637238656U, 930241537U, 4042750792U, 3842136042U,
+    2417007212U, 2524907510U, 1243036827U, 1282059441U, 3764588774U,
+    1394459615U, 2323620015U, 1166152231U, 3307479609U, 3849322257U,
+    3507445699U, 4247696636U, 758393720U, 967665141U, 1095244571U, 1319812152U,
+    407678762U, 2640605208U, 2170766134U, 3663594275U, 4039329364U, 2512175520U,
+    725523154U, 2249807004U, 3312617979U, 2414634172U, 1278482215U, 349206484U,
+    1573063308U, 1196429124U, 3873264116U, 2400067801U, 268795167U, 226175489U,
+    2961367263U, 1968719665U, 42656370U, 1010790699U, 561600615U, 2422453992U,
+    3082197735U, 1636700484U, 3977715296U, 3125350482U, 3478021514U,
+    2227819446U, 1540868045U, 3061908980U, 1087362407U, 3625200291U, 361937537U,
+    580441897U, 1520043666U, 2270875402U, 1009161260U, 2502355842U, 4278769785U,
+    473902412U, 1057239083U, 1905829039U, 1483781177U, 2080011417U, 1207494246U,
+    1806991954U, 2194674403U, 3455972205U, 807207678U, 3655655687U, 674112918U,
+    195425752U, 3917890095U, 1874364234U, 1837892715U, 3663478166U, 1548892014U,
+    2570748714U, 2049929836U, 2167029704U, 697543767U, 3499545023U, 3342496315U,
+    1725251190U, 3561387469U, 2905606616U, 1580182447U, 3934525927U,
+    4103172792U, 1365672522U, 1534795737U, 3308667416U, 2841911405U,
+    3943182730U, 4072020313U, 3494770452U, 3332626671U, 55327267U, 478030603U,
+    411080625U, 3419529010U, 1604767823U, 3513468014U, 570668510U, 913790824U,
+    2283967995U, 695159462U, 3825542932U, 4150698144U, 1829758699U, 202895590U,
+    1609122645U, 1267651008U, 2910315509U, 2511475445U, 2477423819U,
+    3932081579U, 900879979U, 2145588390U, 2670007504U, 580819444U, 1864996828U,
+    2526325979U, 1019124258U, 815508628U, 2765933989U, 1277301341U, 3006021786U,
+    855540956U, 288025710U, 1919594237U, 2331223864U, 177452412U, 2475870369U,
+    2689291749U, 865194284U, 253432152U, 2628531804U, 2861208555U, 2361597573U,
+    1653952120U, 1039661024U, 2159959078U, 3709040440U, 3564718533U,
+    2596878672U, 2041442161U, 31164696U, 2662962485U, 3665637339U, 1678115244U,
+    2699839832U, 3651968520U, 3521595541U, 458433303U, 2423096824U, 21831741U,
+    380011703U, 2498168716U, 861806087U, 1673574843U, 4188794405U, 2520563651U,
+    2632279153U, 2170465525U, 4171949898U, 3886039621U, 1661344005U,
+    3424285243U, 992588372U, 2500984144U, 2993248497U, 3590193895U, 1535327365U,
+    515645636U, 131633450U, 3729760261U, 1613045101U, 3254194278U, 15889678U,
+    1493590689U, 244148718U, 2991472662U, 1401629333U, 777349878U, 2501401703U,
+    4285518317U, 3794656178U, 955526526U, 3442142820U, 3970298374U, 736025417U,
+    2737370764U, 1271509744U, 440570731U, 136141826U, 1596189518U, 923399175U,
+    257541519U, 3505774281U, 2194358432U, 2518162991U, 1379893637U, 2667767062U,
+    3748146247U, 1821712620U, 3923161384U, 1947811444U, 2392527197U,
+    4127419685U, 1423694998U, 4156576871U, 1382885582U, 3420127279U,
+    3617499534U, 2994377493U, 4038063986U, 1918458672U, 2983166794U,
+    4200449033U, 353294540U, 1609232588U, 243926648U, 2332803291U, 507996832U,
+    2392838793U, 4075145196U, 2060984340U, 4287475136U, 88232602U, 2491531140U,
+    4159725633U, 2272075455U, 759298618U, 201384554U, 838356250U, 1416268324U,
+    674476934U, 90795364U, 141672229U, 3660399588U, 4196417251U, 3249270244U,
+    3774530247U, 59587265U, 3683164208U, 19392575U, 1463123697U, 1882205379U,
+    293780489U, 2553160622U, 2933904694U, 675638239U, 2851336944U, 1435238743U,
+    2448730183U, 804436302U, 2119845972U, 322560608U, 4097732704U, 2987802540U,
+    641492617U, 2575442710U, 4217822703U, 3271835300U, 2836418300U, 3739921620U,
+    2138378768U, 2879771855U, 4294903423U, 3121097946U, 2603440486U,
+    2560820391U, 1012930944U, 2313499967U, 584489368U, 3431165766U, 897384869U,
+    2062537737U, 2847889234U, 3742362450U, 2951174585U, 4204621084U,
+    1109373893U, 3668075775U, 2750138839U, 3518055702U, 733072558U, 4169325400U,
+    788493625U};
+static const uint64_t init_gen_rand_64_expected[] = {KQU(16924766246869039260),
+    KQU(8201438687333352714), KQU(2265290287015001750),
+    KQU(18397264611805473832), KQU(3375255223302384358),
+    KQU(6345559975416828796), KQU(18229739242790328073),
+    KQU(7596792742098800905), KQU(255338647169685981), KQU(2052747240048610300),
+    KQU(18328151576097299343), KQU(12472905421133796567),
+    KQU(11315245349717600863), KQU(16594110197775871209),
+    KQU(15708751964632456450), KQU(10452031272054632535),
+    KQU(11097646720811454386), KQU(4556090668445745441),
+    KQU(17116187693090663106), KQU(14931526836144510645),
+    KQU(9190752218020552591), KQU(9625800285771901401),
+    KQU(13995141077659972832), KQU(5194209094927829625),
+    KQU(4156788379151063303), KQU(8523452593770139494),
+    KQU(14082382103049296727), KQU(2462601863986088483),
+    KQU(3030583461592840678), KQU(5221622077872827681),
+    KQU(3084210671228981236), KQU(13956758381389953823),
+    KQU(13503889856213423831), KQU(15696904024189836170),
+    KQU(4612584152877036206), KQU(6231135538447867881),
+    KQU(10172457294158869468), KQU(6452258628466708150),
+    KQU(14044432824917330221), KQU(370168364480044279),
+    KQU(10102144686427193359), KQU(667870489994776076),
+    KQU(2732271956925885858), KQU(18027788905977284151),
+    KQU(15009842788582923859), KQU(7136357960180199542),
+    KQU(15901736243475578127), KQU(16951293785352615701),
+    KQU(10551492125243691632), KQU(17668869969146434804),
+    KQU(13646002971174390445), KQU(9804471050759613248),
+    KQU(5511670439655935493), KQU(18103342091070400926),
+    KQU(17224512747665137533), KQU(15534627482992618168),
+    KQU(1423813266186582647), KQU(15821176807932930024), KQU(30323369733607156),
+    KQU(11599382494723479403), KQU(653856076586810062),
+    KQU(3176437395144899659), KQU(14028076268147963917),
+    KQU(16156398271809666195), KQU(3166955484848201676),
+    KQU(5746805620136919390), KQU(17297845208891256593),
+    KQU(11691653183226428483), KQU(17900026146506981577),
+    KQU(15387382115755971042), KQU(16923567681040845943),
+    KQU(8039057517199388606), KQU(11748409241468629263),
+    KQU(794358245539076095), KQU(13438501964693401242),
+    KQU(14036803236515618962), KQU(5252311215205424721),
+    KQU(17806589612915509081), KQU(6802767092397596006),
+    KQU(14212120431184557140), KQU(1072951366761385712),
+    KQU(13098491780722836296), KQU(9466676828710797353),
+    KQU(12673056849042830081), KQU(12763726623645357580),
+    KQU(16468961652999309493), KQU(15305979875636438926),
+    KQU(17444713151223449734), KQU(5692214267627883674),
+    KQU(13049589139196151505), KQU(880115207831670745),
+    KQU(1776529075789695498), KQU(16695225897801466485),
+    KQU(10666901778795346845), KQU(6164389346722833869),
+    KQU(2863817793264300475), KQU(9464049921886304754),
+    KQU(3993566636740015468), KQU(9983749692528514136),
+    KQU(16375286075057755211), KQU(16042643417005440820),
+    KQU(11445419662923489877), KQU(7999038846885158836),
+    KQU(6721913661721511535), KQU(5363052654139357320),
+    KQU(1817788761173584205), KQU(13290974386445856444),
+    KQU(4650350818937984680), KQU(8219183528102484836),
+    KQU(1569862923500819899), KQU(4189359732136641860),
+    KQU(14202822961683148583), KQU(4457498315309429058),
+    KQU(13089067387019074834), KQU(11075517153328927293),
+    KQU(10277016248336668389), KQU(7070509725324401122),
+    KQU(17808892017780289380), KQU(13143367339909287349),
+    KQU(1377743745360085151), KQU(5749341807421286485),
+    KQU(14832814616770931325), KQU(7688820635324359492),
+    KQU(10960474011539770045), KQU(81970066653179790),
+    KQU(12619476072607878022), KQU(4419566616271201744),
+    KQU(15147917311750568503), KQU(5549739182852706345),
+    KQU(7308198397975204770), KQU(13580425496671289278),
+    KQU(17070764785210130301), KQU(8202832846285604405),
+    KQU(6873046287640887249), KQU(6927424434308206114),
+    KQU(6139014645937224874), KQU(10290373645978487639),
+    KQU(15904261291701523804), KQU(9628743442057826883),
+    KQU(18383429096255546714), KQU(4977413265753686967),
+    KQU(7714317492425012869), KQU(9025232586309926193),
+    KQU(14627338359776709107), KQU(14759849896467790763),
+    KQU(10931129435864423252), KQU(4588456988775014359),
+    KQU(10699388531797056724), KQU(468652268869238792),
+    KQU(5755943035328078086), KQU(2102437379988580216),
+    KQU(9986312786506674028), KQU(2654207180040945604),
+    KQU(8726634790559960062), KQU(100497234871808137), KQU(2800137176951425819),
+    KQU(6076627612918553487), KQU(5780186919186152796),
+    KQU(8179183595769929098), KQU(6009426283716221169),
+    KQU(2796662551397449358), KQU(1756961367041986764),
+    KQU(6972897917355606205), KQU(14524774345368968243),
+    KQU(2773529684745706940), KQU(4853632376213075959),
+    KQU(4198177923731358102), KQU(8271224913084139776),
+    KQU(2741753121611092226), KQU(16782366145996731181),
+    KQU(15426125238972640790), KQU(13595497100671260342),
+    KQU(3173531022836259898), KQU(6573264560319511662),
+    KQU(18041111951511157441), KQU(2351433581833135952),
+    KQU(3113255578908173487), KQU(1739371330877858784),
+    KQU(16046126562789165480), KQU(8072101652214192925),
+    KQU(15267091584090664910), KQU(9309579200403648940),
+    KQU(5218892439752408722), KQU(14492477246004337115),
+    KQU(17431037586679770619), KQU(7385248135963250480),
+    KQU(9580144956565560660), KQU(4919546228040008720),
+    KQU(15261542469145035584), KQU(18233297270822253102),
+    KQU(5453248417992302857), KQU(9309519155931460285),
+    KQU(10342813012345291756), KQU(15676085186784762381),
+    KQU(15912092950691300645), KQU(9371053121499003195),
+    KQU(9897186478226866746), KQU(14061858287188196327),
+    KQU(122575971620788119), KQU(12146750969116317754),
+    KQU(4438317272813245201), KQU(8332576791009527119),
+    KQU(13907785691786542057), KQU(10374194887283287467),
+    KQU(2098798755649059566), KQU(3416235197748288894),
+    KQU(8688269957320773484), KQU(7503964602397371571),
+    KQU(16724977015147478236), KQU(9461512855439858184),
+    KQU(13259049744534534727), KQU(3583094952542899294),
+    KQU(8764245731305528292), KQU(13240823595462088985),
+    KQU(13716141617617910448), KQU(18114969519935960955),
+    KQU(2297553615798302206), KQU(4585521442944663362),
+    KQU(17776858680630198686), KQU(4685873229192163363),
+    KQU(152558080671135627), KQU(15424900540842670088),
+    KQU(13229630297130024108), KQU(17530268788245718717),
+    KQU(16675633913065714144), KQU(3158912717897568068),
+    KQU(15399132185380087288), KQU(7401418744515677872),
+    KQU(13135412922344398535), KQU(6385314346100509511),
+    KQU(13962867001134161139), KQU(10272780155442671999),
+    KQU(12894856086597769142), KQU(13340877795287554994),
+    KQU(12913630602094607396), KQU(12543167911119793857),
+    KQU(17343570372251873096), KQU(10959487764494150545),
+    KQU(6966737953093821128), KQU(13780699135496988601),
+    KQU(4405070719380142046), KQU(14923788365607284982),
+    KQU(2869487678905148380), KQU(6416272754197188403),
+    KQU(15017380475943612591), KQU(1995636220918429487),
+    KQU(3402016804620122716), KQU(15800188663407057080),
+    KQU(11362369990390932882), KQU(15262183501637986147),
+    KQU(10239175385387371494), KQU(9352042420365748334),
+    KQU(1682457034285119875), KQU(1724710651376289644),
+    KQU(2038157098893817966), KQU(9897825558324608773),
+    KQU(1477666236519164736), KQU(16835397314511233640),
+    KQU(10370866327005346508), KQU(10157504370660621982),
+    KQU(12113904045335882069), KQU(13326444439742783008),
+    KQU(11302769043000765804), KQU(13594979923955228484),
+    KQU(11779351762613475968), KQU(3786101619539298383),
+    KQU(8021122969180846063), KQU(15745904401162500495),
+    KQU(10762168465993897267), KQU(13552058957896319026),
+    KQU(11200228655252462013), KQU(5035370357337441226),
+    KQU(7593918984545500013), KQU(5418554918361528700),
+    KQU(4858270799405446371), KQU(9974659566876282544),
+    KQU(18227595922273957859), KQU(2772778443635656220),
+    KQU(14285143053182085385), KQU(9939700992429600469),
+    KQU(12756185904545598068), KQU(2020783375367345262), KQU(57026775058331227),
+    KQU(950827867930065454), KQU(6602279670145371217), KQU(2291171535443566929),
+    KQU(5832380724425010313), KQU(1220343904715982285),
+    KQU(17045542598598037633), KQU(15460481779702820971),
+    KQU(13948388779949365130), KQU(13975040175430829518),
+    KQU(17477538238425541763), KQU(11104663041851745725),
+    KQU(15860992957141157587), KQU(14529434633012950138),
+    KQU(2504838019075394203), KQU(7512113882611121886),
+    KQU(4859973559980886617), KQU(1258601555703250219),
+    KQU(15594548157514316394), KQU(4516730171963773048),
+    KQU(11380103193905031983), KQU(6809282239982353344),
+    KQU(18045256930420065002), KQU(2453702683108791859),
+    KQU(977214582986981460), KQU(2006410402232713466), KQU(6192236267216378358),
+    KQU(3429468402195675253), KQU(18146933153017348921),
+    KQU(17369978576367231139), KQU(1246940717230386603),
+    KQU(11335758870083327110), KQU(14166488801730353682),
+    KQU(9008573127269635732), KQU(10776025389820643815),
+    KQU(15087605441903942962), KQU(1359542462712147922),
+    KQU(13898874411226454206), KQU(17911176066536804411),
+    KQU(9435590428600085274), KQU(294488509967864007), KQU(8890111397567922046),
+    KQU(7987823476034328778), KQU(13263827582440967651),
+    KQU(7503774813106751573), KQU(14974747296185646837),
+    KQU(8504765037032103375), KQU(17340303357444536213),
+    KQU(7704610912964485743), KQU(8107533670327205061),
+    KQU(9062969835083315985), KQU(16968963142126734184),
+    KQU(12958041214190810180), KQU(2720170147759570200),
+    KQU(2986358963942189566), KQU(14884226322219356580),
+    KQU(286224325144368520), KQU(11313800433154279797),
+    KQU(18366849528439673248), KQU(17899725929482368789),
+    KQU(3730004284609106799), KQU(1654474302052767205),
+    KQU(5006698007047077032), KQU(8196893913601182838),
+    KQU(15214541774425211640), KQU(17391346045606626073),
+    KQU(8369003584076969089), KQU(3939046733368550293),
+    KQU(10178639720308707785), KQU(2180248669304388697), KQU(62894391300126322),
+    KQU(9205708961736223191), KQU(6837431058165360438),
+    KQU(3150743890848308214), KQU(17849330658111464583),
+    KQU(12214815643135450865), KQU(13410713840519603402),
+    KQU(3200778126692046802), KQU(13354780043041779313),
+    KQU(800850022756886036), KQU(15660052933953067433),
+    KQU(6572823544154375676), KQU(11030281857015819266),
+    KQU(12682241941471433835), KQU(11654136407300274693),
+    KQU(4517795492388641109), KQU(9757017371504524244),
+    KQU(17833043400781889277), KQU(12685085201747792227),
+    KQU(10408057728835019573), KQU(98370418513455221), KQU(6732663555696848598),
+    KQU(13248530959948529780), KQU(3530441401230622826),
+    KQU(18188251992895660615), KQU(1847918354186383756),
+    KQU(1127392190402660921), KQU(11293734643143819463),
+    KQU(3015506344578682982), KQU(13852645444071153329),
+    KQU(2121359659091349142), KQU(1294604376116677694),
+    KQU(5616576231286352318), KQU(7112502442954235625),
+    KQU(11676228199551561689), KQU(12925182803007305359),
+    KQU(7852375518160493082), KQU(1136513130539296154),
+    KQU(5636923900916593195), KQU(3221077517612607747),
+    KQU(17784790465798152513), KQU(3554210049056995938),
+    KQU(17476839685878225874), KQU(3206836372585575732),
+    KQU(2765333945644823430), KQU(10080070903718799528),
+    KQU(5412370818878286353), KQU(9689685887726257728),
+    KQU(8236117509123533998), KQU(1951139137165040214),
+    KQU(4492205209227980349), KQU(16541291230861602967),
+    KQU(1424371548301437940), KQU(9117562079669206794),
+    KQU(14374681563251691625), KQU(13873164030199921303),
+    KQU(6680317946770936731), KQU(15586334026918276214),
+    KQU(10896213950976109802), KQU(9506261949596413689),
+    KQU(9903949574308040616), KQU(6038397344557204470), KQU(174601465422373648),
+    KQU(15946141191338238030), KQU(17142225620992044937),
+    KQU(7552030283784477064), KQU(2947372384532947997), KQU(510797021688197711),
+    KQU(4962499439249363461), KQU(23770320158385357), KQU(959774499105138124),
+    KQU(1468396011518788276), KQU(2015698006852312308),
+    KQU(4149400718489980136), KQU(5992916099522371188),
+    KQU(10819182935265531076), KQU(16189787999192351131),
+    KQU(342833961790261950), KQU(12470830319550495336),
+    KQU(18128495041912812501), KQU(1193600899723524337),
+    KQU(9056793666590079770), KQU(2154021227041669041),
+    KQU(4963570213951235735), KQU(4865075960209211409),
+    KQU(2097724599039942963), KQU(2024080278583179845),
+    KQU(11527054549196576736), KQU(10650256084182390252),
+    KQU(4808408648695766755), KQU(1642839215013788844),
+    KQU(10607187948250398390), KQU(7076868166085913508),
+    KQU(730522571106887032), KQU(12500579240208524895),
+    KQU(4484390097311355324), KQU(15145801330700623870),
+    KQU(8055827661392944028), KQU(5865092976832712268),
+    KQU(15159212508053625143), KQU(3560964582876483341),
+    KQU(4070052741344438280), KQU(6032585709886855634),
+    KQU(15643262320904604873), KQU(2565119772293371111),
+    KQU(318314293065348260), KQU(15047458749141511872),
+    KQU(7772788389811528730), KQU(7081187494343801976),
+    KQU(6465136009467253947), KQU(10425940692543362069),
+    KQU(554608190318339115), KQU(14796699860302125214),
+    KQU(1638153134431111443), KQU(10336967447052276248),
+    KQU(8412308070396592958), KQU(4004557277152051226),
+    KQU(8143598997278774834), KQU(16413323996508783221),
+    KQU(13139418758033994949), KQU(9772709138335006667),
+    KQU(2818167159287157659), KQU(17091740573832523669),
+    KQU(14629199013130751608), KQU(18268322711500338185),
+    KQU(8290963415675493063), KQU(8830864907452542588),
+    KQU(1614839084637494849), KQU(14855358500870422231),
+    KQU(3472996748392519937), KQU(15317151166268877716),
+    KQU(5825895018698400362), KQU(16730208429367544129),
+    KQU(10481156578141202800), KQU(4746166512382823750),
+    KQU(12720876014472464998), KQU(8825177124486735972),
+    KQU(13733447296837467838), KQU(6412293741681359625),
+    KQU(8313213138756135033), KQU(11421481194803712517),
+    KQU(7997007691544174032), KQU(6812963847917605930),
+    KQU(9683091901227558641), KQU(14703594165860324713),
+    KQU(1775476144519618309), KQU(2724283288516469519), KQU(717642555185856868),
+    KQU(8736402192215092346), KQU(11878800336431381021),
+    KQU(4348816066017061293), KQU(6115112756583631307),
+    KQU(9176597239667142976), KQU(12615622714894259204),
+    KQU(10283406711301385987), KQU(5111762509485379420),
+    KQU(3118290051198688449), KQU(7345123071632232145),
+    KQU(9176423451688682359), KQU(4843865456157868971),
+    KQU(12008036363752566088), KQU(12058837181919397720),
+    KQU(2145073958457347366), KQU(1526504881672818067),
+    KQU(3488830105567134848), KQU(13208362960674805143),
+    KQU(4077549672899572192), KQU(7770995684693818365),
+    KQU(1398532341546313593), KQU(12711859908703927840),
+    KQU(1417561172594446813), KQU(17045191024194170604),
+    KQU(4101933177604931713), KQU(14708428834203480320),
+    KQU(17447509264469407724), KQU(14314821973983434255),
+    KQU(17990472271061617265), KQU(5087756685841673942),
+    KQU(12797820586893859939), KQU(1778128952671092879),
+    KQU(3535918530508665898), KQU(9035729701042481301),
+    KQU(14808661568277079962), KQU(14587345077537747914),
+    KQU(11920080002323122708), KQU(6426515805197278753),
+    KQU(3295612216725984831), KQU(11040722532100876120),
+    KQU(12305952936387598754), KQU(16097391899742004253),
+    KQU(4908537335606182208), KQU(12446674552196795504),
+    KQU(16010497855816895177), KQU(9194378874788615551),
+    KQU(3382957529567613384), KQU(5154647600754974077),
+    KQU(9801822865328396141), KQU(9023662173919288143),
+    KQU(17623115353825147868), KQU(8238115767443015816),
+    KQU(15811444159859002560), KQU(9085612528904059661),
+    KQU(6888601089398614254), KQU(258252992894160189), KQU(6704363880792428622),
+    KQU(6114966032147235763), KQU(11075393882690261875),
+    KQU(8797664238933620407), KQU(5901892006476726920),
+    KQU(5309780159285518958), KQU(14940808387240817367),
+    KQU(14642032021449656698), KQU(9808256672068504139),
+    KQU(3670135111380607658), KQU(11211211097845960152),
+    KQU(1474304506716695808), KQU(15843166204506876239),
+    KQU(7661051252471780561), KQU(10170905502249418476),
+    KQU(7801416045582028589), KQU(2763981484737053050),
+    KQU(9491377905499253054), KQU(16201395896336915095),
+    KQU(9256513756442782198), KQU(5411283157972456034),
+    KQU(5059433122288321676), KQU(4327408006721123357),
+    KQU(9278544078834433377), KQU(7601527110882281612),
+    KQU(11848295896975505251), KQU(12096998801094735560),
+    KQU(14773480339823506413), KQU(15586227433895802149),
+    KQU(12786541257830242872), KQU(6904692985140503067),
+    KQU(5309011515263103959), KQU(12105257191179371066),
+    KQU(14654380212442225037), KQU(2556774974190695009),
+    KQU(4461297399927600261), KQU(14888225660915118646),
+    KQU(14915459341148291824), KQU(2738802166252327631),
+    KQU(6047155789239131512), KQU(12920545353217010338),
+    KQU(10697617257007840205), KQU(2751585253158203504),
+    KQU(13252729159780047496), KQU(14700326134672815469),
+    KQU(14082527904374600529), KQU(16852962273496542070),
+    KQU(17446675504235853907), KQU(15019600398527572311),
+    KQU(12312781346344081551), KQU(14524667935039810450),
+    KQU(5634005663377195738), KQU(11375574739525000569),
+    KQU(2423665396433260040), KQU(5222836914796015410),
+    KQU(4397666386492647387), KQU(4619294441691707638), KQU(665088602354770716),
+    KQU(13246495665281593610), KQU(6564144270549729409),
+    KQU(10223216188145661688), KQU(3961556907299230585),
+    KQU(11543262515492439914), KQU(16118031437285993790),
+    KQU(7143417964520166465), KQU(13295053515909486772), KQU(40434666004899675),
+    KQU(17127804194038347164), KQU(8599165966560586269),
+    KQU(8214016749011284903), KQU(13725130352140465239),
+    KQU(5467254474431726291), KQU(7748584297438219877),
+    KQU(16933551114829772472), KQU(2169618439506799400),
+    KQU(2169787627665113463), KQU(17314493571267943764),
+    KQU(18053575102911354912), KQU(11928303275378476973),
+    KQU(11593850925061715550), KQU(17782269923473589362),
+    KQU(3280235307704747039), KQU(6145343578598685149),
+    KQU(17080117031114086090), KQU(18066839902983594755),
+    KQU(6517508430331020706), KQU(8092908893950411541),
+    KQU(12558378233386153732), KQU(4476532167973132976),
+    KQU(16081642430367025016), KQU(4233154094369139361),
+    KQU(8693630486693161027), KQU(11244959343027742285),
+    KQU(12273503967768513508), KQU(14108978636385284876),
+    KQU(7242414665378826984), KQU(6561316938846562432),
+    KQU(8601038474994665795), KQU(17532942353612365904),
+    KQU(17940076637020912186), KQU(7340260368823171304),
+    KQU(7061807613916067905), KQU(10561734935039519326),
+    KQU(17990796503724650862), KQU(6208732943911827159),
+    KQU(359077562804090617), KQU(14177751537784403113),
+    KQU(10659599444915362902), KQU(15081727220615085833),
+    KQU(13417573895659757486), KQU(15513842342017811524),
+    KQU(11814141516204288231), KQU(1827312513875101814),
+    KQU(2804611699894603103), KQU(17116500469975602763),
+    KQU(12270191815211952087), KQU(12256358467786024988),
+    KQU(18435021722453971267), KQU(671330264390865618), KQU(476504300460286050),
+    KQU(16465470901027093441), KQU(4047724406247136402),
+    KQU(1322305451411883346), KQU(1388308688834322280),
+    KQU(7303989085269758176), KQU(9323792664765233642),
+    KQU(4542762575316368936), KQU(17342696132794337618),
+    KQU(4588025054768498379), KQU(13415475057390330804),
+    KQU(17880279491733405570), KQU(10610553400618620353),
+    KQU(3180842072658960139), KQU(13002966655454270120),
+    KQU(1665301181064982826), KQU(7083673946791258979), KQU(190522247122496820),
+    KQU(17388280237250677740), KQU(8430770379923642945),
+    KQU(12987180971921668584), KQU(2311086108365390642),
+    KQU(2870984383579822345), KQU(14014682609164653318),
+    KQU(14467187293062251484), KQU(192186361147413298),
+    KQU(15171951713531796524), KQU(9900305495015948728),
+    KQU(17958004775615466344), KQU(14346380954498606514),
+    KQU(18040047357617407096), KQU(5035237584833424532),
+    KQU(15089555460613972287), KQU(4131411873749729831),
+    KQU(1329013581168250330), KQU(10095353333051193949),
+    KQU(10749518561022462716), KQU(9050611429810755847),
+    KQU(15022028840236655649), KQU(8775554279239748298),
+    KQU(13105754025489230502), KQU(15471300118574167585),
+    KQU(89864764002355628), KQU(8776416323420466637), KQU(5280258630612040891),
+    KQU(2719174488591862912), KQU(7599309137399661994),
+    KQU(15012887256778039979), KQU(14062981725630928925),
+    KQU(12038536286991689603), KQU(7089756544681775245),
+    KQU(10376661532744718039), KQU(1265198725901533130),
+    KQU(13807996727081142408), KQU(2935019626765036403),
+    KQU(7651672460680700141), KQU(3644093016200370795),
+    KQU(2840982578090080674), KQU(17956262740157449201),
+    KQU(18267979450492880548), KQU(11799503659796848070),
+    KQU(9942537025669672388), KQU(11886606816406990297),
+    KQU(5488594946437447576), KQU(7226714353282744302),
+    KQU(3784851653123877043), KQU(878018453244803041),
+    KQU(12110022586268616085), KQU(734072179404675123),
+    KQU(11869573627998248542), KQU(469150421297783998), KQU(260151124912803804),
+    KQU(11639179410120968649), KQU(9318165193840846253),
+    KQU(12795671722734758075), KQU(15318410297267253933),
+    KQU(691524703570062620), KQU(5837129010576994601),
+    KQU(15045963859726941052), KQU(5850056944932238169),
+    KQU(12017434144750943807), KQU(7447139064928956574),
+    KQU(3101711812658245019), KQU(16052940704474982954),
+    KQU(18195745945986994042), KQU(8932252132785575659),
+    KQU(13390817488106794834), KQU(11582771836502517453),
+    KQU(4964411326683611686), KQU(2195093981702694011),
+    KQU(14145229538389675669), KQU(16459605532062271798),
+    KQU(866316924816482864), KQU(4593041209937286377), KQU(8415491391910972138),
+    KQU(4171236715600528969), KQU(16637569303336782889),
+    KQU(2002011073439212680), KQU(17695124661097601411),
+    KQU(4627687053598611702), KQU(7895831936020190403),
+    KQU(8455951300917267802), KQU(2923861649108534854),
+    KQU(8344557563927786255), KQU(6408671940373352556),
+    KQU(12210227354536675772), KQU(14294804157294222295),
+    KQU(10103022425071085127), KQU(10092959489504123771),
+    KQU(6554774405376736268), KQU(12629917718410641774),
+    KQU(6260933257596067126), KQU(2460827021439369673),
+    KQU(2541962996717103668), KQU(597377203127351475), KQU(5316984203117315309),
+    KQU(4811211393563241961), KQU(13119698597255811641),
+    KQU(8048691512862388981), KQU(10216818971194073842),
+    KQU(4612229970165291764), KQU(10000980798419974770),
+    KQU(6877640812402540687), KQU(1488727563290436992),
+    KQU(2227774069895697318), KQU(11237754507523316593),
+    KQU(13478948605382290972), KQU(1963583846976858124),
+    KQU(5512309205269276457), KQU(3972770164717652347),
+    KQU(3841751276198975037), KQU(10283343042181903117),
+    KQU(8564001259792872199), KQU(16472187244722489221),
+    KQU(8953493499268945921), KQU(3518747340357279580),
+    KQU(4003157546223963073), KQU(3270305958289814590),
+    KQU(3966704458129482496), KQU(8122141865926661939),
+    KQU(14627734748099506653), KQU(13064426990862560568),
+    KQU(2414079187889870829), KQU(5378461209354225306),
+    KQU(10841985740128255566), KQU(538582442885401738),
+    KQU(7535089183482905946), KQU(16117559957598879095),
+    KQU(8477890721414539741), KQU(1459127491209533386),
+    KQU(17035126360733620462), KQU(8517668552872379126),
+    KQU(10292151468337355014), KQU(17081267732745344157),
+    KQU(13751455337946087178), KQU(14026945459523832966),
+    KQU(6653278775061723516), KQU(10619085543856390441),
+    KQU(2196343631481122885), KQU(10045966074702826136),
+    KQU(10082317330452718282), KQU(5920859259504831242),
+    KQU(9951879073426540617), KQU(7074696649151414158),
+    KQU(15808193543879464318), KQU(7385247772746953374),
+    KQU(3192003544283864292), KQU(18153684490917593847),
+    KQU(12423498260668568905), KQU(10957758099756378169),
+    KQU(11488762179911016040), KQU(2099931186465333782),
+    KQU(11180979581250294432), KQU(8098916250668367933),
+    KQU(3529200436790763465), KQU(12988418908674681745),
+    KQU(6147567275954808580), KQU(3207503344604030989),
+    KQU(10761592604898615360), KQU(229854861031893504),
+    KQU(8809853962667144291), KQU(13957364469005693860),
+    KQU(7634287665224495886), KQU(12353487366976556874),
+    KQU(1134423796317152034), KQU(2088992471334107068),
+    KQU(7393372127190799698), KQU(1845367839871058391), KQU(207922563987322884),
+    KQU(11960870813159944976), KQU(12182120053317317363),
+    KQU(17307358132571709283), KQU(13871081155552824936),
+    KQU(18304446751741566262), KQU(7178705220184302849),
+    KQU(10929605677758824425), KQU(16446976977835806844),
+    KQU(13723874412159769044), KQU(6942854352100915216),
+    KQU(1726308474365729390), KQU(2150078766445323155),
+    KQU(15345558947919656626), KQU(12145453828874527201),
+    KQU(2054448620739726849), KQU(2740102003352628137),
+    KQU(11294462163577610655), KQU(756164283387413743),
+    KQU(17841144758438810880), KQU(10802406021185415861),
+    KQU(8716455530476737846), KQU(6321788834517649606),
+    KQU(14681322910577468426), KQU(17330043563884336387),
+    KQU(12701802180050071614), KQU(14695105111079727151),
+    KQU(5112098511654172830), KQU(4957505496794139973),
+    KQU(8270979451952045982), KQU(12307685939199120969),
+    KQU(12425799408953443032), KQU(8376410143634796588),
+    KQU(16621778679680060464), KQU(3580497854566660073),
+    KQU(1122515747803382416), KQU(857664980960597599), KQU(6343640119895925918),
+    KQU(12878473260854462891), KQU(10036813920765722626),
+    KQU(14451335468363173812), KQU(5476809692401102807),
+    KQU(16442255173514366342), KQU(13060203194757167104),
+    KQU(14354124071243177715), KQU(15961249405696125227),
+    KQU(13703893649690872584), KQU(363907326340340064),
+    KQU(6247455540491754842), KQU(12242249332757832361),
+    KQU(156065475679796717), KQU(9351116235749732355), KQU(4590350628677701405),
+    KQU(1671195940982350389), KQU(13501398458898451905),
+    KQU(6526341991225002255), KQU(1689782913778157592),
+    KQU(7439222350869010334), KQU(13975150263226478308),
+    KQU(11411961169932682710), KQU(17204271834833847277),
+    KQU(541534742544435367), KQU(6591191931218949684), KQU(2645454775478232486),
+    KQU(4322857481256485321), KQU(8477416487553065110),
+    KQU(12902505428548435048), KQU(971445777981341415),
+    KQU(14995104682744976712), KQU(4243341648807158063),
+    KQU(8695061252721927661), KQU(5028202003270177222),
+    KQU(2289257340915567840), KQU(13870416345121866007),
+    KQU(13994481698072092233), KQU(6912785400753196481),
+    KQU(2278309315841980139), KQU(4329765449648304839),
+    KQU(5963108095785485298), KQU(4880024847478722478),
+    KQU(16015608779890240947), KQU(1866679034261393544),
+    KQU(914821179919731519), KQU(9643404035648760131), KQU(2418114953615593915),
+    KQU(944756836073702374), KQU(15186388048737296834),
+    KQU(7723355336128442206), KQU(7500747479679599691),
+    KQU(18013961306453293634), KQU(2315274808095756456),
+    KQU(13655308255424029566), KQU(17203800273561677098),
+    KQU(1382158694422087756), KQU(5090390250309588976), KQU(517170818384213989),
+    KQU(1612709252627729621), KQU(1330118955572449606), KQU(300922478056709885),
+    KQU(18115693291289091987), KQU(13491407109725238321),
+    KQU(15293714633593827320), KQU(5151539373053314504),
+    KQU(5951523243743139207), KQU(14459112015249527975),
+    KQU(5456113959000700739), KQU(3877918438464873016),
+    KQU(12534071654260163555), KQU(15871678376893555041),
+    KQU(11005484805712025549), KQU(16353066973143374252),
+    KQU(4358331472063256685), KQU(8268349332210859288),
+    KQU(12485161590939658075), KQU(13955993592854471343),
+    KQU(5911446886848367039), KQU(14925834086813706974),
+    KQU(6590362597857994805), KQU(1280544923533661875),
+    KQU(1637756018947988164), KQU(4734090064512686329),
+    KQU(16693705263131485912), KQU(6834882340494360958),
+    KQU(8120732176159658505), KQU(2244371958905329346),
+    KQU(10447499707729734021), KQU(7318742361446942194),
+    KQU(8032857516355555296), KQU(14023605983059313116),
+    KQU(1032336061815461376), KQU(9840995337876562612),
+    KQU(9869256223029203587), KQU(12227975697177267636),
+    KQU(12728115115844186033), KQU(7752058479783205470),
+    KQU(729733219713393087), KQU(12954017801239007622)};
+static const uint64_t init_by_array_64_expected[] = {KQU(2100341266307895239),
+    KQU(8344256300489757943), KQU(15687933285484243894),
+    KQU(8268620370277076319), KQU(12371852309826545459),
+    KQU(8800491541730110238), KQU(18113268950100835773),
+    KQU(2886823658884438119), KQU(3293667307248180724),
+    KQU(9307928143300172731), KQU(7688082017574293629), KQU(900986224735166665),
+    KQU(9977972710722265039), KQU(6008205004994830552), KQU(546909104521689292),
+    KQU(7428471521869107594), KQU(14777563419314721179),
+    KQU(16116143076567350053), KQU(5322685342003142329),
+    KQU(4200427048445863473), KQU(4693092150132559146),
+    KQU(13671425863759338582), KQU(6747117460737639916),
+    KQU(4732666080236551150), KQU(5912839950611941263),
+    KQU(3903717554504704909), KQU(2615667650256786818),
+    KQU(10844129913887006352), KQU(13786467861810997820),
+    KQU(14267853002994021570), KQU(13767807302847237439),
+    KQU(16407963253707224617), KQU(4802498363698583497),
+    KQU(2523802839317209764), KQU(3822579397797475589),
+    KQU(8950320572212130610), KQU(3745623504978342534),
+    KQU(16092609066068482806), KQU(9817016950274642398),
+    KQU(10591660660323829098), KQU(11751606650792815920),
+    KQU(5122873818577122211), KQU(17209553764913936624),
+    KQU(6249057709284380343), KQU(15088791264695071830),
+    KQU(15344673071709851930), KQU(4345751415293646084),
+    KQU(2542865750703067928), KQU(13520525127852368784),
+    KQU(18294188662880997241), KQU(3871781938044881523),
+    KQU(2873487268122812184), KQU(15099676759482679005),
+    KQU(15442599127239350490), KQU(6311893274367710888),
+    KQU(3286118760484672933), KQU(4146067961333542189),
+    KQU(13303942567897208770), KQU(8196013722255630418),
+    KQU(4437815439340979989), KQU(15433791533450605135),
+    KQU(4254828956815687049), KQU(1310903207708286015),
+    KQU(10529182764462398549), KQU(14900231311660638810),
+    KQU(9727017277104609793), KQU(1821308310948199033),
+    KQU(11628861435066772084), KQU(9469019138491546924),
+    KQU(3145812670532604988), KQU(9938468915045491919),
+    KQU(1562447430672662142), KQU(13963995266697989134),
+    KQU(3356884357625028695), KQU(4499850304584309747),
+    KQU(8456825817023658122), KQU(10859039922814285279),
+    KQU(8099512337972526555), KQU(348006375109672149),
+    KQU(11919893998241688603), KQU(1104199577402948826),
+    KQU(16689191854356060289), KQU(10992552041730168078),
+    KQU(7243733172705465836), KQU(5668075606180319560),
+    KQU(18182847037333286970), KQU(4290215357664631322),
+    KQU(4061414220791828613), KQU(13006291061652989604),
+    KQU(7140491178917128798), KQU(12703446217663283481),
+    KQU(5500220597564558267), KQU(10330551509971296358),
+    KQU(15958554768648714492), KQU(5174555954515360045),
+    KQU(1731318837687577735), KQU(3557700801048354857),
+    KQU(13764012341928616198), KQU(13115166194379119043),
+    KQU(7989321021560255519), KQU(2103584280905877040),
+    KQU(9230788662155228488), KQU(16396629323325547654),
+    KQU(657926409811318051), KQU(15046700264391400727),
+    KQU(5120132858771880830), KQU(7934160097989028561),
+    KQU(6963121488531976245), KQU(17412329602621742089),
+    KQU(15144843053931774092), KQU(17204176651763054532),
+    KQU(13166595387554065870), KQU(8590377810513960213),
+    KQU(5834365135373991938), KQU(7640913007182226243),
+    KQU(3479394703859418425), KQU(16402784452644521040),
+    KQU(4993979809687083980), KQU(13254522168097688865),
+    KQU(15643659095244365219), KQU(5881437660538424982),
+    KQU(11174892200618987379), KQU(254409966159711077),
+    KQU(17158413043140549909), KQU(3638048789290376272),
+    KQU(1376816930299489190), KQU(4622462095217761923),
+    KQU(15086407973010263515), KQU(13253971772784692238),
+    KQU(5270549043541649236), KQU(11182714186805411604),
+    KQU(12283846437495577140), KQU(5297647149908953219),
+    KQU(10047451738316836654), KQU(4938228100367874746),
+    KQU(12328523025304077923), KQU(3601049438595312361),
+    KQU(9313624118352733770), KQU(13322966086117661798),
+    KQU(16660005705644029394), KQU(11337677526988872373),
+    KQU(13869299102574417795), KQU(15642043183045645437),
+    KQU(3021755569085880019), KQU(4979741767761188161),
+    KQU(13679979092079279587), KQU(3344685842861071743),
+    KQU(13947960059899588104), KQU(305806934293368007),
+    KQU(5749173929201650029), KQU(11123724852118844098),
+    KQU(15128987688788879802), KQU(15251651211024665009),
+    KQU(7689925933816577776), KQU(16732804392695859449),
+    KQU(17087345401014078468), KQU(14315108589159048871),
+    KQU(4820700266619778917), KQU(16709637539357958441),
+    KQU(4936227875177351374), KQU(2137907697912987247),
+    KQU(11628565601408395420), KQU(2333250549241556786),
+    KQU(5711200379577778637), KQU(5170680131529031729),
+    KQU(12620392043061335164), KQU(95363390101096078), KQU(5487981914081709462),
+    KQU(1763109823981838620), KQU(3395861271473224396),
+    KQU(1300496844282213595), KQU(6894316212820232902),
+    KQU(10673859651135576674), KQU(5911839658857903252),
+    KQU(17407110743387299102), KQU(8257427154623140385),
+    KQU(11389003026741800267), KQU(4070043211095013717),
+    KQU(11663806997145259025), KQU(15265598950648798210),
+    KQU(630585789434030934), KQU(3524446529213587334), KQU(7186424168495184211),
+    KQU(10806585451386379021), KQU(11120017753500499273),
+    KQU(1586837651387701301), KQU(17530454400954415544),
+    KQU(9991670045077880430), KQU(7550997268990730180),
+    KQU(8640249196597379304), KQU(3522203892786893823),
+    KQU(10401116549878854788), KQU(13690285544733124852),
+    KQU(8295785675455774586), KQU(15535716172155117603),
+    KQU(3112108583723722511), KQU(17633179955339271113),
+    KQU(18154208056063759375), KQU(1866409236285815666),
+    KQU(13326075895396412882), KQU(8756261842948020025),
+    KQU(6281852999868439131), KQU(15087653361275292858),
+    KQU(10333923911152949397), KQU(5265567645757408500),
+    KQU(12728041843210352184), KQU(6347959327507828759),
+    KQU(154112802625564758), KQU(18235228308679780218),
+    KQU(3253805274673352418), KQU(4849171610689031197),
+    KQU(17948529398340432518), KQU(13803510475637409167),
+    KQU(13506570190409883095), KQU(15870801273282960805),
+    KQU(8451286481299170773), KQU(9562190620034457541),
+    KQU(8518905387449138364), KQU(12681306401363385655),
+    KQU(3788073690559762558), KQU(5256820289573487769),
+    KQU(2752021372314875467), KQU(6354035166862520716),
+    KQU(4328956378309739069), KQU(449087441228269600), KQU(5533508742653090868),
+    KQU(1260389420404746988), KQU(18175394473289055097),
+    KQU(1535467109660399420), KQU(8818894282874061442),
+    KQU(12140873243824811213), KQU(15031386653823014946),
+    KQU(1286028221456149232), KQU(6329608889367858784),
+    KQU(9419654354945132725), KQU(6094576547061672379),
+    KQU(17706217251847450255), KQU(1733495073065878126),
+    KQU(16918923754607552663), KQU(8881949849954945044),
+    KQU(12938977706896313891), KQU(14043628638299793407),
+    KQU(18393874581723718233), KQU(6886318534846892044),
+    KQU(14577870878038334081), KQU(13541558383439414119),
+    KQU(13570472158807588273), KQU(18300760537910283361),
+    KQU(818368572800609205), KQU(1417000585112573219),
+    KQU(12337533143867683655), KQU(12433180994702314480),
+    KQU(778190005829189083), KQU(13667356216206524711),
+    KQU(9866149895295225230), KQU(11043240490417111999),
+    KQU(1123933826541378598), KQU(6469631933605123610),
+    KQU(14508554074431980040), KQU(13918931242962026714),
+    KQU(2870785929342348285), KQU(14786362626740736974),
+    KQU(13176680060902695786), KQU(9591778613541679456),
+    KQU(9097662885117436706), KQU(749262234240924947), KQU(1944844067793307093),
+    KQU(4339214904577487742), KQU(8009584152961946551),
+    KQU(16073159501225501777), KQU(3335870590499306217),
+    KQU(17088312653151202847), KQU(3108893142681931848),
+    KQU(16636841767202792021), KQU(10423316431118400637),
+    KQU(8008357368674443506), KQU(11340015231914677875),
+    KQU(17687896501594936090), KQU(15173627921763199958),
+    KQU(542569482243721959), KQU(15071714982769812975),
+    KQU(4466624872151386956), KQU(1901780715602332461),
+    KQU(9822227742154351098), KQU(1479332892928648780),
+    KQU(6981611948382474400), KQU(7620824924456077376),
+    KQU(14095973329429406782), KQU(7902744005696185404),
+    KQU(15830577219375036920), KQU(10287076667317764416),
+    KQU(12334872764071724025), KQU(4419302088133544331),
+    KQU(14455842851266090520), KQU(12488077416504654222),
+    KQU(7953892017701886766), KQU(6331484925529519007),
+    KQU(4902145853785030022), KQU(17010159216096443073),
+    KQU(11945354668653886087), KQU(15112022728645230829),
+    KQU(17363484484522986742), KQU(4423497825896692887),
+    KQU(8155489510809067471), KQU(258966605622576285), KQU(5462958075742020534),
+    KQU(6763710214913276228), KQU(2368935183451109054),
+    KQU(14209506165246453811), KQU(2646257040978514881),
+    KQU(3776001911922207672), KQU(1419304601390147631),
+    KQU(14987366598022458284), KQU(3977770701065815721),
+    KQU(730820417451838898), KQU(3982991703612885327), KQU(2803544519671388477),
+    KQU(17067667221114424649), KQU(2922555119737867166),
+    KQU(1989477584121460932), KQU(15020387605892337354),
+    KQU(9293277796427533547), KQU(10722181424063557247),
+    KQU(16704542332047511651), KQU(5008286236142089514),
+    KQU(16174732308747382540), KQU(17597019485798338402),
+    KQU(13081745199110622093), KQU(8850305883842258115),
+    KQU(12723629125624589005), KQU(8140566453402805978),
+    KQU(15356684607680935061), KQU(14222190387342648650),
+    KQU(11134610460665975178), KQU(1259799058620984266),
+    KQU(13281656268025610041), KQU(298262561068153992),
+    KQU(12277871700239212922), KQU(13911297774719779438),
+    KQU(16556727962761474934), KQU(17903010316654728010),
+    KQU(9682617699648434744), KQU(14757681836838592850),
+    KQU(1327242446558524473), KQU(11126645098780572792),
+    KQU(1883602329313221774), KQU(2543897783922776873),
+    KQU(15029168513767772842), KQU(12710270651039129878),
+    KQU(16118202956069604504), KQU(15010759372168680524),
+    KQU(2296827082251923948), KQU(10793729742623518101),
+    KQU(13829764151845413046), KQU(17769301223184451213),
+    KQU(3118268169210783372), KQU(17626204544105123127),
+    KQU(7416718488974352644), KQU(10450751996212925994),
+    KQU(9352529519128770586), KQU(259347569641110140), KQU(8048588892269692697),
+    KQU(1774414152306494058), KQU(10669548347214355622),
+    KQU(13061992253816795081), KQU(18432677803063861659),
+    KQU(8879191055593984333), KQU(12433753195199268041),
+    KQU(14919392415439730602), KQU(6612848378595332963),
+    KQU(6320986812036143628), KQU(10465592420226092859),
+    KQU(4196009278962570808), KQU(3747816564473572224),
+    KQU(17941203486133732898), KQU(2350310037040505198),
+    KQU(5811779859134370113), KQU(10492109599506195126),
+    KQU(7699650690179541274), KQU(1954338494306022961),
+    KQU(14095816969027231152), KQU(5841346919964852061),
+    KQU(14945969510148214735), KQU(3680200305887550992),
+    KQU(6218047466131695792), KQU(8242165745175775096),
+    KQU(11021371934053307357), KQU(1265099502753169797),
+    KQU(4644347436111321718), KQU(3609296916782832859),
+    KQU(8109807992218521571), KQU(18387884215648662020),
+    KQU(14656324896296392902), KQU(17386819091238216751),
+    KQU(17788300878582317152), KQU(7919446259742399591),
+    KQU(4466613134576358004), KQU(12928181023667938509),
+    KQU(13147446154454932030), KQU(16552129038252734620),
+    KQU(8395299403738822450), KQU(11313817655275361164),
+    KQU(434258809499511718), KQU(2074882104954788676), KQU(7929892178759395518),
+    KQU(9006461629105745388), KQU(5176475650000323086),
+    KQU(11128357033468341069), KQU(12026158851559118955),
+    KQU(14699716249471156500), KQU(448982497120206757),
+    KQU(4156475356685519900), KQU(6063816103417215727),
+    KQU(10073289387954971479), KQU(8174466846138590962),
+    KQU(2675777452363449006), KQU(9090685420572474281),
+    KQU(6659652652765562060), KQU(12923120304018106621),
+    KQU(11117480560334526775), KQU(937910473424587511),
+    KQU(1838692113502346645), KQU(11133914074648726180),
+    KQU(7922600945143884053), KQU(13435287702700959550),
+    KQU(5287964921251123332), KQU(11354875374575318947),
+    KQU(17955724760748238133), KQU(13728617396297106512),
+    KQU(4107449660118101255), KQU(1210269794886589623),
+    KQU(11408687205733456282), KQU(4538354710392677887),
+    KQU(13566803319341319267), KQU(17870798107734050771),
+    KQU(3354318982568089135), KQU(9034450839405133651),
+    KQU(13087431795753424314), KQU(950333102820688239),
+    KQU(1968360654535604116), KQU(16840551645563314995),
+    KQU(8867501803892924995), KQU(11395388644490626845),
+    KQU(1529815836300732204), KQU(13330848522996608842),
+    KQU(1813432878817504265), KQU(2336867432693429560),
+    KQU(15192805445973385902), KQU(2528593071076407877),
+    KQU(128459777936689248), KQU(9976345382867214866), KQU(6208885766767996043),
+    KQU(14982349522273141706), KQU(3099654362410737822),
+    KQU(13776700761947297661), KQU(8806185470684925550),
+    KQU(8151717890410585321), KQU(640860591588072925),
+    KQU(14592096303937307465), KQU(9056472419613564846),
+    KQU(14861544647742266352), KQU(12703771500398470216),
+    KQU(3142372800384138465), KQU(6201105606917248196),
+    KQU(18337516409359270184), KQU(15042268695665115339),
+    KQU(15188246541383283846), KQU(12800028693090114519),
+    KQU(5992859621101493472), KQU(18278043971816803521),
+    KQU(9002773075219424560), KQU(7325707116943598353),
+    KQU(7930571931248040822), KQU(5645275869617023448),
+    KQU(7266107455295958487), KQU(4363664528273524411),
+    KQU(14313875763787479809), KQU(17059695613553486802),
+    KQU(9247761425889940932), KQU(13704726459237593128),
+    KQU(2701312427328909832), KQU(17235532008287243115),
+    KQU(14093147761491729538), KQU(6247352273768386516),
+    KQU(8268710048153268415), KQU(7985295214477182083),
+    KQU(15624495190888896807), KQU(3772753430045262788),
+    KQU(9133991620474991698), KQU(5665791943316256028),
+    KQU(7551996832462193473), KQU(13163729206798953877),
+    KQU(9263532074153846374), KQU(1015460703698618353),
+    KQU(17929874696989519390), KQU(18257884721466153847),
+    KQU(16271867543011222991), KQU(3905971519021791941),
+    KQU(16814488397137052085), KQU(1321197685504621613),
+    KQU(2870359191894002181), KQU(14317282970323395450),
+    KQU(13663920845511074366), KQU(2052463995796539594),
+    KQU(14126345686431444337), KQU(1727572121947022534),
+    KQU(17793552254485594241), KQU(6738857418849205750),
+    KQU(1282987123157442952), KQU(16655480021581159251),
+    KQU(6784587032080183866), KQU(14726758805359965162),
+    KQU(7577995933961987349), KQU(12539609320311114036),
+    KQU(10789773033385439494), KQU(8517001497411158227),
+    KQU(10075543932136339710), KQU(14838152340938811081),
+    KQU(9560840631794044194), KQU(17445736541454117475),
+    KQU(10633026464336393186), KQU(15705729708242246293),
+    KQU(1117517596891411098), KQU(4305657943415886942),
+    KQU(4948856840533979263), KQU(16071681989041789593),
+    KQU(13723031429272486527), KQU(7639567622306509462),
+    KQU(12670424537483090390), KQU(9715223453097197134),
+    KQU(5457173389992686394), KQU(289857129276135145),
+    KQU(17048610270521972512), KQU(692768013309835485),
+    KQU(14823232360546632057), KQU(18218002361317895936),
+    KQU(3281724260212650204), KQU(16453957266549513795),
+    KQU(8592711109774511881), KQU(929825123473369579),
+    KQU(15966784769764367791), KQU(9627344291450607588),
+    KQU(10849555504977813287), KQU(9234566913936339275),
+    KQU(6413807690366911210), KQU(10862389016184219267),
+    KQU(13842504799335374048), KQU(1531994113376881174),
+    KQU(2081314867544364459), KQU(16430628791616959932),
+    KQU(8314714038654394368), KQU(9155473892098431813),
+    KQU(12577843786670475704), KQU(4399161106452401017),
+    KQU(1668083091682623186), KQU(1741383777203714216),
+    KQU(2162597285417794374), KQU(15841980159165218736),
+    KQU(1971354603551467079), KQU(1206714764913205968),
+    KQU(4790860439591272330), KQU(14699375615594055799),
+    KQU(8374423871657449988), KQU(10950685736472937738),
+    KQU(697344331343267176), KQU(10084998763118059810),
+    KQU(12897369539795983124), KQU(12351260292144383605),
+    KQU(1268810970176811234), KQU(7406287800414582768), KQU(516169557043807831),
+    KQU(5077568278710520380), KQU(3828791738309039304),
+    KQU(7721974069946943610), KQU(3534670260981096460),
+    KQU(4865792189600584891), KQU(16892578493734337298),
+    KQU(9161499464278042590), KQU(11976149624067055931),
+    KQU(13219479887277343990), KQU(14161556738111500680),
+    KQU(14670715255011223056), KQU(4671205678403576558),
+    KQU(12633022931454259781), KQU(14821376219869187646),
+    KQU(751181776484317028), KQU(2192211308839047070),
+    KQU(11787306362361245189), KQU(10672375120744095707),
+    KQU(4601972328345244467), KQU(15457217788831125879),
+    KQU(8464345256775460809), KQU(10191938789487159478),
+    KQU(6184348739615197613), KQU(11425436778806882100),
+    KQU(2739227089124319793), KQU(461464518456000551), KQU(4689850170029177442),
+    KQU(6120307814374078625), KQU(11153579230681708671),
+    KQU(7891721473905347926), KQU(10281646937824872400),
+    KQU(3026099648191332248), KQU(8666750296953273818),
+    KQU(14978499698844363232), KQU(13303395102890132065),
+    KQU(8182358205292864080), KQU(10560547713972971291),
+    KQU(11981635489418959093), KQU(3134621354935288409),
+    KQU(11580681977404383968), KQU(14205530317404088650),
+    KQU(5997789011854923157), KQU(13659151593432238041),
+    KQU(11664332114338865086), KQU(7490351383220929386),
+    KQU(7189290499881530378), KQU(15039262734271020220),
+    KQU(2057217285976980055), KQU(555570804905355739),
+    KQU(11235311968348555110), KQU(13824557146269603217),
+    KQU(16906788840653099693), KQU(7222878245455661677),
+    KQU(5245139444332423756), KQU(4723748462805674292),
+    KQU(12216509815698568612), KQU(17402362976648951187),
+    KQU(17389614836810366768), KQU(4880936484146667711),
+    KQU(9085007839292639880), KQU(13837353458498535449),
+    KQU(11914419854360366677), KQU(16595890135313864103),
+    KQU(6313969847197627222), KQU(18296909792163910431),
+    KQU(10041780113382084042), KQU(2499478551172884794),
+    KQU(11057894246241189489), KQU(9742243032389068555),
+    KQU(12838934582673196228), KQU(13437023235248490367),
+    KQU(13372420669446163240), KQU(6752564244716909224),
+    KQU(7157333073400313737), KQU(12230281516370654308),
+    KQU(1182884552219419117), KQU(2955125381312499218),
+    KQU(10308827097079443249), KQU(1337648572986534958),
+    KQU(16378788590020343939), KQU(108619126514420935),
+    KQU(3990981009621629188), KQU(5460953070230946410),
+    KQU(9703328329366531883), KQU(13166631489188077236),
+    KQU(1104768831213675170), KQU(3447930458553877908),
+    KQU(8067172487769945676), KQU(5445802098190775347),
+    KQU(3244840981648973873), KQU(17314668322981950060),
+    KQU(5006812527827763807), KQU(18158695070225526260),
+    KQU(2824536478852417853), KQU(13974775809127519886),
+    KQU(9814362769074067392), KQU(17276205156374862128),
+    KQU(11361680725379306967), KQU(3422581970382012542),
+    KQU(11003189603753241266), KQU(11194292945277862261),
+    KQU(6839623313908521348), KQU(11935326462707324634),
+    KQU(1611456788685878444), KQU(13112620989475558907),
+    KQU(517659108904450427), KQU(13558114318574407624),
+    KQU(15699089742731633077), KQU(4988979278862685458),
+    KQU(8111373583056521297), KQU(3891258746615399627),
+    KQU(8137298251469718086), KQU(12748663295624701649),
+    KQU(4389835683495292062), KQU(5775217872128831729),
+    KQU(9462091896405534927), KQU(8498124108820263989),
+    KQU(8059131278842839525), KQU(10503167994254090892),
+    KQU(11613153541070396656), KQU(18069248738504647790),
+    KQU(570657419109768508), KQU(3950574167771159665), KQU(5514655599604313077),
+    KQU(2908460854428484165), KQU(10777722615935663114),
+    KQU(12007363304839279486), KQU(9800646187569484767),
+    KQU(8795423564889864287), KQU(14257396680131028419),
+    KQU(6405465117315096498), KQU(7939411072208774878),
+    KQU(17577572378528990006), KQU(14785873806715994850),
+    KQU(16770572680854747390), KQU(18127549474419396481),
+    KQU(11637013449455757750), KQU(14371851933996761086),
+    KQU(3601181063650110280), KQU(4126442845019316144),
+    KQU(10198287239244320669), KQU(18000169628555379659),
+    KQU(18392482400739978269), KQU(6219919037686919957),
+    KQU(3610085377719446052), KQU(2513925039981776336),
+    KQU(16679413537926716955), KQU(12903302131714909434),
+    KQU(5581145789762985009), KQU(12325955044293303233),
+    KQU(17216111180742141204), KQU(6321919595276545740),
+    KQU(3507521147216174501), KQU(9659194593319481840),
+    KQU(11473976005975358326), KQU(14742730101435987026),
+    KQU(492845897709954780), KQU(16976371186162599676),
+    KQU(17712703422837648655), KQU(9881254778587061697),
+    KQU(8413223156302299551), KQU(1563841828254089168),
+    KQU(9996032758786671975), KQU(138877700583772667),
+    KQU(13003043368574995989), KQU(4390573668650456587),
+    KQU(8610287390568126755), KQU(15126904974266642199),
+    KQU(6703637238986057662), KQU(2873075592956810157),
+    KQU(6035080933946049418), KQU(13382846581202353014),
+    KQU(7303971031814642463), KQU(18418024405307444267),
+    KQU(5847096731675404647), KQU(4035880699639842500),
+    KQU(11525348625112218478), KQU(3041162365459574102),
+    KQU(2604734487727986558), KQU(15526341771636983145),
+    KQU(14556052310697370254), KQU(12997787077930808155),
+    KQU(9601806501755554499), KQU(11349677952521423389),
+    KQU(14956777807644899350), KQU(16559736957742852721),
+    KQU(12360828274778140726), KQU(6685373272009662513),
+    KQU(16932258748055324130), KQU(15918051131954158508),
+    KQU(1692312913140790144), KQU(546653826801637367), KQU(5341587076045986652),
+    KQU(14975057236342585662), KQU(12374976357340622412),
+    KQU(10328833995181940552), KQU(12831807101710443149),
+    KQU(10548514914382545716), KQU(2217806727199715993),
+    KQU(12627067369242845138), KQU(4598965364035438158),
+    KQU(150923352751318171), KQU(14274109544442257283),
+    KQU(4696661475093863031), KQU(1505764114384654516),
+    KQU(10699185831891495147), KQU(2392353847713620519),
+    KQU(3652870166711788383), KQU(8640653276221911108),
+    KQU(3894077592275889704), KQU(4918592872135964845),
+    KQU(16379121273281400789), KQU(12058465483591683656),
+    KQU(11250106829302924945), KQU(1147537556296983005),
+    KQU(6376342756004613268), KQU(14967128191709280506),
+    KQU(18007449949790627628), KQU(9497178279316537841),
+    KQU(7920174844809394893), KQU(10037752595255719907),
+    KQU(15875342784985217697), KQU(15311615921712850696),
+    KQU(9552902652110992950), KQU(14054979450099721140),
+    KQU(5998709773566417349), KQU(18027910339276320187),
+    KQU(8223099053868585554), KQU(7842270354824999767),
+    KQU(4896315688770080292), KQU(12969320296569787895),
+    KQU(2674321489185759961), KQU(4053615936864718439),
+    KQU(11349775270588617578), KQU(4743019256284553975),
+    KQU(5602100217469723769), KQU(14398995691411527813),
+    KQU(7412170493796825470), KQU(836262406131744846), KQU(8231086633845153022),
+    KQU(5161377920438552287), KQU(8828731196169924949),
+    KQU(16211142246465502680), KQU(3307990879253687818),
+    KQU(5193405406899782022), KQU(8510842117467566693),
+    KQU(6070955181022405365), KQU(14482950231361409799),
+    KQU(12585159371331138077), KQU(3511537678933588148),
+    KQU(2041849474531116417), KQU(10944936685095345792),
+    KQU(18303116923079107729), KQU(2720566371239725320),
+    KQU(4958672473562397622), KQU(3032326668253243412),
+    KQU(13689418691726908338), KQU(1895205511728843996),
+    KQU(8146303515271990527), KQU(16507343500056113480),
+    KQU(473996939105902919), KQU(9897686885246881481),
+    KQU(14606433762712790575), KQU(6732796251605566368),
+    KQU(1399778120855368916), KQU(935023885182833777),
+    KQU(16066282816186753477), KQU(7291270991820612055),
+    KQU(17530230393129853844), KQU(10223493623477451366),
+    KQU(15841725630495676683), KQU(17379567246435515824),
+    KQU(8588251429375561971), KQU(18339511210887206423),
+    KQU(17349587430725976100), KQU(12244876521394838088),
+    KQU(6382187714147161259), KQU(12335807181848950831),
+    KQU(16948885622305460665), KQU(13755097796371520506),
+    KQU(14806740373324947801), KQU(4828699633859287703),
+    KQU(8209879281452301604), KQU(12435716669553736437),
+    KQU(13970976859588452131), KQU(6233960842566773148),
+    KQU(12507096267900505759), KQU(1198713114381279421),
+    KQU(14989862731124149015), KQU(15932189508707978949),
+    KQU(2526406641432708722), KQU(29187427817271982), KQU(1499802773054556353),
+    KQU(10816638187021897173), KQU(5436139270839738132),
+    KQU(6659882287036010082), KQU(2154048955317173697),
+    KQU(10887317019333757642), KQU(16281091802634424955),
+    KQU(10754549879915384901), KQU(10760611745769249815),
+    KQU(2161505946972504002), KQU(5243132808986265107),
+    KQU(10129852179873415416), KQU(710339480008649081),
+    KQU(7802129453068808528), KQU(17967213567178907213),
+    KQU(15730859124668605599), KQU(13058356168962376502),
+    KQU(3701224985413645909), KQU(14464065869149109264),
+    KQU(9959272418844311646), KQU(10157426099515958752),
+    KQU(14013736814538268528), KQU(17797456992065653951),
+    KQU(17418878140257344806), KQU(15457429073540561521),
+    KQU(2184426881360949378), KQU(2062193041154712416),
+    KQU(8553463347406931661), KQU(4913057625202871854),
+    KQU(2668943682126618425), KQU(17064444737891172288),
+    KQU(4997115903913298637), KQU(12019402608892327416),
+    KQU(17603584559765897352), KQU(11367529582073647975),
+    KQU(8211476043518436050), KQU(8676849804070323674),
+    KQU(18431829230394475730), KQU(10490177861361247904),
+    KQU(9508720602025651349), KQU(7409627448555722700),
+    KQU(5804047018862729008), KQU(11943858176893142594),
+    KQU(11908095418933847092), KQU(5415449345715887652),
+    KQU(1554022699166156407), KQU(9073322106406017161),
+    KQU(7080630967969047082), KQU(18049736940860732943),
+    KQU(12748714242594196794), KQU(1226992415735156741),
+    KQU(17900981019609531193), KQU(11720739744008710999),
+    KQU(3006400683394775434), KQU(11347974011751996028),
+    KQU(3316999628257954608), KQU(8384484563557639101),
+    KQU(18117794685961729767), KQU(1900145025596618194),
+    KQU(17459527840632892676), KQU(5634784101865710994),
+    KQU(7918619300292897158), KQU(3146577625026301350),
+    KQU(9955212856499068767), KQU(1873995843681746975),
+    KQU(1561487759967972194), KQU(8322718804375878474),
+    KQU(11300284215327028366), KQU(4667391032508998982),
+    KQU(9820104494306625580), KQU(17922397968599970610),
+    KQU(1784690461886786712), KQU(14940365084341346821),
+    KQU(5348719575594186181), KQU(10720419084507855261),
+    KQU(14210394354145143274), KQU(2426468692164000131),
+    KQU(16271062114607059202), KQU(14851904092357070247),
+    KQU(6524493015693121897), KQU(9825473835127138531),
+    KQU(14222500616268569578), KQU(15521484052007487468),
+    KQU(14462579404124614699), KQU(11012375590820665520),
+    KQU(11625327350536084927), KQU(14452017765243785417),
+    KQU(9989342263518766305), KQU(3640105471101803790),
+    KQU(4749866455897513242), KQU(13963064946736312044),
+    KQU(10007416591973223791), KQU(18314132234717431115),
+    KQU(3286596588617483450), KQU(7726163455370818765),
+    KQU(7575454721115379328), KQU(5308331576437663422),
+    KQU(18288821894903530934), KQU(8028405805410554106),
+    KQU(15744019832103296628), KQU(149765559630932100),
+    KQU(6137705557200071977), KQU(14513416315434803615),
+    KQU(11665702820128984473), KQU(218926670505601386),
+    KQU(6868675028717769519), KQU(15282016569441512302),
+    KQU(5707000497782960236), KQU(6671120586555079567),
+    KQU(2194098052618985448), KQU(16849577895477330978),
+    KQU(12957148471017466283), KQU(1997805535404859393),
+    KQU(1180721060263860490), KQU(13206391310193756958),
+    KQU(12980208674461861797), KQU(3825967775058875366),
+    KQU(17543433670782042631), KQU(1518339070120322730),
+    KQU(16344584340890991669), KQU(2611327165318529819),
+    KQU(11265022723283422529), KQU(4001552800373196817),
+    KQU(14509595890079346161), KQU(3528717165416234562),
+    KQU(18153222571501914072), KQU(9387182977209744425),
+    KQU(10064342315985580021), KQU(11373678413215253977),
+    KQU(2308457853228798099), KQU(9729042942839545302),
+    KQU(7833785471140127746), KQU(6351049900319844436),
+    KQU(14454610627133496067), KQU(12533175683634819111),
+    KQU(15570163926716513029), KQU(13356980519185762498)};
 
 TEST_BEGIN(test_gen_rand_32) {
 	uint32_t array32[BLOCK_SIZE] JEMALLOC_ATTR(aligned(16));
 	uint32_t array32_2[BLOCK_SIZE] JEMALLOC_ATTR(aligned(16));
-	int i;
+	int      i;
 	uint32_t r32;
-	sfmt_t *ctx;
+	sfmt_t  *ctx;
 
-	expect_d_le(get_min_array_size32(), BLOCK_SIZE,
-	    "Array size too small");
+	expect_d_le(get_min_array_size32(), BLOCK_SIZE, "Array size too small");
 	ctx = init_gen_rand(1234);
 	fill_array32(ctx, array32, BLOCK_SIZE);
 	fill_array32(ctx, array32_2, BLOCK_SIZE);
@@ -1486,13 +1405,12 @@ TEST_END
 TEST_BEGIN(test_by_array_32) {
 	uint32_t array32[BLOCK_SIZE] JEMALLOC_ATTR(aligned(16));
 	uint32_t array32_2[BLOCK_SIZE] JEMALLOC_ATTR(aligned(16));
-	int i;
+	int      i;
 	uint32_t ini[4] = {0x1234, 0x5678, 0x9abc, 0xdef0};
 	uint32_t r32;
-	sfmt_t *ctx;
+	sfmt_t  *ctx;
 
-	expect_d_le(get_min_array_size32(), BLOCK_SIZE,
-	    "Array size too small");
+	expect_d_le(get_min_array_size32(), BLOCK_SIZE, "Array size too small");
 	ctx = init_by_array(ini, 4);
 	fill_array32(ctx, array32, BLOCK_SIZE);
 	fill_array32(ctx, array32_2, BLOCK_SIZE);
@@ -1521,12 +1439,12 @@ TEST_END
 TEST_BEGIN(test_gen_rand_64) {
 	uint64_t array64[BLOCK_SIZE64] JEMALLOC_ATTR(aligned(16));
 	uint64_t array64_2[BLOCK_SIZE64] JEMALLOC_ATTR(aligned(16));
-	int i;
+	int      i;
 	uint64_t r;
-	sfmt_t *ctx;
+	sfmt_t  *ctx;
 
-	expect_d_le(get_min_array_size64(), BLOCK_SIZE64,
-	    "Array size too small");
+	expect_d_le(
+	    get_min_array_size64(), BLOCK_SIZE64, "Array size too small");
 	ctx = init_gen_rand(4321);
 	fill_array64(ctx, array64, BLOCK_SIZE64);
 	fill_array64(ctx, array64_2, BLOCK_SIZE64);
@@ -1540,13 +1458,13 @@ TEST_BEGIN(test_gen_rand_64) {
 		}
 		r = gen_rand64(ctx);
 		expect_u64_eq(r, array64[i],
-		    "Mismatch at array64[%d]=%"FMTx64", gen=%"FMTx64, i,
+		    "Mismatch at array64[%d]=%" FMTx64 ", gen=%" FMTx64, i,
 		    array64[i], r);
 	}
 	for (i = 0; i < COUNT_2; i++) {
 		r = gen_rand64(ctx);
 		expect_u64_eq(r, array64_2[i],
-		    "Mismatch at array64_2[%d]=%"FMTx64" gen=%"FMTx64"", i,
+		    "Mismatch at array64_2[%d]=%" FMTx64 " gen=%" FMTx64 "", i,
 		    array64_2[i], r);
 	}
 	fini_gen_rand(ctx);
@@ -1556,13 +1474,13 @@ TEST_END
 TEST_BEGIN(test_by_array_64) {
 	uint64_t array64[BLOCK_SIZE64] JEMALLOC_ATTR(aligned(16));
 	uint64_t array64_2[BLOCK_SIZE64] JEMALLOC_ATTR(aligned(16));
-	int i;
+	int      i;
 	uint64_t r;
 	uint32_t ini[] = {5, 4, 3, 2, 1};
-	sfmt_t *ctx;
+	sfmt_t  *ctx;
 
-	expect_d_le(get_min_array_size64(), BLOCK_SIZE64,
-	    "Array size too small");
+	expect_d_le(
+	    get_min_array_size64(), BLOCK_SIZE64, "Array size too small");
 	ctx = init_by_array(ini, 5);
 	fill_array64(ctx, array64, BLOCK_SIZE64);
 	fill_array64(ctx, array64_2, BLOCK_SIZE64);
@@ -1576,13 +1494,13 @@ TEST_BEGIN(test_by_array_64) {
 		}
 		r = gen_rand64(ctx);
 		expect_u64_eq(r, array64[i],
-		    "Mismatch at array64[%d]=%"FMTx64" gen=%"FMTx64, i,
+		    "Mismatch at array64[%d]=%" FMTx64 " gen=%" FMTx64, i,
 		    array64[i], r);
 	}
 	for (i = 0; i < COUNT_2; i++) {
 		r = gen_rand64(ctx);
 		expect_u64_eq(r, array64_2[i],
-		    "Mismatch at array64_2[%d]=%"FMTx64" gen=%"FMTx64, i,
+		    "Mismatch at array64_2[%d]=%" FMTx64 " gen=%" FMTx64, i,
 		    array64_2[i], r);
 	}
 	fini_gen_rand(ctx);
@@ -1591,9 +1509,6 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_gen_rand_32,
-	    test_by_array_32,
-	    test_gen_rand_64,
+	return test(test_gen_rand_32, test_by_array_32, test_gen_rand_64,
 	    test_by_array_64);
 }
diff --git a/test/unit/a0.c b/test/unit/a0.c
index c1be79a6..63d792d2 100644
--- a/test/unit/a0.c
+++ b/test/unit/a0.c
@@ -11,6 +11,5 @@ TEST_END
 
 int
 main(void) {
-	return test_no_malloc_init(
-	    test_a0);
+	return test_no_malloc_init(test_a0);
 }
diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c
index e991f4dd..99c08ab9 100644
--- a/test/unit/arena_decay.c
+++ b/test/unit/arena_decay.c
@@ -4,11 +4,11 @@
 #include "jemalloc/internal/ticker.h"
 
 static nstime_monotonic_t *nstime_monotonic_orig;
-static nstime_update_t *nstime_update_orig;
+static nstime_update_t    *nstime_update_orig;
 
 static unsigned nupdates_mock;
 static nstime_t time_mock;
-static bool monotonic_mock;
+static bool     monotonic_mock;
 
 static bool
 nstime_monotonic_mock(void) {
@@ -18,7 +18,7 @@ nstime_monotonic_mock(void) {
 static void
 nstime_update_mock(nstime_t *time) {
 	nupdates_mock++;
-	if (monotonic_mock) {
+	if (monotonic_mock && nstime_compare(&time_mock, time) > 0) {
 		nstime_copy(time, &time_mock);
 	}
 }
@@ -28,26 +28,27 @@ TEST_BEGIN(test_decay_ticks) {
 	test_skip_if(opt_hpa);
 
 	ticker_geom_t *decay_ticker;
-	unsigned tick0, tick1, arena_ind;
-	size_t sz, large0;
-	void *p;
+	unsigned       tick0, tick1, arena_ind;
+	size_t         sz, large0;
+	void          *p;
 
 	sz = sizeof(size_t);
-	expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL,
-	    0), 0, "Unexpected mallctl failure");
+	expect_d_eq(
+	    mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, 0), 0,
+	    "Unexpected mallctl failure");
 
 	/* Set up a manually managed arena for test. */
 	arena_ind = do_arena_create(0, 0);
 
 	/* Migrate to the new arena, and get the ticker. */
 	unsigned old_arena_ind;
-	size_t sz_arena_ind = sizeof(old_arena_ind);
+	size_t   sz_arena_ind = sizeof(old_arena_ind);
 	expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind,
-	    &sz_arena_ind, (void *)&arena_ind, sizeof(arena_ind)), 0,
-	    "Unexpected mallctl() failure");
+	                &sz_arena_ind, (void *)&arena_ind, sizeof(arena_ind)),
+	    0, "Unexpected mallctl() failure");
 	decay_ticker = tsd_arena_decay_tickerp_get(tsd_fetch());
-	expect_ptr_not_null(decay_ticker,
-	    "Unexpected failure getting decay ticker");
+	expect_ptr_not_null(
+	    decay_ticker, "Unexpected failure getting decay ticker");
 
 	/*
 	 * Test the standard APIs using a large size class, since we can't
@@ -80,8 +81,8 @@ TEST_BEGIN(test_decay_ticks) {
 	expect_d_eq(posix_memalign(&p, sizeof(size_t), large0), 0,
 	    "Unexpected posix_memalign() failure");
 	tick1 = ticker_geom_read(decay_ticker);
-	expect_u32_ne(tick1, tick0,
-	    "Expected ticker to tick during posix_memalign()");
+	expect_u32_ne(
+	    tick1, tick0, "Expected ticker to tick during posix_memalign()");
 	free(p);
 
 	/* aligned_alloc(). */
@@ -89,8 +90,8 @@ TEST_BEGIN(test_decay_ticks) {
 	p = aligned_alloc(sizeof(size_t), large0);
 	expect_ptr_not_null(p, "Unexpected aligned_alloc() failure");
 	tick1 = ticker_geom_read(decay_ticker);
-	expect_u32_ne(tick1, tick0,
-	    "Expected ticker to tick during aligned_alloc()");
+	expect_u32_ne(
+	    tick1, tick0, "Expected ticker to tick during aligned_alloc()");
 	free(p);
 
 	/* realloc(). */
@@ -118,7 +119,7 @@ TEST_BEGIN(test_decay_ticks) {
 	 */
 	{
 		unsigned i;
-		size_t allocx_sizes[2];
+		size_t   allocx_sizes[2];
 		allocx_sizes[0] = large0;
 		allocx_sizes[1] = 1;
 
@@ -163,7 +164,8 @@ TEST_BEGIN(test_decay_ticks) {
 			tick1 = ticker_geom_read(decay_ticker);
 			expect_u32_ne(tick1, tick0,
 			    "Expected ticker to tick during sdallocx() "
-			    "(sz=%zu)", sz);
+			    "(sz=%zu)",
+			    sz);
 		}
 	}
 
@@ -172,18 +174,19 @@ TEST_BEGIN(test_decay_ticks) {
 	 * using an explicit tcache.
 	 */
 	unsigned tcache_ind, i;
-	size_t tcache_sizes[2];
+	size_t   tcache_sizes[2];
 	tcache_sizes[0] = large0;
 	tcache_sizes[1] = 1;
 
 	size_t tcache_max, sz_tcache_max;
 	sz_tcache_max = sizeof(tcache_max);
 	expect_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max,
-	    &sz_tcache_max, NULL, 0), 0, "Unexpected mallctl() failure");
+	                &sz_tcache_max, NULL, 0),
+	    0, "Unexpected mallctl() failure");
 
 	sz = sizeof(unsigned);
-	expect_d_eq(mallctl("tcache.create", (void *)&tcache_ind, &sz,
-	    NULL, 0), 0, "Unexpected mallctl failure");
+	expect_d_eq(mallctl("tcache.create", (void *)&tcache_ind, &sz, NULL, 0),
+	    0, "Unexpected mallctl failure");
 
 	for (i = 0; i < sizeof(tcache_sizes) / sizeof(size_t); i++) {
 		sz = tcache_sizes[i];
@@ -195,13 +198,14 @@ TEST_BEGIN(test_decay_ticks) {
 		tick1 = ticker_geom_read(decay_ticker);
 		expect_u32_ne(tick1, tick0,
 		    "Expected ticker to tick during tcache fill "
-		    "(sz=%zu)", sz);
+		    "(sz=%zu)",
+		    sz);
 		/* tcache flush. */
 		dallocx(p, MALLOCX_TCACHE(tcache_ind));
 		tick0 = ticker_geom_read(decay_ticker);
 		expect_d_eq(mallctl("tcache.flush", NULL, NULL,
-		    (void *)&tcache_ind, sizeof(unsigned)), 0,
-		    "Unexpected mallctl failure");
+		                (void *)&tcache_ind, sizeof(unsigned)),
+		    0, "Unexpected mallctl failure");
 		tick1 = ticker_geom_read(decay_ticker);
 
 		/* Will only tick if it's in tcache. */
@@ -231,11 +235,11 @@ decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt,
 	 * cached slab were to repeatedly come and go during looping, it could
 	 * prevent the decay backlog ever becoming empty.
 	 */
-	void *p = do_mallocx(1, flags);
+	void    *p = do_mallocx(1, flags);
 	uint64_t dirty_npurge1, muzzy_npurge1;
 	do {
 		for (unsigned i = 0; i < ARENA_DECAY_NTICKS_PER_UPDATE / 2;
-		    i++) {
+		     i++) {
 			void *q = do_mallocx(1, flags);
 			dallocx(q, flags);
 		}
@@ -244,14 +248,15 @@ decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt,
 
 		nstime_add(&time_mock, &update_interval);
 		nstime_update(&time);
-	} while (nstime_compare(&time, &deadline) <= 0 && ((dirty_npurge1 ==
-	    dirty_npurge0 && muzzy_npurge1 == muzzy_npurge0) ||
-	    !terminate_asap));
+	} while (nstime_compare(&time, &deadline) <= 0
+	    && ((dirty_npurge1 == dirty_npurge0
+	            && muzzy_npurge1 == muzzy_npurge0)
+	        || !terminate_asap));
 	dallocx(p, flags);
 
 	if (config_stats) {
-		expect_u64_gt(dirty_npurge1 + muzzy_npurge1, dirty_npurge0 +
-		    muzzy_npurge0, "Expected purging to occur");
+		expect_u64_gt(dirty_npurge1 + muzzy_npurge1,
+		    dirty_npurge0 + muzzy_npurge0, "Expected purging to occur");
 	}
 #undef NINTERVALS
 }
@@ -260,11 +265,11 @@ TEST_BEGIN(test_decay_ticker) {
 	test_skip_if(is_background_thread_enabled());
 	test_skip_if(opt_hpa);
 #define NPS 2048
-	ssize_t ddt = opt_dirty_decay_ms;
-	ssize_t mdt = opt_muzzy_decay_ms;
+	ssize_t  ddt = opt_dirty_decay_ms;
+	ssize_t  mdt = opt_muzzy_decay_ms;
 	unsigned arena_ind = do_arena_create(ddt, mdt);
-	int flags = (MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE);
-	void *ps[NPS];
+	int      flags = (MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE);
+	void    *ps[NPS];
 
 	/*
 	 * Allocate a bunch of large objects, pause the clock, deallocate every
@@ -274,8 +279,9 @@ TEST_BEGIN(test_decay_ticker) {
 	 */
 	size_t large;
 	size_t sz = sizeof(size_t);
-	expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL,
-	    0), 0, "Unexpected mallctl failure");
+	expect_d_eq(
+	    mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL, 0), 0,
+	    "Unexpected mallctl failure");
 
 	do_purge(arena_ind);
 	uint64_t dirty_npurge0 = get_arena_dirty_npurge(arena_ind);
@@ -302,9 +308,9 @@ TEST_BEGIN(test_decay_ticker) {
 		    "Expected nstime_update() to be called");
 	}
 
-	decay_ticker_helper(arena_ind, flags, true, ddt, dirty_npurge0,
-	    muzzy_npurge0, true);
-	decay_ticker_helper(arena_ind, flags, false, ddt+mdt, dirty_npurge0,
+	decay_ticker_helper(
+	    arena_ind, flags, true, ddt, dirty_npurge0, muzzy_npurge0, true);
+	decay_ticker_helper(arena_ind, flags, false, ddt + mdt, dirty_npurge0,
 	    muzzy_npurge0, false);
 
 	do_arena_destroy(arena_ind);
@@ -319,16 +325,17 @@ TEST_BEGIN(test_decay_nonmonotonic) {
 	test_skip_if(is_background_thread_enabled());
 	test_skip_if(opt_hpa);
 #define NPS (SMOOTHSTEP_NSTEPS + 1)
-	int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE);
-	void *ps[NPS];
+	int      flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE);
+	void    *ps[NPS];
 	uint64_t npurge0 = 0;
 	uint64_t npurge1 = 0;
-	size_t sz, large0;
+	size_t   sz, large0;
 	unsigned i, nupdates0;
 
 	sz = sizeof(size_t);
-	expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL,
-	    0), 0, "Unexpected mallctl failure");
+	expect_d_eq(
+	    mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, 0), 0,
+	    "Unexpected mallctl failure");
 
 	expect_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0,
 	    "Unexpected mallctl failure");
@@ -380,15 +387,15 @@ TEST_BEGIN(test_decay_now) {
 	unsigned arena_ind = do_arena_create(0, 0);
 	expect_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages");
 	expect_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages");
-	size_t sizes[] = {16, PAGE<<2, HUGEPAGE<<2};
+	size_t sizes[] = {16, PAGE << 2, HUGEPAGE << 2};
 	/* Verify that dirty/muzzy pages never linger after deallocation. */
-	for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) {
+	for (unsigned i = 0; i < sizeof(sizes) / sizeof(size_t); i++) {
 		size_t size = sizes[i];
 		generate_dirty(arena_ind, size);
-		expect_zu_eq(get_arena_pdirty(arena_ind), 0,
-		    "Unexpected dirty pages");
-		expect_zu_eq(get_arena_pmuzzy(arena_ind), 0,
-		    "Unexpected muzzy pages");
+		expect_zu_eq(
+		    get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages");
+		expect_zu_eq(
+		    get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages");
 	}
 	do_arena_destroy(arena_ind);
 }
@@ -399,20 +406,27 @@ TEST_BEGIN(test_decay_never) {
 	test_skip_if(opt_hpa);
 
 	unsigned arena_ind = do_arena_create(-1, -1);
-	int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE;
+	int      flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE;
 	expect_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages");
 	expect_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages");
-	size_t sizes[] = {16, PAGE<<2, HUGEPAGE<<2};
-	void *ptrs[sizeof(sizes)/sizeof(size_t)];
-	for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) {
+	size_t sizes[] = {16, PAGE << 2, HUGEPAGE << 2};
+	void  *ptrs[sizeof(sizes) / sizeof(size_t)];
+	for (unsigned i = 0; i < sizeof(sizes) / sizeof(size_t); i++) {
 		ptrs[i] = do_mallocx(sizes[i], flags);
 	}
 	/* Verify that each deallocation generates additional dirty pages. */
 	size_t pdirty_prev = get_arena_pdirty(arena_ind);
 	size_t pmuzzy_prev = get_arena_pmuzzy(arena_ind);
-	expect_zu_eq(pdirty_prev, 0, "Unexpected dirty pages");
+	/*
+	 * With sz_large_size_classes_disabled() = true, some more extents
+	 * are cached in the dirty pool, making the assumption below
+	 * not true.
+	 */
+	if (!sz_large_size_classes_disabled()) {
+		expect_zu_eq(pdirty_prev, 0, "Unexpected dirty pages");
+	}
 	expect_zu_eq(pmuzzy_prev, 0, "Unexpected muzzy pages");
-	for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) {
+	for (unsigned i = 0; i < sizeof(sizes) / sizeof(size_t); i++) {
 		dallocx(ptrs[i], flags);
 		size_t pdirty = get_arena_pdirty(arena_ind);
 		size_t pmuzzy = get_arena_pmuzzy(arena_ind);
@@ -427,10 +441,6 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_decay_ticks,
-	    test_decay_ticker,
-	    test_decay_nonmonotonic,
-	    test_decay_now,
-	    test_decay_never);
+	return test(test_decay_ticks, test_decay_ticker,
+	    test_decay_nonmonotonic, test_decay_now, test_decay_never);
 }
diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c
index 8ef0786c..3e0f3d75 100644
--- a/test/unit/arena_reset.c
+++ b/test/unit/arena_reset.c
@@ -1,5 +1,5 @@
 #ifndef ARENA_RESET_PROF_C_
-#include "test/jemalloc_test.h"
+#	include "test/jemalloc_test.h"
 #endif
 
 #include "jemalloc/internal/extent_mmap.h"
@@ -10,7 +10,7 @@
 static unsigned
 get_nsizes_impl(const char *cmd) {
 	unsigned ret;
-	size_t z;
+	size_t   z;
 
 	z = sizeof(unsigned);
 	expect_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0,
@@ -37,12 +37,12 @@ get_size_impl(const char *cmd, size_t ind) {
 	size_t miblen = 4;
 
 	z = sizeof(size_t);
-	expect_d_eq(mallctlnametomib(cmd, mib, &miblen),
-	    0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd);
+	expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0,
+	    "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd);
 	mib[2] = ind;
 	z = sizeof(size_t);
-	expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0),
-	    0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind);
+	expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0,
+	    "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind);
 
 	return ret;
 }
@@ -61,8 +61,8 @@ get_large_size(size_t ind) {
 static size_t
 vsalloc(tsdn_t *tsdn, const void *ptr) {
 	emap_full_alloc_ctx_t full_alloc_ctx;
-	bool missing = emap_full_alloc_ctx_try_lookup(tsdn, &arena_emap_global,
-	    ptr, &full_alloc_ctx);
+	bool                  missing = emap_full_alloc_ctx_try_lookup(
+            tsdn, &arena_emap_global, ptr, &full_alloc_ctx);
 	if (missing) {
 		return 0;
 	}
@@ -78,26 +78,27 @@ vsalloc(tsdn_t *tsdn, const void *ptr) {
 		return 0;
 	}
 
-	return sz_index2size(full_alloc_ctx.szind);
+	return edata_usize_get(full_alloc_ctx.edata);
 }
 
 static unsigned
 do_arena_create(extent_hooks_t *h) {
 	unsigned arena_ind;
-	size_t sz = sizeof(unsigned);
-	expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz,
-	    (void *)(h != NULL ? &h : NULL), (h != NULL ? sizeof(h) : 0)), 0,
-	    "Unexpected mallctl() failure");
+	size_t   sz = sizeof(unsigned);
+	expect_d_eq(
+	    mallctl("arenas.create", (void *)&arena_ind, &sz,
+	        (void *)(h != NULL ? &h : NULL), (h != NULL ? sizeof(h) : 0)),
+	    0, "Unexpected mallctl() failure");
 	return arena_ind;
 }
 
 static void
 do_arena_reset_pre(unsigned arena_ind, void ***ptrs, unsigned *nptrs) {
-#define NLARGE	32
+#define NLARGE 32
 	unsigned nsmall, nlarge, i;
-	size_t sz;
-	int flags;
-	tsdn_t *tsdn;
+	size_t   sz;
+	int      flags;
+	tsdn_t  *tsdn;
 
 	flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE;
 
@@ -132,14 +133,14 @@ do_arena_reset_pre(unsigned arena_ind, void ***ptrs, unsigned *nptrs) {
 
 static void
 do_arena_reset_post(void **ptrs, unsigned nptrs, unsigned arena_ind) {
-	tsdn_t *tsdn;
+	tsdn_t  *tsdn;
 	unsigned i;
 
 	tsdn = tsdn_fetch();
 
 	if (have_background_thread) {
-		malloc_mutex_lock(tsdn,
-		    &background_thread_info_get(arena_ind)->mtx);
+		malloc_mutex_lock(
+		    tsdn, &background_thread_info_get(arena_ind)->mtx);
 	}
 	/* Verify allocations no longer exist. */
 	for (i = 0; i < nptrs; i++) {
@@ -147,8 +148,8 @@ do_arena_reset_post(void **ptrs, unsigned nptrs, unsigned arena_ind) {
 		    "Allocation should no longer exist");
 	}
 	if (have_background_thread) {
-		malloc_mutex_unlock(tsdn,
-		    &background_thread_info_get(arena_ind)->mtx);
+		malloc_mutex_unlock(
+		    tsdn, &background_thread_info_get(arena_ind)->mtx);
 	}
 
 	free(ptrs);
@@ -159,7 +160,7 @@ do_arena_reset_destroy(const char *name, unsigned arena_ind) {
 	size_t mib[3];
 	size_t miblen;
 
-	miblen = sizeof(mib)/sizeof(size_t);
+	miblen = sizeof(mib) / sizeof(size_t);
 	expect_d_eq(mallctlnametomib(name, mib, &miblen), 0,
 	    "Unexpected mallctlnametomib() failure");
 	mib[1] = (size_t)arena_ind;
@@ -179,7 +180,7 @@ do_arena_destroy(unsigned arena_ind) {
 
 TEST_BEGIN(test_arena_reset) {
 	unsigned arena_ind;
-	void **ptrs;
+	void   **ptrs;
 	unsigned nptrs;
 
 	arena_ind = do_arena_create(NULL);
@@ -191,23 +192,25 @@ TEST_END
 
 static bool
 arena_i_initialized(unsigned arena_ind, bool refresh) {
-	bool initialized;
+	bool   initialized;
 	size_t mib[3];
 	size_t miblen, sz;
 
 	if (refresh) {
 		uint64_t epoch = 1;
-		expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch,
-		    sizeof(epoch)), 0, "Unexpected mallctl() failure");
+		expect_d_eq(
+		    mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)),
+		    0, "Unexpected mallctl() failure");
 	}
 
-	miblen = sizeof(mib)/sizeof(size_t);
+	miblen = sizeof(mib) / sizeof(size_t);
 	expect_d_eq(mallctlnametomib("arena.0.initialized", mib, &miblen), 0,
 	    "Unexpected mallctlnametomib() failure");
 	mib[1] = (size_t)arena_ind;
 	sz = sizeof(initialized);
-	expect_d_eq(mallctlbymib(mib, miblen, (void *)&initialized, &sz, NULL,
-	    0), 0, "Unexpected mallctlbymib() failure");
+	expect_d_eq(
+	    mallctlbymib(mib, miblen, (void *)&initialized, &sz, NULL, 0), 0,
+	    "Unexpected mallctlbymib() failure");
 
 	return initialized;
 }
@@ -220,7 +223,7 @@ TEST_END
 
 TEST_BEGIN(test_arena_destroy_hooks_default) {
 	unsigned arena_ind, arena_ind_another, arena_ind_prev;
-	void **ptrs;
+	void   **ptrs;
 	unsigned nptrs;
 
 	arena_ind = do_arena_create(NULL);
@@ -249,26 +252,27 @@ TEST_BEGIN(test_arena_destroy_hooks_default) {
 	arena_ind_prev = arena_ind;
 	arena_ind = do_arena_create(NULL);
 	do_arena_reset_pre(arena_ind, &ptrs, &nptrs);
-	expect_u_eq(arena_ind, arena_ind_prev,
-	    "Arena index should have been recycled");
+	expect_u_eq(
+	    arena_ind, arena_ind_prev, "Arena index should have been recycled");
 	do_arena_destroy(arena_ind);
 	do_arena_reset_post(ptrs, nptrs, arena_ind);
 
 	do_arena_destroy(arena_ind_another);
 
 	/* Try arena.create with custom hooks. */
-	size_t sz = sizeof(extent_hooks_t *);
+	size_t          sz = sizeof(extent_hooks_t *);
 	extent_hooks_t *a0_default_hooks;
 	expect_d_eq(mallctl("arena.0.extent_hooks", (void *)&a0_default_hooks,
-	    &sz, NULL, 0), 0, "Unexpected mallctlnametomib() failure");
+	                &sz, NULL, 0),
+	    0, "Unexpected mallctlnametomib() failure");
 
 	/* Default impl; but wrapped as "customized". */
-	extent_hooks_t new_hooks = *a0_default_hooks;
+	extent_hooks_t  new_hooks = *a0_default_hooks;
 	extent_hooks_t *hook = &new_hooks;
 	sz = sizeof(unsigned);
 	expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz,
-	    (void *)&hook, sizeof(void *)), 0,
-	    "Unexpected mallctl() failure");
+	                (void *)&hook, sizeof(void *)),
+	    0, "Unexpected mallctl() failure");
 	do_arena_destroy(arena_ind);
 }
 TEST_END
@@ -280,13 +284,15 @@ TEST_END
 static bool
 extent_dalloc_unmap(extent_hooks_t *extent_hooks, void *addr, size_t size,
     bool committed, unsigned arena_ind) {
-	TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, "
-	    "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ?
-	    "true" : "false", arena_ind);
+	TRACE_HOOK(
+	    "%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, "
+	    "arena_ind=%u)\n",
+	    __func__, extent_hooks, addr, size, committed ? "true" : "false",
+	    arena_ind);
 	expect_ptr_eq(extent_hooks, &hooks,
 	    "extent_hooks should be same as pointer used to set hooks");
-	expect_ptr_eq(extent_hooks->dalloc, extent_dalloc_unmap,
-	    "Wrong hook function");
+	expect_ptr_eq(
+	    extent_hooks->dalloc, extent_dalloc_unmap, "Wrong hook function");
 	called_dalloc = true;
 	if (!try_dalloc) {
 		return true;
@@ -301,21 +307,15 @@ extent_dalloc_unmap(extent_hooks_t *extent_hooks, void *addr, size_t size,
 
 static extent_hooks_t hooks_orig;
 
-static extent_hooks_t hooks_unmap = {
-	extent_alloc_hook,
-	extent_dalloc_unmap, /* dalloc */
-	extent_destroy_hook,
-	extent_commit_hook,
-	extent_decommit_hook,
-	extent_purge_lazy_hook,
-	extent_purge_forced_hook,
-	extent_split_hook,
-	extent_merge_hook
-};
+static extent_hooks_t hooks_unmap = {extent_alloc_hook,
+    extent_dalloc_unmap, /* dalloc */
+    extent_destroy_hook, extent_commit_hook, extent_decommit_hook,
+    extent_purge_lazy_hook, extent_purge_forced_hook, extent_split_hook,
+    extent_merge_hook};
 
 TEST_BEGIN(test_arena_destroy_hooks_unmap) {
 	unsigned arena_ind;
-	void **ptrs;
+	void   **ptrs;
 	unsigned nptrs;
 
 	extent_hooks_prep();
@@ -353,9 +353,6 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_arena_reset,
-	    test_arena_destroy_initial,
-	    test_arena_destroy_hooks_default,
-	    test_arena_destroy_hooks_unmap);
+	return test(test_arena_reset, test_arena_destroy_initial,
+	    test_arena_destroy_hooks_default, test_arena_destroy_hooks_unmap);
 }
diff --git a/test/unit/atomic.c b/test/unit/atomic.c
index c2ec8c7e..b4f59431 100644
--- a/test/unit/atomic.c
+++ b/test/unit/atomic.c
@@ -15,6 +15,7 @@
  * and val3 for desired.
  */
 
+/* clang-format off */
 #define DO_TESTS(t, ta, val1, val2, val3) do {				\
 	t val;								\
 	t expected;							\
@@ -174,6 +175,7 @@ typedef struct {							\
 		DO_INTEGER_TESTS(t, ta, test.val1, test.val2);		\
 	}								\
 } while (0)
+/* clang-format on */
 
 TEST_STRUCT(uint64_t, u64);
 TEST_BEGIN(test_atomic_u64) {
@@ -185,7 +187,6 @@ TEST_BEGIN(test_atomic_u64) {
 }
 TEST_END
 
-
 TEST_STRUCT(uint32_t, u32);
 TEST_BEGIN(test_atomic_u32) {
 	INTEGER_TEST_BODY(uint32_t, u32);
@@ -210,7 +211,6 @@ TEST_BEGIN(test_atomic_zd) {
 }
 TEST_END
 
-
 TEST_STRUCT(unsigned, u);
 TEST_BEGIN(test_atomic_u) {
 	INTEGER_TEST_BODY(unsigned, u);
@@ -219,11 +219,6 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_atomic_u64,
-	    test_atomic_u32,
-	    test_atomic_p,
-	    test_atomic_zu,
-	    test_atomic_zd,
-	    test_atomic_u);
+	return test(test_atomic_u64, test_atomic_u32, test_atomic_p,
+	    test_atomic_zu, test_atomic_zd, test_atomic_u);
 }
diff --git a/test/unit/background_thread.c b/test/unit/background_thread.c
index c60010a8..819a81a6 100644
--- a/test/unit/background_thread.c
+++ b/test/unit/background_thread.c
@@ -4,14 +4,13 @@
 
 static void
 test_switch_background_thread_ctl(bool new_val) {
-	bool e0, e1;
+	bool   e0, e1;
 	size_t sz = sizeof(bool);
 
 	e1 = new_val;
-	expect_d_eq(mallctl("background_thread", (void *)&e0, &sz,
-	    &e1, sz), 0, "Unexpected mallctl() failure");
-	expect_b_eq(e0, !e1,
-	    "background_thread should be %d before.\n", !e1);
+	expect_d_eq(mallctl("background_thread", (void *)&e0, &sz, &e1, sz), 0,
+	    "Unexpected mallctl() failure");
+	expect_b_eq(e0, !e1, "background_thread should be %d before.\n", !e1);
 	if (e1) {
 		expect_zu_gt(n_background_threads, 0,
 		    "Number of background threads should be non zero.\n");
@@ -23,14 +22,13 @@ test_switch_background_thread_ctl(bool new_val) {
 
 static void
 test_repeat_background_thread_ctl(bool before) {
-	bool e0, e1;
+	bool   e0, e1;
 	size_t sz = sizeof(bool);
 
 	e1 = before;
-	expect_d_eq(mallctl("background_thread", (void *)&e0, &sz,
-	    &e1, sz), 0, "Unexpected mallctl() failure");
-	expect_b_eq(e0, before,
-	    "background_thread should be %d.\n", before);
+	expect_d_eq(mallctl("background_thread", (void *)&e0, &sz, &e1, sz), 0,
+	    "Unexpected mallctl() failure");
+	expect_b_eq(e0, before, "background_thread should be %d.\n", before);
 	if (e1) {
 		expect_zu_gt(n_background_threads, 0,
 		    "Number of background threads should be non zero.\n");
@@ -43,15 +41,15 @@ test_repeat_background_thread_ctl(bool before) {
 TEST_BEGIN(test_background_thread_ctl) {
 	test_skip_if(!have_background_thread);
 
-	bool e0, e1;
+	bool   e0, e1;
 	size_t sz = sizeof(bool);
 
-	expect_d_eq(mallctl("opt.background_thread", (void *)&e0, &sz,
-	    NULL, 0), 0, "Unexpected mallctl() failure");
-	expect_d_eq(mallctl("background_thread", (void *)&e1, &sz,
-	    NULL, 0), 0, "Unexpected mallctl() failure");
-	expect_b_eq(e0, e1,
-	    "Default and opt.background_thread does not match.\n");
+	expect_d_eq(mallctl("opt.background_thread", (void *)&e0, &sz, NULL, 0),
+	    0, "Unexpected mallctl() failure");
+	expect_d_eq(mallctl("background_thread", (void *)&e1, &sz, NULL, 0), 0,
+	    "Unexpected mallctl() failure");
+	expect_b_eq(
+	    e0, e1, "Default and opt.background_thread does not match.\n");
 	if (e0) {
 		test_switch_background_thread_ctl(false);
 	}
@@ -75,7 +73,7 @@ TEST_BEGIN(test_background_thread_running) {
 	test_skip_if(!config_stats);
 
 #if defined(JEMALLOC_BACKGROUND_THREAD)
-	tsd_t *tsd = tsd_fetch();
+	tsd_t                    *tsd = tsd_fetch();
 	background_thread_info_t *info = &background_thread_info[0];
 
 	test_repeat_background_thread_ctl(false);
@@ -113,6 +111,5 @@ int
 main(void) {
 	/* Background_thread creation tests reentrancy naturally. */
 	return test_no_reentrancy(
-	    test_background_thread_ctl,
-	    test_background_thread_running);
+	    test_background_thread_ctl, test_background_thread_running);
 }
diff --git a/test/unit/background_thread_enable.c b/test/unit/background_thread_enable.c
index 44034ac6..57f26c4b 100644
--- a/test/unit/background_thread_enable.c
+++ b/test/unit/background_thread_enable.c
@@ -1,6 +1,7 @@
 #include "test/jemalloc_test.h"
 
-const char *malloc_conf = "background_thread:false,narenas:1,max_background_threads:20";
+const char *malloc_conf =
+    "background_thread:false,narenas:1,max_background_threads:8";
 
 static unsigned
 max_test_narenas(void) {
@@ -12,26 +13,23 @@ max_test_narenas(void) {
 	 * approximation.
 	 */
 	unsigned ret = 10 * ncpus;
-	/* Limit the max to avoid VM exhaustion on 32-bit . */
-	if (ret > 512) {
-		ret = 512;
-	}
 
-	return ret;
+	/* Limit the max to avoid VM exhaustion on 32-bit . */
+	return ret > 256 ? 256 : ret;
 }
 
 TEST_BEGIN(test_deferred) {
 	test_skip_if(!have_background_thread);
 
 	unsigned id;
-	size_t sz_u = sizeof(unsigned);
+	size_t   sz_u = sizeof(unsigned);
 
 	for (unsigned i = 0; i < max_test_narenas(); i++) {
 		expect_d_eq(mallctl("arenas.create", &id, &sz_u, NULL, 0), 0,
 		    "Failed to create arena");
 	}
 
-	bool enable = true;
+	bool   enable = true;
 	size_t sz_b = sizeof(bool);
 	expect_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0,
 	    "Failed to enable background threads");
@@ -47,26 +45,32 @@ TEST_BEGIN(test_max_background_threads) {
 	size_t max_n_thds;
 	size_t opt_max_n_thds;
 	size_t sz_m = sizeof(max_n_thds);
-	expect_d_eq(mallctl("opt.max_background_threads",
-	    &opt_max_n_thds, &sz_m, NULL, 0), 0,
-	    "Failed to get opt.max_background_threads");
-	expect_d_eq(mallctl("max_background_threads", &max_n_thds, &sz_m, NULL,
-	    0), 0, "Failed to get max background threads");
+	expect_d_eq(mallctl("opt.max_background_threads", &opt_max_n_thds,
+	                &sz_m, NULL, 0),
+	    0, "Failed to get opt.max_background_threads");
+	expect_d_eq(
+	    mallctl("max_background_threads", &max_n_thds, &sz_m, NULL, 0), 0,
+	    "Failed to get max background threads");
 	expect_zu_eq(opt_max_n_thds, max_n_thds,
 	    "max_background_threads and "
 	    "opt.max_background_threads should match");
-	expect_d_eq(mallctl("max_background_threads", NULL, NULL, &max_n_thds,
-	    sz_m), 0, "Failed to set max background threads");
+	expect_d_eq(
+	    mallctl("max_background_threads", NULL, NULL, &max_n_thds, sz_m), 0,
+	    "Failed to set max background threads");
+	size_t size_zero = 0;
+	expect_d_ne(
+	    mallctl("max_background_threads", NULL, NULL, &size_zero, sz_m), 0,
+	    "Should not allow zero background threads");
 
 	unsigned id;
-	size_t sz_u = sizeof(unsigned);
+	size_t   sz_u = sizeof(unsigned);
 
 	for (unsigned i = 0; i < max_test_narenas(); i++) {
 		expect_d_eq(mallctl("arenas.create", &id, &sz_u, NULL, 0), 0,
 		    "Failed to create arena");
 	}
 
-	bool enable = true;
+	bool   enable = true;
 	size_t sz_b = sizeof(bool);
 	expect_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0,
 	    "Failed to enable background threads");
@@ -75,14 +79,18 @@ TEST_BEGIN(test_max_background_threads) {
 	size_t new_max_thds = max_n_thds - 1;
 	if (new_max_thds > 0) {
 		expect_d_eq(mallctl("max_background_threads", NULL, NULL,
-		    &new_max_thds, sz_m), 0,
-		    "Failed to set max background threads");
+		                &new_max_thds, sz_m),
+		    0, "Failed to set max background threads");
 		expect_zu_eq(n_background_threads, new_max_thds,
 		    "Number of background threads should decrease by 1.\n");
 	}
 	new_max_thds = 1;
-	expect_d_eq(mallctl("max_background_threads", NULL, NULL, &new_max_thds,
-	    sz_m), 0, "Failed to set max background threads");
+	expect_d_eq(
+	    mallctl("max_background_threads", NULL, NULL, &new_max_thds, sz_m),
+	    0, "Failed to set max background threads");
+	expect_d_ne(
+	    mallctl("max_background_threads", NULL, NULL, &size_zero, sz_m), 0,
+	    "Should not allow zero background threads");
 	expect_zu_eq(n_background_threads, new_max_thds,
 	    "Number of background threads should be 1.\n");
 }
@@ -90,7 +98,5 @@ TEST_END
 
 int
 main(void) {
-	return test_no_reentrancy(
-		test_deferred,
-		test_max_background_threads);
+	return test_no_reentrancy(test_deferred, test_max_background_threads);
 }
diff --git a/test/unit/background_thread_init.c b/test/unit/background_thread_init.c
new file mode 100644
index 00000000..169b96c7
--- /dev/null
+++ b/test/unit/background_thread_init.c
@@ -0,0 +1,183 @@
+#include "test/jemalloc_test.h"
+
+/*
+ * Test to verify that background thread initialization has no race conditions.
+ *
+ * See https://github.com/facebook/jemalloc/pull/68
+ */
+
+#ifdef JEMALLOC_BACKGROUND_THREAD
+const char *malloc_conf = "background_thread:true,percpu_arena:percpu";
+#else
+const char *malloc_conf = "";
+#endif
+
+#define N_INIT_THREADS 32
+#define N_ITERATIONS 10
+
+static mtx_t barrier_mtx;
+static atomic_u32_t n_waiting;
+static unsigned n_threads;
+static atomic_b_t release;
+
+/*
+ * Simple spin barrier - all threads wait until everyone arrives,
+ * then they all proceed to call malloc() simultaneously.
+ */
+static void
+barrier_wait(void) {
+	mtx_lock(&barrier_mtx);
+	uint32_t waiting = atomic_load_u32(&n_waiting, ATOMIC_RELAXED) + 1;
+	atomic_store_u32(&n_waiting, waiting, ATOMIC_RELAXED);
+	bool should_release = (waiting == n_threads);
+	mtx_unlock(&barrier_mtx);
+
+	if (should_release) {
+		atomic_store_b(&release, true, ATOMIC_RELEASE);
+	}
+
+	while (!atomic_load_b(&release, ATOMIC_ACQUIRE)) {
+		/* Spin until released. */
+	}
+}
+
+static void
+barrier_reset(void) {
+	atomic_store_u32(&n_waiting, 0, ATOMIC_RELAXED);
+	atomic_store_b(&release, false, ATOMIC_RELAXED);
+}
+
+static void *
+thd_start(void *arg) {
+	barrier_wait();
+
+	/*
+	 * All threads race to malloc simultaneously.
+	 * This triggers concurrent arena initialization with percpu_arena.
+	 */
+	void *p = malloc(64);
+	expect_ptr_not_null(p, "malloc failed");
+	free(p);
+
+	return NULL;
+}
+
+TEST_BEGIN(test_mt_background_thread_init) {
+	test_skip_if(!have_background_thread);
+	test_skip_if(!have_percpu_arena ||
+	    !PERCPU_ARENA_ENABLED(opt_percpu_arena));
+
+	thd_t thds[N_INIT_THREADS];
+
+	expect_false(mtx_init(&barrier_mtx), "mtx_init failed");
+	n_threads = N_INIT_THREADS;
+	barrier_reset();
+
+	/* Create threads that will all race to call malloc(). */
+	for (unsigned i = 0; i < N_INIT_THREADS; i++) {
+		thd_create(&thds[i], thd_start, NULL);
+	}
+
+	/* Wait for all threads to complete. */
+	for (unsigned i = 0; i < N_INIT_THREADS; i++) {
+		thd_join(thds[i], NULL);
+	}
+
+	mtx_fini(&barrier_mtx);
+
+	/*
+	 * Verify background threads are properly running. Before the fix,
+	 * the race could leave Thread 0 marked as "started" without an
+	 * actual pthread behind it.
+	 */
+#ifdef JEMALLOC_BACKGROUND_THREAD
+	tsd_t *tsd = tsd_fetch();
+	background_thread_info_t *t0 = &background_thread_info[0];
+
+	malloc_mutex_lock(tsd_tsdn(tsd), &t0->mtx);
+	expect_d_eq(t0->state, background_thread_started,
+	    "Thread 0 should be in started state");
+	malloc_mutex_unlock(tsd_tsdn(tsd), &t0->mtx);
+
+	expect_zu_gt(n_background_threads, 0,
+	    "At least one background thread should be running");
+#endif
+}
+TEST_END
+
+TEST_BEGIN(test_mt_background_thread_init_stress) {
+	test_skip_if(!have_background_thread);
+	test_skip_if(!config_stats);
+
+	thd_t thds[N_INIT_THREADS];
+
+	expect_false(mtx_init(&barrier_mtx), "mtx_init failed");
+	n_threads = N_INIT_THREADS;
+
+	/*
+	 * Run multiple iterations to increase the chance of hitting
+	 * any race conditions. Each iteration creates new threads that
+	 * perform allocations concurrently.
+	 */
+	for (unsigned iter = 0; iter < N_ITERATIONS; iter++) {
+		barrier_reset();
+
+		for (unsigned i = 0; i < N_INIT_THREADS; i++) {
+			thd_create(&thds[i], thd_start, NULL);
+		}
+
+		for (unsigned i = 0; i < N_INIT_THREADS; i++) {
+			thd_join(thds[i], NULL);
+		}
+	}
+
+	mtx_fini(&barrier_mtx);
+
+#ifdef JEMALLOC_BACKGROUND_THREAD
+	/*
+	 * Verify Thread 0 is actually running by checking it has done work.
+	 * Wait up to a few seconds for the background thread to run.
+	 */
+	tsd_t *tsd = tsd_fetch();
+	background_thread_info_t *t0 = &background_thread_info[0];
+
+	nstime_t start;
+	nstime_init_update(&start);
+
+	bool ran = false;
+	while (!ran) {
+		malloc_mutex_lock(tsd_tsdn(tsd), &t0->mtx);
+		if (t0->tot_n_runs > 0) {
+			ran = true;
+		}
+		malloc_mutex_unlock(tsd_tsdn(tsd), &t0->mtx);
+
+		if (ran) {
+			break;
+		}
+
+		nstime_t now;
+		nstime_init_update(&now);
+		nstime_subtract(&now, &start);
+		if (nstime_sec(&now) > 10) {
+			/*
+			 * If Thread 0 hasn't run after 10 seconds, it's
+			 * likely not actually running (the bug condition).
+			 */
+			expect_true(false,
+			    "Thread 0 did not run within 10 seconds - "
+			    "possible initialization race");
+			break;
+		}
+		sleep_ns(100 * 1000 * 1000); /* 100ms */
+	}
+#endif
+}
+TEST_END
+
+int
+main(void) {
+	return test_no_reentrancy(
+	    test_mt_background_thread_init,
+	    test_mt_background_thread_init_stress);
+}
diff --git a/test/unit/base.c b/test/unit/base.c
index 15e04a8c..e6e82435 100644
--- a/test/unit/base.c
+++ b/test/unit/base.c
@@ -3,55 +3,50 @@
 #include "test/extent_hooks.h"
 
 static extent_hooks_t hooks_null = {
-	extent_alloc_hook,
-	NULL, /* dalloc */
-	NULL, /* destroy */
-	NULL, /* commit */
-	NULL, /* decommit */
-	NULL, /* purge_lazy */
-	NULL, /* purge_forced */
-	NULL, /* split */
-	NULL /* merge */
+    extent_alloc_hook, NULL, /* dalloc */
+    NULL,                    /* destroy */
+    NULL,                    /* commit */
+    NULL,                    /* decommit */
+    NULL,                    /* purge_lazy */
+    NULL,                    /* purge_forced */
+    NULL,                    /* split */
+    NULL                     /* merge */
 };
 
 static extent_hooks_t hooks_not_null = {
-	extent_alloc_hook,
-	extent_dalloc_hook,
-	extent_destroy_hook,
-	NULL, /* commit */
-	extent_decommit_hook,
-	extent_purge_lazy_hook,
-	extent_purge_forced_hook,
-	NULL, /* split */
-	NULL /* merge */
+    extent_alloc_hook, extent_dalloc_hook, extent_destroy_hook,
+    NULL, /* commit */
+    extent_decommit_hook, extent_purge_lazy_hook, extent_purge_forced_hook,
+    NULL, /* split */
+    NULL  /* merge */
 };
 
 TEST_BEGIN(test_base_hooks_default) {
 	base_t *base;
-	size_t allocated0, allocated1, resident, mapped, n_thp;
+	size_t  allocated0, allocated1, edata_allocated, rtree_allocated,
+	    resident, mapped, n_thp;
 
 	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
-	base = base_new(tsdn, 0,
-	    (extent_hooks_t *)&ehooks_default_extent_hooks,
+	base = base_new(tsdn, 0, (extent_hooks_t *)&ehooks_default_extent_hooks,
 	    /* metadata_use_hooks */ true);
 
 	if (config_stats) {
-		base_stats_get(tsdn, base, &allocated0, &resident, &mapped,
-		    &n_thp);
+		base_stats_get(tsdn, base, &allocated0, &edata_allocated,
+		    &rtree_allocated, &resident, &mapped, &n_thp);
 		expect_zu_ge(allocated0, sizeof(base_t),
 		    "Base header should count as allocated");
 		if (opt_metadata_thp == metadata_thp_always) {
-			expect_zu_gt(n_thp, 0,
-			    "Base should have 1 THP at least.");
+			expect_zu_gt(
+			    n_thp, 0, "Base should have 1 THP at least.");
 		}
 	}
 
-	expect_ptr_not_null(base_alloc(tsdn, base, 42, 1),
-	    "Unexpected base_alloc() failure");
+	expect_ptr_not_null(
+	    base_alloc(tsdn, base, 42, 1), "Unexpected base_alloc() failure");
 
 	if (config_stats) {
-		base_stats_get(tsdn, base, &allocated1, &resident, &mapped,
-		    &n_thp);
+		base_stats_get(tsdn, base, &allocated1, &edata_allocated,
+		    &rtree_allocated, &resident, &mapped, &n_thp);
 		expect_zu_ge(allocated1 - allocated0, 42,
 		    "At least 42 bytes were allocated by base_alloc()");
 	}
@@ -62,8 +57,9 @@ TEST_END
 
 TEST_BEGIN(test_base_hooks_null) {
 	extent_hooks_t hooks_orig;
-	base_t *base;
-	size_t allocated0, allocated1, resident, mapped, n_thp;
+	base_t        *base;
+	size_t         allocated0, allocated1, edata_allocated, rtree_allocated,
+	    resident, mapped, n_thp;
 
 	extent_hooks_prep();
 	try_dalloc = false;
@@ -79,22 +75,22 @@ TEST_BEGIN(test_base_hooks_null) {
 	expect_ptr_not_null(base, "Unexpected base_new() failure");
 
 	if (config_stats) {
-		base_stats_get(tsdn, base, &allocated0, &resident, &mapped,
-		    &n_thp);
+		base_stats_get(tsdn, base, &allocated0, &edata_allocated,
+		    &rtree_allocated, &resident, &mapped, &n_thp);
 		expect_zu_ge(allocated0, sizeof(base_t),
 		    "Base header should count as allocated");
 		if (opt_metadata_thp == metadata_thp_always) {
-			expect_zu_gt(n_thp, 0,
-			    "Base should have 1 THP at least.");
+			expect_zu_gt(
+			    n_thp, 0, "Base should have 1 THP at least.");
 		}
 	}
 
-	expect_ptr_not_null(base_alloc(tsdn, base, 42, 1),
-	    "Unexpected base_alloc() failure");
+	expect_ptr_not_null(
+	    base_alloc(tsdn, base, 42, 1), "Unexpected base_alloc() failure");
 
 	if (config_stats) {
-		base_stats_get(tsdn, base, &allocated1, &resident, &mapped,
-		    &n_thp);
+		base_stats_get(tsdn, base, &allocated1, &edata_allocated,
+		    &rtree_allocated, &resident, &mapped, &n_thp);
 		expect_zu_ge(allocated1 - allocated0, 42,
 		    "At least 42 bytes were allocated by base_alloc()");
 	}
@@ -107,8 +103,8 @@ TEST_END
 
 TEST_BEGIN(test_base_hooks_not_null) {
 	extent_hooks_t hooks_orig;
-	base_t *base;
-	void *p, *q, *r, *r_exp;
+	base_t        *base;
+	void          *p, *q, *r, *r_exp;
 
 	extent_hooks_prep();
 	try_dalloc = false;
@@ -131,33 +127,34 @@ TEST_BEGIN(test_base_hooks_not_null) {
 	 */
 	{
 		const size_t alignments[] = {
-			1,
-			QUANTUM,
-			QUANTUM << 1,
-			CACHELINE,
-			CACHELINE << 1,
+		    1,
+		    QUANTUM,
+		    QUANTUM << 1,
+		    CACHELINE,
+		    CACHELINE << 1,
 		};
 		unsigned i;
 
 		for (i = 0; i < sizeof(alignments) / sizeof(size_t); i++) {
 			size_t alignment = alignments[i];
-			size_t align_ceil = ALIGNMENT_CEILING(alignment,
-			    QUANTUM);
+			size_t align_ceil = ALIGNMENT_CEILING(
+			    alignment, QUANTUM);
 			p = base_alloc(tsdn, base, 1, alignment);
-			expect_ptr_not_null(p,
-			    "Unexpected base_alloc() failure");
+			expect_ptr_not_null(
+			    p, "Unexpected base_alloc() failure");
 			expect_ptr_eq(p,
-			    (void *)(ALIGNMENT_CEILING((uintptr_t)p,
-			    alignment)), "Expected quantum alignment");
+			    (void *)(ALIGNMENT_CEILING(
+			        (uintptr_t)p, alignment)),
+			    "Expected quantum alignment");
 			q = base_alloc(tsdn, base, alignment, alignment);
-			expect_ptr_not_null(q,
-			    "Unexpected base_alloc() failure");
+			expect_ptr_not_null(
+			    q, "Unexpected base_alloc() failure");
 			expect_ptr_eq((void *)((uintptr_t)p + align_ceil), q,
 			    "Minimal allocation should take up %zu bytes",
 			    align_ceil);
 			r = base_alloc(tsdn, base, 1, alignment);
-			expect_ptr_not_null(r,
-			    "Unexpected base_alloc() failure");
+			expect_ptr_not_null(
+			    r, "Unexpected base_alloc() failure");
 			expect_ptr_eq((void *)((uintptr_t)q + align_ceil), r,
 			    "Minimal allocation should take up %zu bytes",
 			    align_ceil);
@@ -191,21 +188,18 @@ TEST_BEGIN(test_base_hooks_not_null) {
 	 * Check for proper alignment support when normal blocks are too small.
 	 */
 	{
-		const size_t alignments[] = {
-			HUGEPAGE,
-			HUGEPAGE << 1
-		};
-		unsigned i;
+		const size_t alignments[] = {HUGEPAGE, HUGEPAGE << 1};
+		unsigned     i;
 
 		for (i = 0; i < sizeof(alignments) / sizeof(size_t); i++) {
 			size_t alignment = alignments[i];
 			p = base_alloc(tsdn, base, QUANTUM, alignment);
-			expect_ptr_not_null(p,
-			    "Unexpected base_alloc() failure");
+			expect_ptr_not_null(
+			    p, "Unexpected base_alloc() failure");
 			expect_ptr_eq(p,
-			    (void *)(ALIGNMENT_CEILING((uintptr_t)p,
-			    alignment)), "Expected %zu-byte alignment",
-			    alignment);
+			    (void *)(ALIGNMENT_CEILING(
+			        (uintptr_t)p, alignment)),
+			    "Expected %zu-byte alignment", alignment);
 		}
 	}
 
@@ -235,12 +229,11 @@ TEST_BEGIN(test_base_ehooks_get_for_metadata_default_hook) {
 	base = base_new(tsdn, 0, &hooks, /* metadata_use_hooks */ false);
 	ehooks_t *ehooks = base_ehooks_get_for_metadata(base);
 	expect_true(ehooks_are_default(ehooks),
-		"Expected default extent hook functions pointer");
+	    "Expected default extent hook functions pointer");
 	base_delete(tsdn, base);
 }
 TEST_END
 
-
 TEST_BEGIN(test_base_ehooks_get_for_metadata_custom_hook) {
 	extent_hooks_prep();
 	memcpy(&hooks, &hooks_not_null, sizeof(extent_hooks_t));
@@ -249,17 +242,15 @@ TEST_BEGIN(test_base_ehooks_get_for_metadata_custom_hook) {
 	base = base_new(tsdn, 0, &hooks, /* metadata_use_hooks */ true);
 	ehooks_t *ehooks = base_ehooks_get_for_metadata(base);
 	expect_ptr_eq(&hooks, ehooks_get_extent_hooks_ptr(ehooks),
-		"Expected user-specified extend hook functions pointer");
+	    "Expected user-specified extend hook functions pointer");
 	base_delete(tsdn, base);
 }
 TEST_END
 
 int
 main(void) {
-	return test(
-	    test_base_hooks_default,
-	    test_base_hooks_null,
+	return test(test_base_hooks_default, test_base_hooks_null,
 	    test_base_hooks_not_null,
-            test_base_ehooks_get_for_metadata_default_hook,
-            test_base_ehooks_get_for_metadata_custom_hook);
+	    test_base_ehooks_get_for_metadata_default_hook,
+	    test_base_ehooks_get_for_metadata_custom_hook);
 }
diff --git a/test/unit/batch_alloc.c b/test/unit/batch_alloc.c
index 901c52b1..0c61bf77 100644
--- a/test/unit/batch_alloc.c
+++ b/test/unit/batch_alloc.c
@@ -6,8 +6,8 @@ static void *global_ptrs[BATCH_MAX];
 #define PAGE_ALIGNED(ptr) (((uintptr_t)ptr & PAGE_MASK) == 0)
 
 static void
-verify_batch_basic(tsd_t *tsd, void **ptrs, size_t batch, size_t usize,
-    bool zero) {
+verify_batch_basic(
+    tsd_t *tsd, void **ptrs, size_t batch, size_t usize, bool zero) {
 	for (size_t i = 0; i < batch; ++i) {
 		void *p = ptrs[i];
 		expect_zu_eq(isalloc(tsd_tsdn(tsd), p), usize, "");
@@ -46,7 +46,8 @@ verify_batch_locality(tsd_t *tsd, void **ptrs, size_t batch, size_t usize,
 		assert(i > 0);
 		void *q = ptrs[i - 1];
 		expect_true((uintptr_t)p > (uintptr_t)q
-		    && (size_t)((uintptr_t)p - (uintptr_t)q) == usize, "");
+		        && (size_t)((uintptr_t)p - (uintptr_t)q) == usize,
+		    "");
 	}
 }
 
@@ -62,16 +63,17 @@ struct batch_alloc_packet_s {
 	void **ptrs;
 	size_t num;
 	size_t size;
-	int flags;
+	int    flags;
 };
 
 static size_t
 batch_alloc_wrapper(void **ptrs, size_t num, size_t size, int flags) {
 	batch_alloc_packet_t batch_alloc_packet = {ptrs, num, size, flags};
-	size_t filled;
-	size_t len = sizeof(size_t);
+	size_t               filled;
+	size_t               len = sizeof(size_t);
 	assert_d_eq(mallctl("experimental.batch_alloc", &filled, &len,
-	    &batch_alloc_packet, sizeof(batch_alloc_packet)), 0, "");
+	                &batch_alloc_packet, sizeof(batch_alloc_packet)),
+	    0, "");
 	return filled;
 }
 
@@ -79,16 +81,16 @@ static void
 test_wrapper(size_t size, size_t alignment, bool zero, unsigned arena_flag) {
 	tsd_t *tsd = tsd_fetch();
 	assert(tsd != NULL);
-	const size_t usize =
-	    (alignment != 0 ? sz_sa2u(size, alignment) : sz_s2u(size));
-	const szind_t ind = sz_size2index(usize);
+	const size_t      usize = (alignment != 0 ? sz_sa2u(size, alignment)
+	                                          : sz_s2u(size));
+	const szind_t     ind = sz_size2index(usize);
 	const bin_info_t *bin_info = &bin_infos[ind];
-	const unsigned nregs = bin_info->nregs;
+	const unsigned    nregs = bin_info->nregs;
 	assert(nregs > 0);
 	arena_t *arena;
 	if (arena_flag != 0) {
-		arena = arena_get(tsd_tsdn(tsd), MALLOCX_ARENA_GET(arena_flag),
-		    false);
+		arena = arena_get(
+		    tsd_tsdn(tsd), MALLOCX_ARENA_GET(arena_flag), false);
 	} else {
 		arena = arena_choose(tsd, NULL);
 	}
@@ -122,13 +124,13 @@ test_wrapper(size_t size, size_t alignment, bool zero, unsigned arena_flag) {
 			}
 			size_t batch = base + (size_t)j;
 			assert(batch < BATCH_MAX);
-			size_t filled = batch_alloc_wrapper(global_ptrs, batch,
-			    size, flags);
+			size_t filled = batch_alloc_wrapper(
+			    global_ptrs, batch, size, flags);
 			assert_zu_eq(filled, batch, "");
-			verify_batch_basic(tsd, global_ptrs, batch, usize,
-			    zero);
-			verify_batch_locality(tsd, global_ptrs, batch, usize,
-			    arena, nregs);
+			verify_batch_basic(
+			    tsd, global_ptrs, batch, usize, zero);
+			verify_batch_locality(
+			    tsd, global_ptrs, batch, usize, arena, nregs);
 			release_batch(global_ptrs, batch, usize);
 		}
 	}
@@ -153,9 +155,10 @@ TEST_END
 
 TEST_BEGIN(test_batch_alloc_manual_arena) {
 	unsigned arena_ind;
-	size_t len_unsigned = sizeof(unsigned);
-	assert_d_eq(mallctl("arenas.create", &arena_ind, &len_unsigned, NULL,
-	    0), 0, "");
+	size_t   len_unsigned = sizeof(unsigned);
+	assert_d_eq(
+	    mallctl("arenas.create", &arena_ind, &len_unsigned, NULL, 0), 0,
+	    "");
 	test_wrapper(11, 0, false, MALLOCX_ARENA(arena_ind));
 }
 TEST_END
@@ -168,7 +171,7 @@ TEST_BEGIN(test_batch_alloc_large) {
 		assert_zu_eq(filled, batch, "");
 		release_batch(global_ptrs, batch, size);
 	}
-	size = tcache_maxclass + 1;
+	size = global_do_not_change_tcache_maxclass + 1;
 	for (size_t batch = 0; batch < 4; ++batch) {
 		assert(batch < BATCH_MAX);
 		size_t filled = batch_alloc(global_ptrs, batch, size, 0);
@@ -180,10 +183,7 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_batch_alloc,
-	    test_batch_alloc_zero,
-	    test_batch_alloc_aligned,
-	    test_batch_alloc_manual_arena,
+	return test(test_batch_alloc, test_batch_alloc_zero,
+	    test_batch_alloc_aligned, test_batch_alloc_manual_arena,
 	    test_batch_alloc_large);
 }
diff --git a/test/unit/bin.c b/test/unit/bin.c
new file mode 100644
index 00000000..002bbf11
--- /dev/null
+++ b/test/unit/bin.c
@@ -0,0 +1,825 @@
+#include "test/jemalloc_test.h"
+
+#define INVALID_ARENA_IND ((1U << MALLOCX_ARENA_BITS) - 1)
+
+/* Create a page-aligned mock slab with all regions free. */
+static void
+create_mock_slab(edata_t *slab, szind_t binind, uint64_t sn) {
+	const bin_info_t *bin_info = &bin_infos[binind];
+	void *addr;
+	slab_data_t *slab_data;
+
+	addr = mallocx(bin_info->slab_size, MALLOCX_LG_ALIGN(LG_PAGE));
+	assert_ptr_not_null(addr, "Unexpected mallocx failure");
+
+	memset(slab, 0, sizeof(edata_t));
+	edata_init(slab, INVALID_ARENA_IND, addr, bin_info->slab_size,
+	    true, binind, sn, extent_state_active, false, true,
+	    EXTENT_PAI_PAC, EXTENT_NOT_HEAD);
+	edata_nfree_set(slab, bin_info->nregs);
+
+	/* Initialize bitmap to all regions free. */
+	slab_data = edata_slab_data_get(slab);
+	bitmap_init(slab_data->bitmap, &bin_info->bitmap_info, false);
+}
+
+/*
+ * Test that bin_init produces a valid empty bin.
+ */
+TEST_BEGIN(test_bin_init) {
+	bin_t bin;
+	bool err;
+
+	err = bin_init(&bin);
+	expect_false(err, "bin_init should succeed");
+	expect_ptr_null(bin.slabcur, "New bin should have NULL slabcur");
+	expect_ptr_null(edata_heap_first(&bin.slabs_nonfull),
+	    "New bin should have empty nonfull heap");
+	expect_true(edata_list_active_empty(&bin.slabs_full),
+	    "New bin should have empty full list");
+	if (config_stats) {
+		expect_u64_eq(bin.stats.nmalloc, 0,
+		    "New bin should have zero nmalloc");
+		expect_u64_eq(bin.stats.ndalloc, 0,
+		    "New bin should have zero ndalloc");
+		expect_zu_eq(bin.stats.curregs, 0,
+		    "New bin should have zero curregs");
+		expect_zu_eq(bin.stats.curslabs, 0,
+		    "New bin should have zero curslabs");
+	}
+}
+TEST_END
+
+/*
+ * Test single-region allocation from a slab.
+ */
+TEST_BEGIN(test_bin_slab_reg_alloc) {
+	szind_t binind = 0;
+	const bin_info_t *bin_info = &bin_infos[binind];
+	edata_t slab;
+	unsigned nregs;
+	unsigned i;
+
+	create_mock_slab(&slab, binind, 0);
+	nregs = bin_info->nregs;
+
+	for (i = 0; i < nregs; i++) {
+		void *reg;
+
+		expect_u_gt(edata_nfree_get(&slab), 0,
+		    "Slab should have free regions");
+		reg = bin_slab_reg_alloc(&slab, bin_info);
+		expect_ptr_not_null(reg,
+		    "bin_slab_reg_alloc should return non-NULL");
+		/* Verify the pointer is within the slab. */
+		expect_true(
+		    (uintptr_t)reg >= (uintptr_t)edata_addr_get(&slab) &&
+		    (uintptr_t)reg < (uintptr_t)edata_addr_get(&slab)
+		    + bin_info->slab_size,
+		    "Allocated region should be within slab bounds");
+	}
+	expect_u_eq(edata_nfree_get(&slab), 0,
+	    "Slab should be full after allocating all regions");
+	free(edata_addr_get(&slab));
+}
+TEST_END
+
+/*
+ * Test batch allocation from a slab.
+ */
+TEST_BEGIN(test_bin_slab_reg_alloc_batch) {
+	szind_t binind = 0;
+	const bin_info_t *bin_info = &bin_infos[binind];
+	edata_t slab;
+	unsigned nregs;
+	void **ptrs;
+	unsigned i;
+
+	create_mock_slab(&slab, binind, 0);
+	nregs = bin_info->nregs;
+	ptrs = mallocx(nregs * sizeof(void *), 0);
+	assert_ptr_not_null(ptrs, "Unexpected mallocx failure");
+
+	bin_slab_reg_alloc_batch(&slab, bin_info, nregs, ptrs);
+	expect_u_eq(edata_nfree_get(&slab), 0,
+	    "Slab should be full after batch alloc of all regions");
+
+	/* Verify all pointers are within the slab and distinct. */
+	for (i = 0; i < nregs; i++) {
+		unsigned j;
+
+		expect_ptr_not_null(ptrs[i], "Batch pointer should be non-NULL");
+		expect_true(
+		    (uintptr_t)ptrs[i] >= (uintptr_t)edata_addr_get(&slab) &&
+		    (uintptr_t)ptrs[i] < (uintptr_t)edata_addr_get(&slab)
+		    + bin_info->slab_size,
+		    "Batch pointer should be within slab bounds");
+		for (j = 0; j < i; j++) {
+			expect_ptr_ne(ptrs[i], ptrs[j],
+			    "Batch pointers should be distinct");
+		}
+	}
+	free(ptrs);
+	free(edata_addr_get(&slab));
+}
+TEST_END
+
+/*
+ * Test partial batch allocation from a slab.
+ */
+TEST_BEGIN(test_bin_slab_reg_alloc_batch_partial) {
+	szind_t binind = 0;
+	const bin_info_t *bin_info = &bin_infos[binind];
+	edata_t slab;
+	unsigned nregs;
+	unsigned half;
+	void **ptrs;
+
+	create_mock_slab(&slab, binind, 0);
+	nregs = bin_info->nregs;
+
+	/* Only allocate half. */
+	half = nregs / 2;
+	if (half == 0) {
+		half = 1;
+	}
+	ptrs = mallocx(half * sizeof(void *), 0);
+	assert_ptr_not_null(ptrs, "Unexpected mallocx failure");
+
+	bin_slab_reg_alloc_batch(&slab, bin_info, half, ptrs);
+	expect_u_eq(edata_nfree_get(&slab), nregs - half,
+	    "Slab nfree should reflect partial batch alloc");
+
+	free(ptrs);
+	free(edata_addr_get(&slab));
+}
+TEST_END
+
+/*
+ * Test nonfull slab list insert, remove, and tryget.
+ */
+TEST_BEGIN(test_bin_slabs_nonfull) {
+	bin_t bin;
+	szind_t binind = 0;
+	edata_t slab1, slab2;
+	edata_t *got;
+	edata_t *remaining;
+
+	bin_init(&bin);
+
+	/* Create two non-full slabs with different serial numbers. */
+	create_mock_slab(&slab1, binind, 1);
+	create_mock_slab(&slab2, binind, 2);
+
+	/* Insert both into the nonfull heap. */
+	bin_slabs_nonfull_insert(&bin, &slab1);
+	expect_ptr_not_null(edata_heap_first(&bin.slabs_nonfull),
+	    "Nonfull heap should be non-empty after insert");
+
+	bin_slabs_nonfull_insert(&bin, &slab2);
+
+	/* tryget should return a slab. */
+	got = bin_slabs_nonfull_tryget(&bin);
+	expect_ptr_not_null(got, "tryget should return a slab");
+
+	/* Remove the remaining one explicitly. */
+	remaining = edata_heap_first(&bin.slabs_nonfull);
+	expect_ptr_not_null(remaining, "One slab should still remain");
+	bin_slabs_nonfull_remove(&bin, remaining);
+	expect_ptr_null(edata_heap_first(&bin.slabs_nonfull),
+	    "Nonfull heap should be empty after removing both slabs");
+
+	free(edata_addr_get(&slab1));
+	free(edata_addr_get(&slab2));
+}
+TEST_END
+
+/*
+ * Test full slab list insert and remove (non-auto arena case).
+ */
+TEST_BEGIN(test_bin_slabs_full) {
+	bin_t bin;
+	szind_t binind = 0;
+	const bin_info_t *bin_info = &bin_infos[binind];
+	edata_t slab;
+	unsigned i;
+
+	bin_init(&bin);
+	create_mock_slab(&slab, binind, 0);
+
+	/* Consume all regions so the slab appears full. */
+	for (i = 0; i < bin_info->nregs; i++) {
+		bin_slab_reg_alloc(&slab, bin_info);
+	}
+	expect_u_eq(edata_nfree_get(&slab), 0, "Slab should be full");
+
+	/* Insert into full list (is_auto=false to actually track). */
+	bin_slabs_full_insert(false, &bin, &slab);
+	expect_false(edata_list_active_empty(&bin.slabs_full),
+	    "Full list should be non-empty after insert");
+
+	/* Remove from full list. */
+	bin_slabs_full_remove(false, &bin, &slab);
+	expect_true(edata_list_active_empty(&bin.slabs_full),
+	    "Full list should be empty after remove");
+
+	free(edata_addr_get(&slab));
+}
+TEST_END
+
+/*
+ * Test that full slab insert/remove is a no-op for auto arenas.
+ */
+TEST_BEGIN(test_bin_slabs_full_auto) {
+	bin_t bin;
+	szind_t binind = 0;
+	const bin_info_t *bin_info = &bin_infos[binind];
+	edata_t slab;
+	unsigned i;
+
+	bin_init(&bin);
+	create_mock_slab(&slab, binind, 0);
+	for (i = 0; i < bin_info->nregs; i++) {
+		bin_slab_reg_alloc(&slab, bin_info);
+	}
+
+	/* is_auto=true: insert should be a no-op. */
+	bin_slabs_full_insert(true, &bin, &slab);
+	expect_true(edata_list_active_empty(&bin.slabs_full),
+	    "Full list should remain empty for auto arenas");
+
+	/* Remove should also be a no-op without crashing. */
+	bin_slabs_full_remove(true, &bin, &slab);
+
+	free(edata_addr_get(&slab));
+}
+TEST_END
+
+/*
+ * Test dissociate_slab when the slab is slabcur.
+ */
+TEST_BEGIN(test_bin_dissociate_slabcur) {
+	bin_t bin;
+	szind_t binind = 0;
+	edata_t slab;
+
+	bin_init(&bin);
+	create_mock_slab(&slab, binind, 0);
+
+	bin.slabcur = &slab;
+	bin_dissociate_slab(true, &slab, &bin);
+	expect_ptr_null(bin.slabcur,
+	    "Dissociating slabcur should NULL it out");
+
+	free(edata_addr_get(&slab));
+}
+TEST_END
+
+/*
+ * Test dissociate_slab when the slab is in the nonfull heap.
+ */
+TEST_BEGIN(test_bin_dissociate_nonfull) {
+	bin_t bin;
+	szind_t binind = 0;
+	const bin_info_t *bin_info = &bin_infos[binind];
+	edata_t slab;
+
+	bin_init(&bin);
+	create_mock_slab(&slab, binind, 0);
+
+	/*
+	 * Only dissociate from nonfull when nregs > 1.  For nregs == 1,
+	 * the slab goes directly to the full list, never nonfull.
+	 */
+	test_skip_if(bin_info->nregs == 1);
+
+	bin_slabs_nonfull_insert(&bin, &slab);
+	bin_dissociate_slab(true, &slab, &bin);
+	expect_ptr_null(edata_heap_first(&bin.slabs_nonfull),
+	    "Nonfull heap should be empty after dissociating the slab");
+
+	free(edata_addr_get(&slab));
+}
+TEST_END
+
+/*
+ * Test refill slabcur with a fresh slab.
+ */
+TEST_BEGIN(test_bin_refill_slabcur_with_fresh_slab) {
+	tsdn_t *tsdn = tsdn_fetch();
+	bin_t bin;
+	szind_t binind = 0;
+	const bin_info_t *bin_info = &bin_infos[binind];
+	edata_t fresh;
+
+	bin_init(&bin);
+	create_mock_slab(&fresh, binind, 0);
+
+	malloc_mutex_lock(tsdn, &bin.lock);
+	bin_refill_slabcur_with_fresh_slab(tsdn, &bin, binind, &fresh);
+	expect_ptr_eq(bin.slabcur, &fresh,
+	    "Fresh slab should become slabcur");
+	if (config_stats) {
+		expect_u64_eq(bin.stats.nslabs, 1,
+		    "nslabs should be 1 after installing fresh slab");
+		expect_zu_eq(bin.stats.curslabs, 1,
+		    "curslabs should be 1 after installing fresh slab");
+	}
+	expect_u_eq(edata_nfree_get(bin.slabcur), bin_info->nregs,
+	    "Fresh slab should have all regions free");
+	malloc_mutex_unlock(tsdn, &bin.lock);
+
+	free(edata_addr_get(&fresh));
+}
+TEST_END
+
+/*
+ * Test refill slabcur without a fresh slab (from the nonfull heap).
+ */
+TEST_BEGIN(test_bin_refill_slabcur_no_fresh_slab) {
+	tsdn_t *tsdn = tsdn_fetch();
+	bin_t bin;
+	szind_t binind = 0;
+	edata_t slab;
+	bool empty;
+
+	bin_init(&bin);
+	create_mock_slab(&slab, binind, 0);
+
+	malloc_mutex_lock(tsdn, &bin.lock);
+
+	/* With no slabcur and empty nonfull heap, refill should fail. */
+	empty = bin_refill_slabcur_no_fresh_slab(tsdn, true, &bin);
+	expect_true(empty,
+	    "Refill should fail when nonfull heap is empty");
+	expect_ptr_null(bin.slabcur, "slabcur should remain NULL");
+
+	/* Insert a slab into nonfull, then refill should succeed. */
+	bin_slabs_nonfull_insert(&bin, &slab);
+	empty = bin_refill_slabcur_no_fresh_slab(tsdn, true, &bin);
+	expect_false(empty,
+	    "Refill should succeed when nonfull heap has a slab");
+	expect_ptr_eq(bin.slabcur, &slab,
+	    "slabcur should be the slab from nonfull heap");
+
+	malloc_mutex_unlock(tsdn, &bin.lock);
+	free(edata_addr_get(&slab));
+}
+TEST_END
+
+/*
+ * Test that refill moves a full slabcur into the full list.
+ */
+TEST_BEGIN(test_bin_refill_slabcur_full_to_list) {
+	tsdn_t *tsdn = tsdn_fetch();
+	bin_t bin;
+	szind_t binind = 0;
+	const bin_info_t *bin_info = &bin_infos[binind];
+	edata_t full_slab, nonfull_slab;
+	unsigned i;
+	bool empty;
+
+	bin_init(&bin);
+	create_mock_slab(&full_slab, binind, 0);
+	create_mock_slab(&nonfull_slab, binind, 1);
+
+	/* Make full_slab actually full. */
+	for (i = 0; i < bin_info->nregs; i++) {
+		bin_slab_reg_alloc(&full_slab, bin_info);
+	}
+
+	malloc_mutex_lock(tsdn, &bin.lock);
+	bin.slabcur = &full_slab;
+	bin_slabs_nonfull_insert(&bin, &nonfull_slab);
+
+	/* Refill should move the full slabcur to full list and pick nonfull. */
+	empty = bin_refill_slabcur_no_fresh_slab(tsdn, false, &bin);
+	expect_false(empty, "Refill should succeed");
+	expect_ptr_eq(bin.slabcur, &nonfull_slab,
+	    "slabcur should now be the nonfull slab");
+	expect_false(edata_list_active_empty(&bin.slabs_full),
+	    "Old full slabcur should be in the full list");
+	malloc_mutex_unlock(tsdn, &bin.lock);
+
+	free(edata_addr_get(&full_slab));
+	free(edata_addr_get(&nonfull_slab));
+}
+TEST_END
+
+/*
+ * Test malloc with a fresh slab.
+ */
+TEST_BEGIN(test_bin_malloc_with_fresh_slab) {
+	tsdn_t *tsdn = tsdn_fetch();
+	bin_t bin;
+	szind_t binind = 0;
+	const bin_info_t *bin_info = &bin_infos[binind];
+	edata_t fresh;
+	void *ptr;
+
+	bin_init(&bin);
+	create_mock_slab(&fresh, binind, 0);
+
+	malloc_mutex_lock(tsdn, &bin.lock);
+	ptr = bin_malloc_with_fresh_slab(tsdn, &bin, binind, &fresh);
+	expect_ptr_not_null(ptr, "Should allocate from fresh slab");
+	expect_ptr_eq(bin.slabcur, &fresh,
+	    "Fresh slab should be installed as slabcur");
+	expect_u_eq(edata_nfree_get(&fresh), bin_info->nregs - 1,
+	    "One region should be consumed from fresh slab");
+	if (config_stats) {
+		expect_u64_eq(bin.stats.nslabs, 1, "nslabs should be 1");
+		expect_zu_eq(bin.stats.curslabs, 1, "curslabs should be 1");
+	}
+	malloc_mutex_unlock(tsdn, &bin.lock);
+
+	free(edata_addr_get(&fresh));
+}
+TEST_END
+
+/*
+ * Test malloc without a fresh slab (from existing slabcur).
+ */
+TEST_BEGIN(test_bin_malloc_no_fresh_slab) {
+	tsdn_t *tsdn = tsdn_fetch();
+	bin_t bin;
+	szind_t binind = 0;
+	const bin_info_t *bin_info = &bin_infos[binind];
+	edata_t slab;
+	void *ptr;
+
+	bin_init(&bin);
+	create_mock_slab(&slab, binind, 0);
+
+	malloc_mutex_lock(tsdn, &bin.lock);
+
+	/* With no slabcur and empty nonfull, should return NULL. */
+	ptr = bin_malloc_no_fresh_slab(tsdn, true, &bin, binind);
+	expect_ptr_null(ptr,
+	    "Should return NULL when no slabs available");
+
+	/* Set up a slabcur; malloc should succeed. */
+	bin.slabcur = &slab;
+	ptr = bin_malloc_no_fresh_slab(tsdn, true, &bin, binind);
+	expect_ptr_not_null(ptr,
+	    "Should allocate from slabcur");
+	expect_u_eq(edata_nfree_get(&slab), bin_info->nregs - 1,
+	    "One region should be consumed");
+	malloc_mutex_unlock(tsdn, &bin.lock);
+
+	free(edata_addr_get(&slab));
+}
+TEST_END
+
+/*
+ * Test the bin_dalloc_locked begin/step/finish sequence.
+ */
+TEST_BEGIN(test_bin_dalloc_locked) {
+	tsdn_t *tsdn = tsdn_fetch();
+	bin_t bin;
+	szind_t binind = 0;
+	const bin_info_t *bin_info = &bin_infos[binind];
+	edata_t slab;
+	unsigned nregs;
+	void **ptrs;
+	unsigned i;
+	bin_dalloc_locked_info_t info;
+	bool slab_empty;
+	bool found_empty;
+
+	bin_init(&bin);
+	create_mock_slab(&slab, binind, 0);
+
+	/* Allocate all regions from the slab. */
+	nregs = bin_info->nregs;
+	ptrs = mallocx(nregs * sizeof(void *), 0);
+	assert_ptr_not_null(ptrs, "Unexpected mallocx failure");
+	for (i = 0; i < nregs; i++) {
+		ptrs[i] = bin_slab_reg_alloc(&slab, bin_info);
+		assert_ptr_not_null(ptrs[i], "Alloc should succeed");
+	}
+	expect_u_eq(edata_nfree_get(&slab), 0, "Slab should be full");
+
+	/* Set this slab as slabcur so dalloc steps work correctly. */
+	bin.slabcur = &slab;
+	if (config_stats) {
+		bin.stats.nmalloc = nregs;
+		bin.stats.curregs = nregs;
+		bin.stats.nslabs = 1;
+		bin.stats.curslabs = 1;
+	}
+
+	malloc_mutex_lock(tsdn, &bin.lock);
+
+	/* Free one region and verify step returns false (not yet empty). */
+	bin_dalloc_locked_begin(&info, binind);
+	slab_empty = bin_dalloc_locked_step(
+	    tsdn, true, &bin, &info, binind, &slab, ptrs[0]);
+	if (nregs > 1) {
+		expect_false(slab_empty,
+		    "Slab should not be empty after freeing one region");
+	}
+	bin_dalloc_locked_finish(tsdn, &bin, &info);
+	if (config_stats) {
+		expect_zu_eq(bin.stats.curregs, nregs - 1,
+		    "curregs should decrement by 1");
+	}
+
+	/* Free all remaining regions; the last one should empty the slab. */
+	bin_dalloc_locked_begin(&info, binind);
+	found_empty = false;
+	for (i = 1; i < nregs; i++) {
+		slab_empty = bin_dalloc_locked_step(
+		    tsdn, true, &bin, &info, binind, &slab, ptrs[i]);
+		if (slab_empty) {
+			found_empty = true;
+		}
+	}
+	bin_dalloc_locked_finish(tsdn, &bin, &info);
+	expect_true(found_empty,
+	    "Freeing all regions should produce an empty slab");
+	expect_u_eq(edata_nfree_get(&slab), nregs,
+	    "All regions should be free");
+	if (config_stats) {
+		expect_zu_eq(bin.stats.curregs, 0,
+		    "curregs should be 0 after freeing all");
+	}
+
+	malloc_mutex_unlock(tsdn, &bin.lock);
+	free(ptrs);
+	free(edata_addr_get(&slab));
+}
+TEST_END
+
+/*
+ * Test that bin_lower_slab replaces slabcur when the new slab is older.
+ */
+TEST_BEGIN(test_bin_lower_slab_replaces_slabcur) {
+	tsdn_t *tsdn = tsdn_fetch();
+	bin_t bin;
+	szind_t binind = 0;
+	edata_t slab_old, slab_new;
+
+	bin_init(&bin);
+
+	/* slab_old has sn=0 (older), slab_new has sn=1 (newer). */
+	create_mock_slab(&slab_old, binind, 0);
+	create_mock_slab(&slab_new, binind, 1);
+
+	/* Make slab_new the slabcur. */
+	bin.slabcur = &slab_new;
+
+	/*
+	 * bin_lower_slab with the older slab should replace slabcur and move
+	 * slab_new into either nonfull or full.
+	 */
+	malloc_mutex_lock(tsdn, &bin.lock);
+	bin_lower_slab(tsdn, true, &slab_old, &bin);
+	expect_ptr_eq(bin.slabcur, &slab_old,
+	    "Older slab should replace slabcur");
+	malloc_mutex_unlock(tsdn, &bin.lock);
+
+	free(edata_addr_get(&slab_old));
+	free(edata_addr_get(&slab_new));
+}
+TEST_END
+
+/*
+ * Test that bin_lower_slab inserts into the nonfull heap when the new slab
+ * is newer than slabcur.
+ */
+TEST_BEGIN(test_bin_lower_slab_inserts_nonfull) {
+	tsdn_t *tsdn = tsdn_fetch();
+	bin_t bin;
+	szind_t binind = 0;
+	edata_t slab_old, slab_new;
+
+	bin_init(&bin);
+	create_mock_slab(&slab_old, binind, 0);
+	create_mock_slab(&slab_new, binind, 1);
+
+	/* Make slab_old the slabcur (older). */
+	bin.slabcur = &slab_old;
+
+	/* bin_lower_slab with the newer slab should insert into nonfull. */
+	malloc_mutex_lock(tsdn, &bin.lock);
+	bin_lower_slab(tsdn, true, &slab_new, &bin);
+	expect_ptr_eq(bin.slabcur, &slab_old,
+	    "Older slabcur should remain");
+	expect_ptr_not_null(edata_heap_first(&bin.slabs_nonfull),
+	    "Newer slab should be inserted into nonfull heap");
+	malloc_mutex_unlock(tsdn, &bin.lock);
+
+	free(edata_addr_get(&slab_old));
+	free(edata_addr_get(&slab_new));
+}
+TEST_END
+
+/*
+ * Test bin_dalloc_slab_prepare updates stats.
+ */
+TEST_BEGIN(test_bin_dalloc_slab_prepare) {
+	tsdn_t *tsdn = tsdn_fetch();
+	bin_t bin;
+	szind_t binind = 0;
+	edata_t slab;
+
+	bin_init(&bin);
+	create_mock_slab(&slab, binind, 0);
+
+	if (config_stats) {
+		bin.stats.curslabs = 2;
+	}
+
+	/*
+	 * bin_dalloc_slab_prepare requires the slab is not slabcur,
+	 * so leave slabcur NULL.
+	 */
+	malloc_mutex_lock(tsdn, &bin.lock);
+	bin_dalloc_slab_prepare(tsdn, &slab, &bin);
+	if (config_stats) {
+		expect_zu_eq(bin.stats.curslabs, 1,
+		    "curslabs should decrement");
+	}
+	malloc_mutex_unlock(tsdn, &bin.lock);
+
+	free(edata_addr_get(&slab));
+}
+TEST_END
+
+/*
+ * Test bin_shard_sizes_boot and bin_update_shard_size.
+ */
+TEST_BEGIN(test_bin_shard_sizes) {
+	unsigned shard_sizes[SC_NBINS];
+	unsigned i;
+	bool err;
+	szind_t ind1, ind2;
+
+	/* Boot should set all to the default. */
+	bin_shard_sizes_boot(shard_sizes);
+	for (i = 0; i < SC_NBINS; i++) {
+		expect_u_eq(shard_sizes[i], N_BIN_SHARDS_DEFAULT,
+		    "Shard sizes should be default after boot");
+	}
+
+	/* Update with nshards=0 should fail (returns true). */
+	err = bin_update_shard_size(shard_sizes, 1, 1, 0);
+	expect_true(err, "nshards=0 should be an error");
+
+	/* Update with nshards > BIN_SHARDS_MAX should fail. */
+	err = bin_update_shard_size(shard_sizes, 1, 1, BIN_SHARDS_MAX + 1);
+	expect_true(err, "nshards > BIN_SHARDS_MAX should be an error");
+
+	/* Valid update: set a range to 4 shards. */
+	err = bin_update_shard_size(shard_sizes, 1, 128, 4);
+	expect_false(err, "Valid update should succeed");
+	/* Verify the range was updated. */
+	ind1 = sz_size2index_compute(1);
+	ind2 = sz_size2index_compute(128);
+	for (i = ind1; i <= ind2; i++) {
+		expect_u_eq(shard_sizes[i], 4,
+		    "Updated range should have nshards=4");
+	}
+
+	/* Update beyond SC_SMALL_MAXCLASS should be clamped, not fail. */
+	err = bin_update_shard_size(shard_sizes,
+	    SC_SMALL_MAXCLASS, SC_SMALL_MAXCLASS * 2, 2);
+	expect_false(err,
+	    "Update with end beyond SMALL_MAXCLASS should succeed");
+}
+TEST_END
+
+/*
+ * Test a full alloc-then-free cycle by allocating all regions from a bin
+ * via bin_malloc_with_fresh_slab, then freeing them all via the
+ * bin_dalloc_locked sequence.
+ */
+TEST_BEGIN(test_bin_alloc_free_cycle) {
+	tsdn_t *tsdn = tsdn_fetch();
+	bin_t bin;
+	szind_t binind = 0;
+	const bin_info_t *bin_info = &bin_infos[binind];
+	unsigned nregs = bin_info->nregs;
+	edata_t slab;
+	void **ptrs;
+	unsigned i;
+	bin_dalloc_locked_info_t info;
+
+	bin_init(&bin);
+	create_mock_slab(&slab, binind, 0);
+
+	ptrs = mallocx(nregs * sizeof(void *), 0);
+	assert_ptr_not_null(ptrs, "Unexpected mallocx failure");
+
+	malloc_mutex_lock(tsdn, &bin.lock);
+
+	/* Allocate the first pointer via fresh slab path. */
+	ptrs[0] = bin_malloc_with_fresh_slab(tsdn, &bin, binind, &slab);
+	expect_ptr_not_null(ptrs[0], "First alloc should succeed");
+
+	/* Allocate the rest from slabcur. */
+	for (i = 1; i < nregs; i++) {
+		ptrs[i] = bin_malloc_no_fresh_slab(tsdn, true, &bin, binind);
+		expect_ptr_not_null(ptrs[i], "Alloc should succeed");
+	}
+	if (config_stats) {
+		bin.stats.nmalloc += nregs;
+		bin.stats.curregs += nregs;
+	}
+
+	expect_u_eq(edata_nfree_get(&slab), 0, "Slab should be full");
+
+	/* Free all regions. */
+	bin_dalloc_locked_begin(&info, binind);
+	for (i = 0; i < nregs; i++) {
+		bin_dalloc_locked_step(
+		    tsdn, true, &bin, &info, binind, &slab, ptrs[i]);
+	}
+	bin_dalloc_locked_finish(tsdn, &bin, &info);
+
+	expect_u_eq(edata_nfree_get(&slab), nregs,
+	    "All regions should be free after full cycle");
+	if (config_stats) {
+		expect_zu_eq(bin.stats.curregs, 0,
+		    "curregs should be 0 after full cycle");
+	}
+
+	malloc_mutex_unlock(tsdn, &bin.lock);
+	free(ptrs);
+	free(edata_addr_get(&slab));
+}
+TEST_END
+
+/*
+ * Test alloc/free cycle across multiple bin size classes.
+ */
+TEST_BEGIN(test_bin_multi_size_class) {
+	tsdn_t *tsdn = tsdn_fetch();
+	szind_t test_indices[] = {0, SC_NBINS / 2, SC_NBINS - 1};
+	unsigned nindices = sizeof(test_indices) / sizeof(test_indices[0]);
+	unsigned t;
+
+	for (t = 0; t < nindices; t++) {
+		szind_t binind = test_indices[t];
+		const bin_info_t *bin_info = &bin_infos[binind];
+		bin_t bin;
+		edata_t slab;
+		void *ptr;
+		bin_dalloc_locked_info_t info;
+
+		bin_init(&bin);
+		create_mock_slab(&slab, binind, 0);
+
+		malloc_mutex_lock(tsdn, &bin.lock);
+		ptr = bin_malloc_with_fresh_slab(
+		    tsdn, &bin, binind, &slab);
+		expect_ptr_not_null(ptr,
+		    "Alloc should succeed for binind %u", binind);
+		expect_u_eq(edata_nfree_get(&slab), bin_info->nregs - 1,
+		    "nfree should be nregs-1 for binind %u", binind);
+
+		/* Free the allocated region. */
+		if (config_stats) {
+			bin.stats.nmalloc = 1;
+			bin.stats.curregs = 1;
+		}
+		bin_dalloc_locked_begin(&info, binind);
+		bin_dalloc_locked_step(
+		    tsdn, true, &bin, &info, binind, &slab, ptr);
+		bin_dalloc_locked_finish(tsdn, &bin, &info);
+
+		expect_u_eq(edata_nfree_get(&slab), bin_info->nregs,
+		    "All regions should be free for binind %u", binind);
+		malloc_mutex_unlock(tsdn, &bin.lock);
+
+		free(edata_addr_get(&slab));
+	}
+}
+TEST_END
+
+int
+main(void) {
+	return test(
+	    test_bin_init,
+	    test_bin_slab_reg_alloc,
+	    test_bin_slab_reg_alloc_batch,
+	    test_bin_slab_reg_alloc_batch_partial,
+	    test_bin_slabs_nonfull,
+	    test_bin_slabs_full,
+	    test_bin_slabs_full_auto,
+	    test_bin_dissociate_slabcur,
+	    test_bin_dissociate_nonfull,
+	    test_bin_refill_slabcur_with_fresh_slab,
+	    test_bin_refill_slabcur_no_fresh_slab,
+	    test_bin_refill_slabcur_full_to_list,
+	    test_bin_malloc_with_fresh_slab,
+	    test_bin_malloc_no_fresh_slab,
+	    test_bin_dalloc_locked,
+	    test_bin_lower_slab_replaces_slabcur,
+	    test_bin_lower_slab_inserts_nonfull,
+	    test_bin_dalloc_slab_prepare,
+	    test_bin_shard_sizes,
+	    test_bin_alloc_free_cycle,
+	    test_bin_multi_size_class);
+}
diff --git a/test/unit/binshard.c b/test/unit/binshard.c
index 040ea54d..c3e1c2d6 100644
--- a/test/unit/binshard.c
+++ b/test/unit/binshard.c
@@ -7,9 +7,9 @@
 
 static void *
 thd_producer(void *varg) {
-	void **mem = varg;
+	void   **mem = varg;
 	unsigned arena, i;
-	size_t sz;
+	size_t   sz;
 
 	sz = sizeof(arena);
 	/* Remote arena. */
@@ -28,8 +28,8 @@ thd_producer(void *varg) {
 }
 
 TEST_BEGIN(test_producer_consumer) {
-	thd_t thds[NTHREADS];
-	void *mem[NTHREADS][REMOTE_NALLOC];
+	thd_t    thds[NTHREADS];
+	void    *mem[NTHREADS][REMOTE_NALLOC];
 	unsigned i;
 
 	/* Create producer threads to allocate. */
@@ -42,8 +42,8 @@ TEST_BEGIN(test_producer_consumer) {
 	/* Remote deallocation by the current thread. */
 	for (i = 0; i < NTHREADS; i++) {
 		for (unsigned j = 0; j < REMOTE_NALLOC; j++) {
-			expect_ptr_not_null(mem[i][j],
-			    "Unexpected remote allocation failure");
+			expect_ptr_not_null(
+			    mem[i][j], "Unexpected remote allocation failure");
 			dallocx(mem[i][j], 0);
 		}
 	}
@@ -52,7 +52,7 @@ TEST_END
 
 static void *
 thd_start(void *varg) {
-	void *ptr, *ptr2;
+	void    *ptr, *ptr2;
 	edata_t *edata;
 	unsigned shard1, shard2;
 
@@ -82,10 +82,10 @@ thd_start(void *varg) {
 }
 
 TEST_BEGIN(test_bin_shard_mt) {
-	test_skip_if(have_percpu_arena &&
-	    PERCPU_ARENA_ENABLED(opt_percpu_arena));
+	test_skip_if(
+	    have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena));
 
-	thd_t thds[NTHREADS];
+	thd_t    thds[NTHREADS];
 	unsigned i;
 	for (i = 0; i < NTHREADS; i++) {
 		thd_create(&thds[i], thd_start, NULL);
@@ -104,8 +104,8 @@ TEST_END
 
 TEST_BEGIN(test_bin_shard) {
 	unsigned nbins, i;
-	size_t mib[4], mib2[4];
-	size_t miblen, miblen2, len;
+	size_t   mib[4], mib2[4];
+	size_t   miblen, miblen2, len;
 
 	len = sizeof(nbins);
 	expect_d_eq(mallctl("arenas.nbins", (void *)&nbins, &len, NULL, 0), 0,
@@ -120,17 +120,19 @@ TEST_BEGIN(test_bin_shard) {
 
 	for (i = 0; i < nbins; i++) {
 		uint32_t nshards;
-		size_t size, sz1, sz2;
+		size_t   size, sz1, sz2;
 
 		mib[2] = i;
 		sz1 = sizeof(nshards);
-		expect_d_eq(mallctlbymib(mib, miblen, (void *)&nshards, &sz1,
-		    NULL, 0), 0, "Unexpected mallctlbymib() failure");
+		expect_d_eq(
+		    mallctlbymib(mib, miblen, (void *)&nshards, &sz1, NULL, 0),
+		    0, "Unexpected mallctlbymib() failure");
 
 		mib2[2] = i;
 		sz2 = sizeof(size);
-		expect_d_eq(mallctlbymib(mib2, miblen2, (void *)&size, &sz2,
-		    NULL, 0), 0, "Unexpected mallctlbymib() failure");
+		expect_d_eq(
+		    mallctlbymib(mib2, miblen2, (void *)&size, &sz2, NULL, 0),
+		    0, "Unexpected mallctlbymib() failure");
 
 		if (size >= 1 && size <= 128) {
 			expect_u_eq(nshards, 16, "Unexpected nshards");
@@ -148,7 +150,5 @@ TEST_END
 int
 main(void) {
 	return test_no_reentrancy(
-	    test_bin_shard,
-	    test_bin_shard_mt,
-	    test_producer_consumer);
+	    test_bin_shard, test_bin_shard_mt, test_producer_consumer);
 }
diff --git a/test/unit/binshard.sh b/test/unit/binshard.sh
index c1d58c88..1882e90a 100644
--- a/test/unit/binshard.sh
+++ b/test/unit/binshard.sh
@@ -1,3 +1,3 @@
 #!/bin/sh
 
-export MALLOC_CONF="narenas:1,bin_shards:1-160:16|129-512:4|256-256:8"
+export MALLOC_CONF="narenas:1,bin_shards:1-160:16|129-512:4|256-256:8|513-8070450532247928832:1"
diff --git a/test/unit/bit_util.c b/test/unit/bit_util.c
index 7d31b210..986562d1 100644
--- a/test/unit/bit_util.c
+++ b/test/unit/bit_util.c
@@ -2,36 +2,37 @@
 
 #include "jemalloc/internal/bit_util.h"
 
-#define TEST_POW2_CEIL(t, suf, pri) do {				\
-	unsigned i, pow2;						\
-	t x;								\
-									\
-	expect_##suf##_eq(pow2_ceil_##suf(0), 0, "Unexpected result");	\
-									\
-	for (i = 0; i < sizeof(t) * 8; i++) {				\
-		expect_##suf##_eq(pow2_ceil_##suf(((t)1) << i), ((t)1)	\
-		    << i, "Unexpected result");				\
-	}								\
-									\
-	for (i = 2; i < sizeof(t) * 8; i++) {				\
-		expect_##suf##_eq(pow2_ceil_##suf((((t)1) << i) - 1),	\
-		    ((t)1) << i, "Unexpected result");			\
-	}								\
-									\
-	for (i = 0; i < sizeof(t) * 8 - 1; i++) {			\
-		expect_##suf##_eq(pow2_ceil_##suf((((t)1) << i) + 1),	\
-		    ((t)1) << (i+1), "Unexpected result");		\
-	}								\
-									\
-	for (pow2 = 1; pow2 < 25; pow2++) {				\
-		for (x = (((t)1) << (pow2-1)) + 1; x <= ((t)1) << pow2;	\
-		    x++) {						\
-			expect_##suf##_eq(pow2_ceil_##suf(x),		\
-			    ((t)1) << pow2,				\
-			    "Unexpected result, x=%"pri, x);		\
-		}							\
-	}								\
-} while (0)
+#define TEST_POW2_CEIL(t, suf, pri)                                            \
+	do {                                                                   \
+		unsigned i, pow2;                                              \
+		t        x;                                                    \
+                                                                               \
+		expect_##suf##_eq(pow2_ceil_##suf(0), 0, "Unexpected result"); \
+                                                                               \
+		for (i = 0; i < sizeof(t) * 8; i++) {                          \
+			expect_##suf##_eq(pow2_ceil_##suf(((t)1) << i),        \
+			    ((t)1) << i, "Unexpected result");                 \
+		}                                                              \
+                                                                               \
+		for (i = 2; i < sizeof(t) * 8; i++) {                          \
+			expect_##suf##_eq(pow2_ceil_##suf((((t)1) << i) - 1),  \
+			    ((t)1) << i, "Unexpected result");                 \
+		}                                                              \
+                                                                               \
+		for (i = 0; i < sizeof(t) * 8 - 1; i++) {                      \
+			expect_##suf##_eq(pow2_ceil_##suf((((t)1) << i) + 1),  \
+			    ((t)1) << (i + 1), "Unexpected result");           \
+		}                                                              \
+                                                                               \
+		for (pow2 = 1; pow2 < 25; pow2++) {                            \
+			for (x = (((t)1) << (pow2 - 1)) + 1;                   \
+			     x <= ((t)1) << pow2; x++) {                       \
+				expect_##suf##_eq(pow2_ceil_##suf(x),          \
+				    ((t)1) << pow2,                            \
+				    "Unexpected result, x=%" pri, x);          \
+			}                                                      \
+		}                                                              \
+	} while (0)
 
 TEST_BEGIN(test_pow2_ceil_u64) {
 	TEST_POW2_CEIL(uint64_t, u64, FMTu64);
@@ -48,26 +49,26 @@ TEST_BEGIN(test_pow2_ceil_zu) {
 }
 TEST_END
 
-void
+static void
 expect_lg_ceil_range(size_t input, unsigned answer) {
 	if (input == 1) {
 		expect_u_eq(0, answer, "Got %u as lg_ceil of 1", answer);
 		return;
 	}
-	expect_zu_le(input, (ZU(1) << answer),
-	    "Got %u as lg_ceil of %zu", answer, input);
-	expect_zu_gt(input, (ZU(1) << (answer - 1)),
-	    "Got %u as lg_ceil of %zu", answer, input);
+	expect_zu_le(input, (ZU(1) << answer), "Got %u as lg_ceil of %zu",
+	    answer, input);
+	expect_zu_gt(input, (ZU(1) << (answer - 1)), "Got %u as lg_ceil of %zu",
+	    answer, input);
 }
 
-void
+static void
 expect_lg_floor_range(size_t input, unsigned answer) {
 	if (input == 1) {
 		expect_u_eq(0, answer, "Got %u as lg_floor of 1", answer);
 		return;
 	}
-	expect_zu_ge(input, (ZU(1) << answer),
-	    "Got %u as lg_floor of %zu", answer, input);
+	expect_zu_ge(input, (ZU(1) << answer), "Got %u as lg_floor of %zu",
+	    answer, input);
 	expect_zu_lt(input, (ZU(1) << (answer + 1)),
 	    "Got %u as lg_floor of %zu", answer, input);
 }
@@ -101,22 +102,24 @@ TEST_BEGIN(test_lg_ceil_floor) {
 }
 TEST_END
 
-#define TEST_FFS(t, suf, test_suf, pri) do {				\
-	for (unsigned i = 0; i < sizeof(t) * 8; i++) {			\
-		for (unsigned j = 0; j <= i; j++) {			\
-			for (unsigned k = 0; k <= j; k++) {		\
-				t x = (t)1 << i;			\
-				x |= (t)1 << j;				\
-				x |= (t)1 << k;				\
-				expect_##test_suf##_eq(ffs_##suf(x), k,	\
-				    "Unexpected result, x=%"pri, x);	\
-			}						\
-		}							\
-	}								\
-} while(0)
+#define TEST_FFS(t, suf, test_suf, pri)                                        \
+	do {                                                                   \
+		for (unsigned i = 0; i < sizeof(t) * 8; i++) {                 \
+			for (unsigned j = 0; j <= i; j++) {                    \
+				for (unsigned k = 0; k <= j; k++) {            \
+					t x = (t)1 << i;                       \
+					x |= (t)1 << j;                        \
+					x |= (t)1 << k;                        \
+					expect_##test_suf##_eq(ffs_##suf(x),   \
+					    k, "Unexpected result, x=%" pri,   \
+					    x);                                \
+				}                                              \
+			}                                                      \
+		}                                                              \
+	} while (0)
 
 TEST_BEGIN(test_ffs_u) {
-	TEST_FFS(unsigned, u, u,"u");
+	TEST_FFS(unsigned, u, u, "u");
 }
 TEST_END
 
@@ -145,22 +148,24 @@ TEST_BEGIN(test_ffs_zu) {
 }
 TEST_END
 
-#define TEST_FLS(t, suf, test_suf, pri) do {				\
-	for (unsigned i = 0; i < sizeof(t) * 8; i++) {			\
-		for (unsigned j = 0; j <= i; j++) {			\
-			for (unsigned k = 0; k <= j; k++) {		\
-				t x = (t)1 << i;			\
-				x |= (t)1 << j;				\
-				x |= (t)1 << k;				\
-				expect_##test_suf##_eq(fls_##suf(x), i,	\
-				    "Unexpected result, x=%"pri, x);	\
-			}						\
-		}							\
-	}								\
-} while(0)
+#define TEST_FLS(t, suf, test_suf, pri)                                        \
+	do {                                                                   \
+		for (unsigned i = 0; i < sizeof(t) * 8; i++) {                 \
+			for (unsigned j = 0; j <= i; j++) {                    \
+				for (unsigned k = 0; k <= j; k++) {            \
+					t x = (t)1 << i;                       \
+					x |= (t)1 << j;                        \
+					x |= (t)1 << k;                        \
+					expect_##test_suf##_eq(fls_##suf(x),   \
+					    i, "Unexpected result, x=%" pri,   \
+					    x);                                \
+				}                                              \
+			}                                                      \
+		}                                                              \
+	} while (0)
 
 TEST_BEGIN(test_fls_u) {
-	TEST_FLS(unsigned, u, u,"u");
+	TEST_FLS(unsigned, u, u, "u");
 }
 TEST_END
 
@@ -190,7 +195,7 @@ TEST_BEGIN(test_fls_zu) {
 TEST_END
 
 TEST_BEGIN(test_fls_u_slow) {
-	TEST_FLS(unsigned, u_slow, u,"u");
+	TEST_FLS(unsigned, u_slow, u, "u");
 }
 TEST_END
 
@@ -226,6 +231,7 @@ expand_byte_to_mask(unsigned byte) {
 	return result;
 }
 
+/* clang-format off */
 #define TEST_POPCOUNT(t, suf, pri_hex) do {				\
 	t bmul = (t)0x0101010101010101ULL;				\
 	for (unsigned i = 0; i < (1 << sizeof(t)); i++) {		\
@@ -245,6 +251,7 @@ expand_byte_to_mask(unsigned byte) {
 		}							\
 	}								\
 } while (0)
+/* clang-format on */
 
 TEST_BEGIN(test_popcount_u) {
 	TEST_POPCOUNT(unsigned, u, "x");
@@ -278,30 +285,11 @@ TEST_END
 
 int
 main(void) {
-	return test_no_reentrancy(
-	    test_pow2_ceil_u64,
-	    test_pow2_ceil_u32,
-	    test_pow2_ceil_zu,
-	    test_lg_ceil_floor,
-	    test_ffs_u,
-	    test_ffs_lu,
-	    test_ffs_llu,
-	    test_ffs_u32,
-	    test_ffs_u64,
-	    test_ffs_zu,
-	    test_fls_u,
-	    test_fls_lu,
-	    test_fls_llu,
-	    test_fls_u32,
-	    test_fls_u64,
-	    test_fls_zu,
-	    test_fls_u_slow,
-	    test_fls_lu_slow,
-	    test_fls_llu_slow,
-	    test_popcount_u,
-	    test_popcount_u_slow,
-	    test_popcount_lu,
-	    test_popcount_lu_slow,
-	    test_popcount_llu,
-	    test_popcount_llu_slow);
+	return test_no_reentrancy(test_pow2_ceil_u64, test_pow2_ceil_u32,
+	    test_pow2_ceil_zu, test_lg_ceil_floor, test_ffs_u, test_ffs_lu,
+	    test_ffs_llu, test_ffs_u32, test_ffs_u64, test_ffs_zu, test_fls_u,
+	    test_fls_lu, test_fls_llu, test_fls_u32, test_fls_u64, test_fls_zu,
+	    test_fls_u_slow, test_fls_lu_slow, test_fls_llu_slow,
+	    test_popcount_u, test_popcount_u_slow, test_popcount_lu,
+	    test_popcount_lu_slow, test_popcount_llu, test_popcount_llu_slow);
 }
diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c
index 78e542b6..b3048cf3 100644
--- a/test/unit/bitmap.c
+++ b/test/unit/bitmap.c
@@ -9,14 +9,17 @@ test_bitmap_initializer_body(const bitmap_info_t *binfo, size_t nbits) {
 
 	expect_zu_eq(bitmap_size(binfo), bitmap_size(&binfo_dyn),
 	    "Unexpected difference between static and dynamic initialization, "
-	    "nbits=%zu", nbits);
+	    "nbits=%zu",
+	    nbits);
 	expect_zu_eq(binfo->nbits, binfo_dyn.nbits,
 	    "Unexpected difference between static and dynamic initialization, "
-	    "nbits=%zu", nbits);
+	    "nbits=%zu",
+	    nbits);
 #ifdef BITMAP_USE_TREE
 	expect_u_eq(binfo->nlevels, binfo_dyn.nlevels,
 	    "Unexpected difference between static and dynamic initialization, "
-	    "nbits=%zu", nbits);
+	    "nbits=%zu",
+	    nbits);
 	{
 		unsigned i;
 
@@ -24,7 +27,8 @@ test_bitmap_initializer_body(const bitmap_info_t *binfo, size_t nbits) {
 			expect_zu_eq(binfo->levels[i].group_offset,
 			    binfo_dyn.levels[i].group_offset,
 			    "Unexpected difference between static and dynamic "
-			    "initialization, nbits=%zu, level=%u", nbits, i);
+			    "initialization, nbits=%zu, level=%u",
+			    nbits, i);
 		}
 	}
 #else
@@ -34,12 +38,12 @@ test_bitmap_initializer_body(const bitmap_info_t *binfo, size_t nbits) {
 }
 
 TEST_BEGIN(test_bitmap_initializer) {
-#define NB(nbits) {							\
-		if (nbits <= BITMAP_MAXBITS) {				\
-			bitmap_info_t binfo =				\
-			    BITMAP_INFO_INITIALIZER(nbits);		\
-			test_bitmap_initializer_body(&binfo, nbits);	\
-		}							\
+#define NB(nbits)                                                              \
+	{                                                                      \
+		if (nbits <= BITMAP_MAXBITS) {                                 \
+			bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits);  \
+			test_bitmap_initializer_body(&binfo, nbits);           \
+		}                                                              \
 	}
 	NBITS_TAB
 #undef NB
@@ -47,11 +51,11 @@ TEST_BEGIN(test_bitmap_initializer) {
 TEST_END
 
 static size_t
-test_bitmap_size_body(const bitmap_info_t *binfo, size_t nbits,
-    size_t prev_size) {
+test_bitmap_size_body(
+    const bitmap_info_t *binfo, size_t nbits, size_t prev_size) {
 	size_t size = bitmap_size(binfo);
-	expect_zu_ge(size, (nbits >> 3),
-	    "Bitmap size is smaller than expected");
+	expect_zu_ge(
+	    size, (nbits >> 3), "Bitmap size is smaller than expected");
 	expect_zu_ge(size, prev_size, "Bitmap size is smaller than expected");
 	return size;
 }
@@ -65,10 +69,10 @@ TEST_BEGIN(test_bitmap_size) {
 		bitmap_info_init(&binfo, nbits);
 		prev_size = test_bitmap_size_body(&binfo, nbits, prev_size);
 	}
-#define NB(nbits) {							\
-		bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits);	\
-		prev_size = test_bitmap_size_body(&binfo, nbits,	\
-		    prev_size);						\
+#define NB(nbits)                                                              \
+	{                                                                      \
+		bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits);          \
+		prev_size = test_bitmap_size_body(&binfo, nbits, prev_size);   \
 	}
 	prev_size = 0;
 	NBITS_TAB
@@ -78,14 +82,14 @@ TEST_END
 
 static void
 test_bitmap_init_body(const bitmap_info_t *binfo, size_t nbits) {
-	size_t i;
+	size_t    i;
 	bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo));
 	expect_ptr_not_null(bitmap, "Unexpected malloc() failure");
 
 	bitmap_init(bitmap, binfo, false);
 	for (i = 0; i < nbits; i++) {
-		expect_false(bitmap_get(bitmap, binfo, i),
-		    "Bit should be unset");
+		expect_false(
+		    bitmap_get(bitmap, binfo, i), "Bit should be unset");
 	}
 
 	bitmap_init(bitmap, binfo, true);
@@ -104,9 +108,10 @@ TEST_BEGIN(test_bitmap_init) {
 		bitmap_info_init(&binfo, nbits);
 		test_bitmap_init_body(&binfo, nbits);
 	}
-#define NB(nbits) {							\
-		bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits);	\
-		test_bitmap_init_body(&binfo, nbits);			\
+#define NB(nbits)                                                              \
+	{                                                                      \
+		bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits);          \
+		test_bitmap_init_body(&binfo, nbits);                          \
 	}
 	NBITS_TAB
 #undef NB
@@ -115,7 +120,7 @@ TEST_END
 
 static void
 test_bitmap_set_body(const bitmap_info_t *binfo, size_t nbits) {
-	size_t i;
+	size_t    i;
 	bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo));
 	expect_ptr_not_null(bitmap, "Unexpected malloc() failure");
 	bitmap_init(bitmap, binfo, false);
@@ -135,9 +140,10 @@ TEST_BEGIN(test_bitmap_set) {
 		bitmap_info_init(&binfo, nbits);
 		test_bitmap_set_body(&binfo, nbits);
 	}
-#define NB(nbits) {							\
-		bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits);	\
-		test_bitmap_set_body(&binfo, nbits);			\
+#define NB(nbits)                                                              \
+	{                                                                      \
+		bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits);          \
+		test_bitmap_set_body(&binfo, nbits);                           \
 	}
 	NBITS_TAB
 #undef NB
@@ -146,7 +152,7 @@ TEST_END
 
 static void
 test_bitmap_unset_body(const bitmap_info_t *binfo, size_t nbits) {
-	size_t i;
+	size_t    i;
 	bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo));
 	expect_ptr_not_null(bitmap, "Unexpected malloc() failure");
 	bitmap_init(bitmap, binfo, false);
@@ -173,9 +179,10 @@ TEST_BEGIN(test_bitmap_unset) {
 		bitmap_info_init(&binfo, nbits);
 		test_bitmap_unset_body(&binfo, nbits);
 	}
-#define NB(nbits) {							\
-		bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits);	\
-		test_bitmap_unset_body(&binfo, nbits);			\
+#define NB(nbits)                                                              \
+	{                                                                      \
+		bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits);          \
+		test_bitmap_unset_body(&binfo, nbits);                         \
 	}
 	NBITS_TAB
 #undef NB
@@ -193,7 +200,7 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) {
 		expect_zu_eq(bitmap_ffu(bitmap, binfo, 0), i,
 		    "First unset bit should be just after previous first unset "
 		    "bit");
-		expect_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i-1 : i), i,
+		expect_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i - 1 : i), i,
 		    "First unset bit should be just after previous first unset "
 		    "bit");
 		expect_zu_eq(bitmap_ffu(bitmap, binfo, i), i,
@@ -213,7 +220,7 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) {
 		bitmap_unset(bitmap, binfo, i);
 		expect_zu_eq(bitmap_ffu(bitmap, binfo, 0), i,
 		    "First unset bit should the bit previously unset");
-		expect_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i-1 : i), i,
+		expect_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i - 1 : i), i,
 		    "First unset bit should the bit previously unset");
 		expect_zu_eq(bitmap_ffu(bitmap, binfo, i), i,
 		    "First unset bit should the bit previously unset");
@@ -232,7 +239,7 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) {
 		expect_zu_eq(bitmap_ffu(bitmap, binfo, 0), i,
 		    "First unset bit should be just after the bit previously "
 		    "set");
-		expect_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i-1 : i), i,
+		expect_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i - 1 : i), i,
 		    "First unset bit should be just after the bit previously "
 		    "set");
 		expect_zu_eq(bitmap_ffu(bitmap, binfo, i), i,
@@ -245,7 +252,8 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) {
 	}
 	expect_zu_eq(bitmap_ffu(bitmap, binfo, 0), nbits - 1,
 	    "First unset bit should be the last bit");
-	expect_zu_eq(bitmap_ffu(bitmap, binfo, (nbits > 1) ? nbits-2 : nbits-1),
+	expect_zu_eq(
+	    bitmap_ffu(bitmap, binfo, (nbits > 1) ? nbits - 2 : nbits - 1),
 	    nbits - 1, "First unset bit should be the last bit");
 	expect_zu_eq(bitmap_ffu(bitmap, binfo, nbits - 1), nbits - 1,
 	    "First unset bit should be the last bit");
@@ -258,26 +266,26 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) {
 	 * bitmap_ffu() finds the correct bit for all five min_bit cases.
 	 */
 	if (nbits >= 3) {
-		for (size_t i = 0; i < nbits-2; i++) {
+		for (size_t i = 0; i < nbits - 2; i++) {
 			bitmap_unset(bitmap, binfo, i);
-			bitmap_unset(bitmap, binfo, i+2);
+			bitmap_unset(bitmap, binfo, i + 2);
 			if (i > 0) {
-				expect_zu_eq(bitmap_ffu(bitmap, binfo, i-1), i,
-				    "Unexpected first unset bit");
+				expect_zu_eq(bitmap_ffu(bitmap, binfo, i - 1),
+				    i, "Unexpected first unset bit");
 			}
 			expect_zu_eq(bitmap_ffu(bitmap, binfo, i), i,
 			    "Unexpected first unset bit");
-			expect_zu_eq(bitmap_ffu(bitmap, binfo, i+1), i+2,
+			expect_zu_eq(bitmap_ffu(bitmap, binfo, i + 1), i + 2,
 			    "Unexpected first unset bit");
-			expect_zu_eq(bitmap_ffu(bitmap, binfo, i+2), i+2,
+			expect_zu_eq(bitmap_ffu(bitmap, binfo, i + 2), i + 2,
 			    "Unexpected first unset bit");
 			if (i + 3 < nbits) {
-				expect_zu_eq(bitmap_ffu(bitmap, binfo, i+3),
+				expect_zu_eq(bitmap_ffu(bitmap, binfo, i + 3),
 				    nbits, "Unexpected first unset bit");
 			}
 			expect_zu_eq(bitmap_sfu(bitmap, binfo), i,
 			    "Unexpected first unset bit");
-			expect_zu_eq(bitmap_sfu(bitmap, binfo), i+2,
+			expect_zu_eq(bitmap_sfu(bitmap, binfo), i + 2,
 			    "Unexpected first unset bit");
 		}
 	}
@@ -288,24 +296,24 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) {
 	 * cases.
 	 */
 	if (nbits >= 3) {
-		bitmap_unset(bitmap, binfo, nbits-1);
-		for (size_t i = 0; i < nbits-1; i++) {
+		bitmap_unset(bitmap, binfo, nbits - 1);
+		for (size_t i = 0; i < nbits - 1; i++) {
 			bitmap_unset(bitmap, binfo, i);
 			if (i > 0) {
-				expect_zu_eq(bitmap_ffu(bitmap, binfo, i-1), i,
-				    "Unexpected first unset bit");
+				expect_zu_eq(bitmap_ffu(bitmap, binfo, i - 1),
+				    i, "Unexpected first unset bit");
 			}
 			expect_zu_eq(bitmap_ffu(bitmap, binfo, i), i,
 			    "Unexpected first unset bit");
-			expect_zu_eq(bitmap_ffu(bitmap, binfo, i+1), nbits-1,
-			    "Unexpected first unset bit");
-			expect_zu_eq(bitmap_ffu(bitmap, binfo, nbits-1),
-			    nbits-1, "Unexpected first unset bit");
+			expect_zu_eq(bitmap_ffu(bitmap, binfo, i + 1),
+			    nbits - 1, "Unexpected first unset bit");
+			expect_zu_eq(bitmap_ffu(bitmap, binfo, nbits - 1),
+			    nbits - 1, "Unexpected first unset bit");
 
 			expect_zu_eq(bitmap_sfu(bitmap, binfo), i,
 			    "Unexpected first unset bit");
 		}
-		expect_zu_eq(bitmap_sfu(bitmap, binfo), nbits-1,
+		expect_zu_eq(bitmap_sfu(bitmap, binfo), nbits - 1,
 		    "Unexpected first unset bit");
 	}
 
@@ -322,9 +330,10 @@ TEST_BEGIN(test_bitmap_xfu) {
 		bitmap_info_init(&binfo, nbits);
 		test_bitmap_xfu_body(&binfo, nbits);
 	}
-#define NB(nbits) {							\
-		bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits);	\
-		test_bitmap_xfu_body(&binfo, nbits);			\
+#define NB(nbits)                                                              \
+	{                                                                      \
+		bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits);          \
+		test_bitmap_xfu_body(&binfo, nbits);                           \
 	}
 	NBITS_TAB
 #undef NB
@@ -333,11 +342,6 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_bitmap_initializer,
-	    test_bitmap_size,
-	    test_bitmap_init,
-	    test_bitmap_set,
-	    test_bitmap_unset,
-	    test_bitmap_xfu);
+	return test(test_bitmap_initializer, test_bitmap_size, test_bitmap_init,
+	    test_bitmap_set, test_bitmap_unset, test_bitmap_xfu);
 }
diff --git a/test/unit/buf_writer.c b/test/unit/buf_writer.c
index d5e63a0e..643e430c 100644
--- a/test/unit/buf_writer.c
+++ b/test/unit/buf_writer.c
@@ -5,24 +5,24 @@
 #define TEST_BUF_SIZE 16
 #define UNIT_MAX (TEST_BUF_SIZE * 3)
 
-static size_t test_write_len;
-static char test_buf[TEST_BUF_SIZE];
+static size_t   test_write_len;
+static char     test_buf[TEST_BUF_SIZE];
 static uint64_t arg;
 static uint64_t arg_store;
 
 static void
 test_write_cb(void *cbopaque, const char *s) {
 	size_t prev_test_write_len = test_write_len;
-	test_write_len += strlen(s); /* only increase the length */
+	test_write_len += strlen(s);       /* only increase the length */
 	arg_store = *(uint64_t *)cbopaque; /* only pass along the argument */
-	assert_zu_le(prev_test_write_len, test_write_len,
-	    "Test write overflowed");
+	assert_zu_le(
+	    prev_test_write_len, test_write_len, "Test write overflowed");
 }
 
 static void
 test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) {
-	char s[UNIT_MAX + 1];
-	size_t n_unit, remain, i;
+	char    s[UNIT_MAX + 1];
+	size_t  n_unit, remain, i;
 	ssize_t unit;
 
 	assert(buf_writer->buf != NULL);
@@ -41,7 +41,8 @@ test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) {
 				remain += unit;
 				if (remain > buf_writer->buf_size) {
 					/* Flushes should have happened. */
-					assert_u64_eq(arg_store, arg, "Call "
+					assert_u64_eq(arg_store, arg,
+					    "Call "
 					    "back argument didn't get through");
 					remain %= buf_writer->buf_size;
 					if (remain == 0) {
@@ -51,12 +52,14 @@ test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) {
 				}
 				assert_zu_eq(test_write_len + remain, i * unit,
 				    "Incorrect length after writing %zu strings"
-				    " of length %zu", i, unit);
+				    " of length %zu",
+				    i, unit);
 			}
 			buf_writer_flush(buf_writer);
 			expect_zu_eq(test_write_len, n_unit * unit,
 			    "Incorrect length after flushing at the end of"
-			    " writing %zu strings of length %zu", n_unit, unit);
+			    " writing %zu strings of length %zu",
+			    n_unit, unit);
 		}
 	}
 	buf_writer_terminate(tsdn, buf_writer);
@@ -64,9 +67,9 @@ test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) {
 
 TEST_BEGIN(test_buf_write_static) {
 	buf_writer_t buf_writer;
-	tsdn_t *tsdn = tsdn_fetch();
+	tsdn_t      *tsdn = tsdn_fetch();
 	assert_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg,
-	    test_buf, TEST_BUF_SIZE),
+	                 test_buf, TEST_BUF_SIZE),
 	    "buf_writer_init() should not encounter error on static buffer");
 	test_buf_writer_body(tsdn, &buf_writer);
 }
@@ -74,22 +77,24 @@ TEST_END
 
 TEST_BEGIN(test_buf_write_dynamic) {
 	buf_writer_t buf_writer;
-	tsdn_t *tsdn = tsdn_fetch();
+	tsdn_t      *tsdn = tsdn_fetch();
 	assert_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg,
-	    NULL, TEST_BUF_SIZE), "buf_writer_init() should not OOM");
+	                 NULL, TEST_BUF_SIZE),
+	    "buf_writer_init() should not OOM");
 	test_buf_writer_body(tsdn, &buf_writer);
 }
 TEST_END
 
 TEST_BEGIN(test_buf_write_oom) {
 	buf_writer_t buf_writer;
-	tsdn_t *tsdn = tsdn_fetch();
+	tsdn_t      *tsdn = tsdn_fetch();
 	assert_true(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg,
-	    NULL, SC_LARGE_MAXCLASS + 1), "buf_writer_init() should OOM");
+	                NULL, SC_LARGE_MAXCLASS + 1),
+	    "buf_writer_init() should OOM");
 	assert(buf_writer.buf == NULL);
 
-	char s[UNIT_MAX + 1];
-	size_t n_unit, i;
+	char    s[UNIT_MAX + 1];
+	size_t  n_unit, i;
 	ssize_t unit;
 
 	memset(s, 'a', UNIT_MAX);
@@ -107,20 +112,22 @@ TEST_BEGIN(test_buf_write_oom) {
 				    "Call back argument didn't get through");
 				assert_zu_eq(test_write_len, i * unit,
 				    "Incorrect length after writing %zu strings"
-				    " of length %zu", i, unit);
+				    " of length %zu",
+				    i, unit);
 			}
 			buf_writer_flush(&buf_writer);
 			expect_zu_eq(test_write_len, n_unit * unit,
 			    "Incorrect length after flushing at the end of"
-			    " writing %zu strings of length %zu", n_unit, unit);
+			    " writing %zu strings of length %zu",
+			    n_unit, unit);
 		}
 	}
 	buf_writer_terminate(tsdn, &buf_writer);
 }
 TEST_END
 
-static int test_read_count;
-static size_t test_read_len;
+static int      test_read_count;
+static size_t   test_read_len;
 static uint64_t arg_sum;
 
 ssize_t
@@ -142,8 +149,8 @@ test_read_cb(void *cbopaque, void *buf, size_t limit) {
 		memset(buf, 'a', read_len);
 		size_t prev_test_read_len = test_read_len;
 		test_read_len += read_len;
-		assert_zu_le(prev_test_read_len, test_read_len,
-		    "Test read overflowed");
+		assert_zu_le(
+		    prev_test_read_len, test_read_len, "Test read overflowed");
 		return read_len;
 	}
 }
@@ -168,9 +175,9 @@ test_buf_writer_pipe_body(tsdn_t *tsdn, buf_writer_t *buf_writer) {
 
 TEST_BEGIN(test_buf_write_pipe) {
 	buf_writer_t buf_writer;
-	tsdn_t *tsdn = tsdn_fetch();
+	tsdn_t      *tsdn = tsdn_fetch();
 	assert_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg,
-	    test_buf, TEST_BUF_SIZE),
+	                 test_buf, TEST_BUF_SIZE),
 	    "buf_writer_init() should not encounter error on static buffer");
 	test_buf_writer_pipe_body(tsdn, &buf_writer);
 }
@@ -178,19 +185,16 @@ TEST_END
 
 TEST_BEGIN(test_buf_write_pipe_oom) {
 	buf_writer_t buf_writer;
-	tsdn_t *tsdn = tsdn_fetch();
+	tsdn_t      *tsdn = tsdn_fetch();
 	assert_true(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg,
-	    NULL, SC_LARGE_MAXCLASS + 1), "buf_writer_init() should OOM");
+	                NULL, SC_LARGE_MAXCLASS + 1),
+	    "buf_writer_init() should OOM");
 	test_buf_writer_pipe_body(tsdn, &buf_writer);
 }
 TEST_END
 
 int
 main(void) {
-	return test(
-	    test_buf_write_static,
-	    test_buf_write_dynamic,
-	    test_buf_write_oom,
-	    test_buf_write_pipe,
-	    test_buf_write_pipe_oom);
+	return test(test_buf_write_static, test_buf_write_dynamic,
+	    test_buf_write_oom, test_buf_write_pipe, test_buf_write_pipe_oom);
 }
diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c
index 3b6dbab3..dc1dbe36 100644
--- a/test/unit/cache_bin.c
+++ b/test/unit/cache_bin.c
@@ -1,39 +1,37 @@
 #include "test/jemalloc_test.h"
 
 static void
-do_fill_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs,
-    cache_bin_sz_t ncached_max, cache_bin_sz_t nfill_attempt,
-    cache_bin_sz_t nfill_succeed) {
-	bool success;
+do_fill_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t ncached_max,
+    cache_bin_sz_t nfill_attempt, cache_bin_sz_t nfill_succeed) {
+	bool  success;
 	void *ptr;
-	assert_true(cache_bin_ncached_get_local(bin, info) == 0, "");
+	assert_true(cache_bin_ncached_get_local(bin) == 0, "");
 	CACHE_BIN_PTR_ARRAY_DECLARE(arr, nfill_attempt);
-	cache_bin_init_ptr_array_for_fill(bin, info, &arr, nfill_attempt);
+	cache_bin_init_ptr_array_for_fill(bin, &arr, nfill_attempt);
 	for (cache_bin_sz_t i = 0; i < nfill_succeed; i++) {
 		arr.ptr[i] = &ptrs[i];
 	}
-	cache_bin_finish_fill(bin, info, &arr, nfill_succeed);
-	expect_true(cache_bin_ncached_get_local(bin, info) == nfill_succeed,
-	    "");
+	cache_bin_finish_fill(bin, &arr, nfill_succeed);
+	expect_true(cache_bin_ncached_get_local(bin) == nfill_succeed, "");
 	cache_bin_low_water_set(bin);
 
 	for (cache_bin_sz_t i = 0; i < nfill_succeed; i++) {
 		ptr = cache_bin_alloc(bin, &success);
 		expect_true(success, "");
-		expect_ptr_eq(ptr, (void *)&ptrs[i],
-		    "Should pop in order filled");
-		expect_true(cache_bin_low_water_get(bin, info)
-		    == nfill_succeed - i - 1, "");
+		expect_ptr_eq(
+		    ptr, (void *)&ptrs[i], "Should pop in order filled");
+		expect_true(
+		    cache_bin_low_water_get(bin) == nfill_succeed - i - 1, "");
 	}
-	expect_true(cache_bin_ncached_get_local(bin, info) == 0, "");
-	expect_true(cache_bin_low_water_get(bin, info) == 0, "");
+	expect_true(cache_bin_ncached_get_local(bin) == 0, "");
+	expect_true(cache_bin_low_water_get(bin) == 0, "");
 }
 
 static void
-do_flush_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs,
-    cache_bin_sz_t nfill, cache_bin_sz_t nflush) {
+do_flush_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill,
+    cache_bin_sz_t nflush) {
 	bool success;
-	assert_true(cache_bin_ncached_get_local(bin, info) == 0, "");
+	assert_true(cache_bin_ncached_get_local(bin) == 0, "");
 
 	for (cache_bin_sz_t i = 0; i < nfill; i++) {
 		success = cache_bin_dalloc_easy(bin, &ptrs[i]);
@@ -41,30 +39,29 @@ do_flush_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs,
 	}
 
 	CACHE_BIN_PTR_ARRAY_DECLARE(arr, nflush);
-	cache_bin_init_ptr_array_for_flush(bin, info, &arr, nflush);
+	cache_bin_init_ptr_array_for_flush(bin, &arr, nflush);
 	for (cache_bin_sz_t i = 0; i < nflush; i++) {
 		expect_ptr_eq(arr.ptr[i], &ptrs[nflush - i - 1], "");
 	}
-	cache_bin_finish_flush(bin, info, &arr, nflush);
+	cache_bin_finish_flush(bin, &arr, nflush);
 
-	expect_true(cache_bin_ncached_get_local(bin, info) == nfill - nflush,
-	    "");
-	while (cache_bin_ncached_get_local(bin, info) > 0) {
+	expect_true(cache_bin_ncached_get_local(bin) == nfill - nflush, "");
+	while (cache_bin_ncached_get_local(bin) > 0) {
 		cache_bin_alloc(bin, &success);
 	}
 }
 
 static void
-do_batch_alloc_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs,
-    cache_bin_sz_t nfill, size_t batch) {
-	assert_true(cache_bin_ncached_get_local(bin, info) == 0, "");
+do_batch_alloc_test(
+    cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill, size_t batch) {
+	assert_true(cache_bin_ncached_get_local(bin) == 0, "");
 	CACHE_BIN_PTR_ARRAY_DECLARE(arr, nfill);
-	cache_bin_init_ptr_array_for_fill(bin, info, &arr, nfill);
+	cache_bin_init_ptr_array_for_fill(bin, &arr, nfill);
 	for (cache_bin_sz_t i = 0; i < nfill; i++) {
 		arr.ptr[i] = &ptrs[i];
 	}
-	cache_bin_finish_fill(bin, info, &arr, nfill);
-	assert_true(cache_bin_ncached_get_local(bin, info) == nfill, "");
+	cache_bin_finish_fill(bin, &arr, nfill);
+	assert_true(cache_bin_ncached_get_local(bin) == nfill, "");
 	cache_bin_low_water_set(bin);
 
 	void **out = malloc((batch + 1) * sizeof(void *));
@@ -73,9 +70,9 @@ do_batch_alloc_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs,
 	for (cache_bin_sz_t i = 0; i < (cache_bin_sz_t)n; i++) {
 		expect_ptr_eq(out[i], &ptrs[i], "");
 	}
-	expect_true(cache_bin_low_water_get(bin, info) == nfill -
-	    (cache_bin_sz_t)n, "");
-	while (cache_bin_ncached_get_local(bin, info) > 0) {
+	expect_true(
+	    cache_bin_low_water_get(bin) == nfill - (cache_bin_sz_t)n, "");
+	while (cache_bin_ncached_get_local(bin) > 0) {
 		bool success;
 		cache_bin_alloc(bin, &success);
 	}
@@ -93,14 +90,14 @@ test_bin_init(cache_bin_t *bin, cache_bin_info_t *info) {
 	size_t cur_offset = 0;
 	cache_bin_preincrement(info, 1, mem, &cur_offset);
 	cache_bin_init(bin, info, mem, &cur_offset);
-	cache_bin_postincrement(info, 1, mem, &cur_offset);
+	cache_bin_postincrement(mem, &cur_offset);
 	assert_zu_eq(cur_offset, size, "Should use all requested memory");
 }
 
 TEST_BEGIN(test_cache_bin) {
 	const int ncached_max = 100;
-	bool success;
-	void *ptr;
+	bool      success;
+	void     *ptr;
 
 	cache_bin_info_t info;
 	cache_bin_info_init(&info, ncached_max);
@@ -108,9 +105,9 @@ TEST_BEGIN(test_cache_bin) {
 	test_bin_init(&bin, &info);
 
 	/* Initialize to empty; should then have 0 elements. */
-	expect_d_eq(ncached_max, cache_bin_info_ncached_max(&info), "");
-	expect_true(cache_bin_ncached_get_local(&bin, &info) == 0, "");
-	expect_true(cache_bin_low_water_get(&bin, &info) == 0, "");
+	expect_d_eq(ncached_max, cache_bin_ncached_max_get(&bin), "");
+	expect_true(cache_bin_ncached_get_local(&bin) == 0, "");
+	expect_true(cache_bin_low_water_get(&bin) == 0, "");
 
 	ptr = cache_bin_alloc_easy(&bin, &success);
 	expect_false(success, "Shouldn't successfully allocate when empty");
@@ -126,26 +123,25 @@ TEST_BEGIN(test_cache_bin) {
 	 */
 	void **ptrs = mallocx(sizeof(void *) * (ncached_max + 1), 0);
 	assert_ptr_not_null(ptrs, "Unexpected mallocx failure");
-	for  (cache_bin_sz_t i = 0; i < ncached_max; i++) {
-		expect_true(cache_bin_ncached_get_local(&bin, &info) == i, "");
+	for (cache_bin_sz_t i = 0; i < ncached_max; i++) {
+		expect_true(cache_bin_ncached_get_local(&bin) == i, "");
 		success = cache_bin_dalloc_easy(&bin, &ptrs[i]);
 		expect_true(success,
 		    "Should be able to dalloc into a non-full cache bin.");
-		expect_true(cache_bin_low_water_get(&bin, &info) == 0,
+		expect_true(cache_bin_low_water_get(&bin) == 0,
 		    "Pushes and pops shouldn't change low water of zero.");
 	}
-	expect_true(cache_bin_ncached_get_local(&bin, &info) == ncached_max,
-	    "");
+	expect_true(cache_bin_ncached_get_local(&bin) == ncached_max, "");
 	success = cache_bin_dalloc_easy(&bin, &ptrs[ncached_max]);
 	expect_false(success, "Shouldn't be able to dalloc into a full bin.");
 
 	cache_bin_low_water_set(&bin);
 
 	for (cache_bin_sz_t i = 0; i < ncached_max; i++) {
-		expect_true(cache_bin_low_water_get(&bin, &info)
-		    == ncached_max - i, "");
-		expect_true(cache_bin_ncached_get_local(&bin, &info)
-		    == ncached_max - i, "");
+		expect_true(
+		    cache_bin_low_water_get(&bin) == ncached_max - i, "");
+		expect_true(
+		    cache_bin_ncached_get_local(&bin) == ncached_max - i, "");
 		/*
 		 * This should fail -- the easy variant can't change the low
 		 * water mark.
@@ -153,23 +149,24 @@ TEST_BEGIN(test_cache_bin) {
 		ptr = cache_bin_alloc_easy(&bin, &success);
 		expect_ptr_null(ptr, "");
 		expect_false(success, "");
-		expect_true(cache_bin_low_water_get(&bin, &info)
-		    == ncached_max - i, "");
-		expect_true(cache_bin_ncached_get_local(&bin, &info)
-		    == ncached_max - i, "");
+		expect_true(
+		    cache_bin_low_water_get(&bin) == ncached_max - i, "");
+		expect_true(
+		    cache_bin_ncached_get_local(&bin) == ncached_max - i, "");
 
 		/* This should succeed, though. */
 		ptr = cache_bin_alloc(&bin, &success);
 		expect_true(success, "");
 		expect_ptr_eq(ptr, &ptrs[ncached_max - i - 1],
 		    "Alloc should pop in stack order");
-		expect_true(cache_bin_low_water_get(&bin, &info)
-		    == ncached_max - i - 1, "");
-		expect_true(cache_bin_ncached_get_local(&bin, &info)
-		    == ncached_max - i - 1, "");
+		expect_true(
+		    cache_bin_low_water_get(&bin) == ncached_max - i - 1, "");
+		expect_true(
+		    cache_bin_ncached_get_local(&bin) == ncached_max - i - 1,
+		    "");
 	}
 	/* Now we're empty -- all alloc attempts should fail. */
-	expect_true(cache_bin_ncached_get_local(&bin, &info) == 0, "");
+	expect_true(cache_bin_ncached_get_local(&bin) == 0, "");
 	ptr = cache_bin_alloc_easy(&bin, &success);
 	expect_ptr_null(ptr, "");
 	expect_false(success, "");
@@ -185,8 +182,7 @@ TEST_BEGIN(test_cache_bin) {
 	for (cache_bin_sz_t i = ncached_max / 2; i < ncached_max; i++) {
 		cache_bin_dalloc_easy(&bin, &ptrs[i]);
 	}
-	expect_true(cache_bin_ncached_get_local(&bin, &info) == ncached_max,
-	    "");
+	expect_true(cache_bin_ncached_get_local(&bin) == ncached_max, "");
 	for (cache_bin_sz_t i = ncached_max - 1; i >= ncached_max / 2; i--) {
 		/*
 		 * Size is bigger than low water -- the reduced version should
@@ -202,73 +198,67 @@ TEST_BEGIN(test_cache_bin) {
 	expect_ptr_null(ptr, "");
 
 	/* We're going to test filling -- we must be empty to start. */
-	while (cache_bin_ncached_get_local(&bin, &info)) {
+	while (cache_bin_ncached_get_local(&bin)) {
 		cache_bin_alloc(&bin, &success);
 		expect_true(success, "");
 	}
 
 	/* Test fill. */
 	/* Try to fill all, succeed fully. */
-	do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max, ncached_max);
+	do_fill_test(&bin, ptrs, ncached_max, ncached_max, ncached_max);
 	/* Try to fill all, succeed partially. */
-	do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max,
-	    ncached_max / 2);
+	do_fill_test(&bin, ptrs, ncached_max, ncached_max, ncached_max / 2);
 	/* Try to fill all, fail completely. */
-	do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max, 0);
+	do_fill_test(&bin, ptrs, ncached_max, ncached_max, 0);
 
 	/* Try to fill some, succeed fully. */
-	do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max / 2,
-	    ncached_max / 2);
+	do_fill_test(&bin, ptrs, ncached_max, ncached_max / 2, ncached_max / 2);
 	/* Try to fill some, succeed partially. */
-	do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max / 2,
-	    ncached_max / 4);
+	do_fill_test(&bin, ptrs, ncached_max, ncached_max / 2, ncached_max / 4);
 	/* Try to fill some, fail completely. */
-	do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max / 2, 0);
+	do_fill_test(&bin, ptrs, ncached_max, ncached_max / 2, 0);
 
-	do_flush_test(&bin, &info, ptrs, ncached_max, ncached_max);
-	do_flush_test(&bin, &info, ptrs, ncached_max, ncached_max / 2);
-	do_flush_test(&bin, &info, ptrs, ncached_max, 0);
-	do_flush_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 2);
-	do_flush_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 4);
-	do_flush_test(&bin, &info, ptrs, ncached_max / 2, 0);
+	do_flush_test(&bin, ptrs, ncached_max, ncached_max);
+	do_flush_test(&bin, ptrs, ncached_max, ncached_max / 2);
+	do_flush_test(&bin, ptrs, ncached_max, 0);
+	do_flush_test(&bin, ptrs, ncached_max / 2, ncached_max / 2);
+	do_flush_test(&bin, ptrs, ncached_max / 2, ncached_max / 4);
+	do_flush_test(&bin, ptrs, ncached_max / 2, 0);
 
-	do_batch_alloc_test(&bin, &info, ptrs, ncached_max, ncached_max);
-	do_batch_alloc_test(&bin, &info, ptrs, ncached_max, ncached_max * 2);
-	do_batch_alloc_test(&bin, &info, ptrs, ncached_max, ncached_max / 2);
-	do_batch_alloc_test(&bin, &info, ptrs, ncached_max, 2);
-	do_batch_alloc_test(&bin, &info, ptrs, ncached_max, 1);
-	do_batch_alloc_test(&bin, &info, ptrs, ncached_max, 0);
-	do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2,
-	    ncached_max / 2);
-	do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, ncached_max);
-	do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2,
-	    ncached_max / 4);
-	do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, 2);
-	do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, 1);
-	do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, 0);
-	do_batch_alloc_test(&bin, &info, ptrs, 2, ncached_max);
-	do_batch_alloc_test(&bin, &info, ptrs, 2, 2);
-	do_batch_alloc_test(&bin, &info, ptrs, 2, 1);
-	do_batch_alloc_test(&bin, &info, ptrs, 2, 0);
-	do_batch_alloc_test(&bin, &info, ptrs, 1, 2);
-	do_batch_alloc_test(&bin, &info, ptrs, 1, 1);
-	do_batch_alloc_test(&bin, &info, ptrs, 1, 0);
-	do_batch_alloc_test(&bin, &info, ptrs, 0, 2);
-	do_batch_alloc_test(&bin, &info, ptrs, 0, 1);
-	do_batch_alloc_test(&bin, &info, ptrs, 0, 0);
+	do_batch_alloc_test(&bin, ptrs, ncached_max, ncached_max);
+	do_batch_alloc_test(&bin, ptrs, ncached_max, ncached_max * 2);
+	do_batch_alloc_test(&bin, ptrs, ncached_max, ncached_max / 2);
+	do_batch_alloc_test(&bin, ptrs, ncached_max, 2);
+	do_batch_alloc_test(&bin, ptrs, ncached_max, 1);
+	do_batch_alloc_test(&bin, ptrs, ncached_max, 0);
+	do_batch_alloc_test(&bin, ptrs, ncached_max / 2, ncached_max / 2);
+	do_batch_alloc_test(&bin, ptrs, ncached_max / 2, ncached_max);
+	do_batch_alloc_test(&bin, ptrs, ncached_max / 2, ncached_max / 4);
+	do_batch_alloc_test(&bin, ptrs, ncached_max / 2, 2);
+	do_batch_alloc_test(&bin, ptrs, ncached_max / 2, 1);
+	do_batch_alloc_test(&bin, ptrs, ncached_max / 2, 0);
+	do_batch_alloc_test(&bin, ptrs, 2, ncached_max);
+	do_batch_alloc_test(&bin, ptrs, 2, 2);
+	do_batch_alloc_test(&bin, ptrs, 2, 1);
+	do_batch_alloc_test(&bin, ptrs, 2, 0);
+	do_batch_alloc_test(&bin, ptrs, 1, 2);
+	do_batch_alloc_test(&bin, ptrs, 1, 1);
+	do_batch_alloc_test(&bin, ptrs, 1, 0);
+	do_batch_alloc_test(&bin, ptrs, 0, 2);
+	do_batch_alloc_test(&bin, ptrs, 0, 1);
+	do_batch_alloc_test(&bin, ptrs, 0, 0);
 
 	free(ptrs);
 }
 TEST_END
 
 static void
-do_flush_stashed_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs,
-    cache_bin_sz_t nfill, cache_bin_sz_t nstash) {
-	expect_true(cache_bin_ncached_get_local(bin, info) == 0,
-	    "Bin not empty");
-	expect_true(cache_bin_nstashed_get_local(bin, info) == 0,
-	    "Bin not empty");
-	expect_true(nfill + nstash <= info->ncached_max, "Exceeded max");
+do_flush_stashed_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill,
+    cache_bin_sz_t nstash) {
+	expect_true(cache_bin_ncached_get_local(bin) == 0, "Bin not empty");
+	expect_true(cache_bin_nstashed_get_local(bin) == 0, "Bin not empty");
+	expect_true(
+	    nfill + nstash <= bin->bin_info.ncached_max, "Exceeded max");
 
 	bool ret;
 	/* Fill */
@@ -276,18 +266,18 @@ do_flush_stashed_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs,
 		ret = cache_bin_dalloc_easy(bin, &ptrs[i]);
 		expect_true(ret, "Unexpected fill failure");
 	}
-	expect_true(cache_bin_ncached_get_local(bin, info) == nfill,
-	    "Wrong cached count");
+	expect_true(
+	    cache_bin_ncached_get_local(bin) == nfill, "Wrong cached count");
 
 	/* Stash */
 	for (cache_bin_sz_t i = 0; i < nstash; i++) {
 		ret = cache_bin_stash(bin, &ptrs[i + nfill]);
 		expect_true(ret, "Unexpected stash failure");
 	}
-	expect_true(cache_bin_nstashed_get_local(bin, info) == nstash,
-	    "Wrong stashed count");
+	expect_true(
+	    cache_bin_nstashed_get_local(bin) == nstash, "Wrong stashed count");
 
-	if (nfill + nstash == info->ncached_max) {
+	if (nfill + nstash == bin->bin_info.ncached_max) {
 		ret = cache_bin_dalloc_easy(bin, &ptrs[0]);
 		expect_false(ret, "Should not dalloc into a full bin");
 		ret = cache_bin_stash(bin, &ptrs[0]);
@@ -302,20 +292,20 @@ do_flush_stashed_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs,
 		expect_true((uintptr_t)ptr < (uintptr_t)&ptrs[nfill],
 		    "Should not alloc stashed ptrs");
 	}
-	expect_true(cache_bin_ncached_get_local(bin, info) == 0,
-	    "Wrong cached count");
-	expect_true(cache_bin_nstashed_get_local(bin, info) == nstash,
-	    "Wrong stashed count");
+	expect_true(
+	    cache_bin_ncached_get_local(bin) == 0, "Wrong cached count");
+	expect_true(
+	    cache_bin_nstashed_get_local(bin) == nstash, "Wrong stashed count");
 
 	cache_bin_alloc(bin, &ret);
 	expect_false(ret, "Should not alloc stashed");
 
 	/* Clear stashed ones */
-	cache_bin_finish_flush_stashed(bin, info);
-	expect_true(cache_bin_ncached_get_local(bin, info) == 0,
-	    "Wrong cached count");
-	expect_true(cache_bin_nstashed_get_local(bin, info) == 0,
-	    "Wrong stashed count");
+	cache_bin_finish_flush_stashed(bin);
+	expect_true(
+	    cache_bin_ncached_get_local(bin) == 0, "Wrong cached count");
+	expect_true(
+	    cache_bin_nstashed_get_local(bin) == 0, "Wrong stashed count");
 
 	cache_bin_alloc(bin, &ret);
 	expect_false(ret, "Should not alloc from empty bin");
@@ -324,7 +314,7 @@ do_flush_stashed_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs,
 TEST_BEGIN(test_cache_bin_stash) {
 	const int ncached_max = 100;
 
-	cache_bin_t bin;
+	cache_bin_t      bin;
 	cache_bin_info_t info;
 	cache_bin_info_init(&info, ncached_max);
 	test_bin_init(&bin, &info);
@@ -337,15 +327,17 @@ TEST_BEGIN(test_cache_bin_stash) {
 	assert_ptr_not_null(ptrs, "Unexpected mallocx failure");
 	bool ret;
 	for (cache_bin_sz_t i = 0; i < ncached_max; i++) {
-		expect_true(cache_bin_ncached_get_local(&bin, &info) ==
-		    (i / 2 + i % 2), "Wrong ncached value");
-		expect_true(cache_bin_nstashed_get_local(&bin, &info) == i / 2,
+		expect_true(
+		    cache_bin_ncached_get_local(&bin) == (i / 2 + i % 2),
+		    "Wrong ncached value");
+		expect_true(cache_bin_nstashed_get_local(&bin) == i / 2,
 		    "Wrong nstashed value");
 		if (i % 2 == 0) {
 			cache_bin_dalloc_easy(&bin, &ptrs[i]);
 		} else {
 			ret = cache_bin_stash(&bin, &ptrs[i]);
-			expect_true(ret, "Should be able to stash into a "
+			expect_true(ret,
+			    "Should be able to stash into a "
 			    "non-full cache bin");
 		}
 	}
@@ -362,23 +354,23 @@ TEST_BEGIN(test_cache_bin_stash) {
 			expect_true(diff % 2 == 0, "Should be able to alloc");
 		} else {
 			expect_false(ret, "Should not alloc stashed");
-			expect_true(cache_bin_nstashed_get_local(&bin, &info) ==
-			    ncached_max / 2, "Wrong nstashed value");
+			expect_true(cache_bin_nstashed_get_local(&bin)
+			        == ncached_max / 2,
+			    "Wrong nstashed value");
 		}
 	}
 
 	test_bin_init(&bin, &info);
-	do_flush_stashed_test(&bin, &info, ptrs, ncached_max, 0);
-	do_flush_stashed_test(&bin, &info, ptrs, 0, ncached_max);
-	do_flush_stashed_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 2);
-	do_flush_stashed_test(&bin, &info, ptrs, ncached_max / 4, ncached_max / 2);
-	do_flush_stashed_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 4);
-	do_flush_stashed_test(&bin, &info, ptrs, ncached_max / 4, ncached_max / 4);
+	do_flush_stashed_test(&bin, ptrs, ncached_max, 0);
+	do_flush_stashed_test(&bin, ptrs, 0, ncached_max);
+	do_flush_stashed_test(&bin, ptrs, ncached_max / 2, ncached_max / 2);
+	do_flush_stashed_test(&bin, ptrs, ncached_max / 4, ncached_max / 2);
+	do_flush_stashed_test(&bin, ptrs, ncached_max / 2, ncached_max / 4);
+	do_flush_stashed_test(&bin, ptrs, ncached_max / 4, ncached_max / 4);
 }
 TEST_END
 
 int
 main(void) {
-	return test(test_cache_bin,
-		test_cache_bin_stash);
+	return test(test_cache_bin, test_cache_bin_stash);
 }
diff --git a/test/unit/ckh.c b/test/unit/ckh.c
index 36142acd..f07892ac 100644
--- a/test/unit/ckh.c
+++ b/test/unit/ckh.c
@@ -2,55 +2,51 @@
 
 TEST_BEGIN(test_new_delete) {
 	tsd_t *tsd;
-	ckh_t ckh;
+	ckh_t  ckh;
 
 	tsd = tsd_fetch();
 
-	expect_false(ckh_new(tsd, &ckh, 2, ckh_string_hash,
-	    ckh_string_keycomp), "Unexpected ckh_new() error");
+	expect_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp),
+	    "Unexpected ckh_new() error");
 	ckh_delete(tsd, &ckh);
 
-	expect_false(ckh_new(tsd, &ckh, 3, ckh_pointer_hash,
-	    ckh_pointer_keycomp), "Unexpected ckh_new() error");
+	expect_false(
+	    ckh_new(tsd, &ckh, 3, ckh_pointer_hash, ckh_pointer_keycomp),
+	    "Unexpected ckh_new() error");
 	ckh_delete(tsd, &ckh);
 }
 TEST_END
 
 TEST_BEGIN(test_count_insert_search_remove) {
-	tsd_t *tsd;
-	ckh_t ckh;
-	const char *strs[] = {
-	    "a string",
-	    "A string",
-	    "a string.",
-	    "A string."
-	};
+	tsd_t      *tsd;
+	ckh_t       ckh;
+	const char *strs[] = {"a string", "A string", "a string.", "A string."};
 	const char *missing = "A string not in the hash table.";
-	size_t i;
+	size_t      i;
 
 	tsd = tsd_fetch();
 
-	expect_false(ckh_new(tsd, &ckh, 2, ckh_string_hash,
-	    ckh_string_keycomp), "Unexpected ckh_new() error");
+	expect_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp),
+	    "Unexpected ckh_new() error");
 	expect_zu_eq(ckh_count(&ckh), 0,
 	    "ckh_count() should return %zu, but it returned %zu", ZU(0),
 	    ckh_count(&ckh));
 
 	/* Insert. */
-	for (i = 0; i < sizeof(strs)/sizeof(const char *); i++) {
+	for (i = 0; i < sizeof(strs) / sizeof(const char *); i++) {
 		ckh_insert(tsd, &ckh, strs[i], strs[i]);
-		expect_zu_eq(ckh_count(&ckh), i+1,
-		    "ckh_count() should return %zu, but it returned %zu", i+1,
+		expect_zu_eq(ckh_count(&ckh), i + 1,
+		    "ckh_count() should return %zu, but it returned %zu", i + 1,
 		    ckh_count(&ckh));
 	}
 
 	/* Search. */
-	for (i = 0; i < sizeof(strs)/sizeof(const char *); i++) {
+	for (i = 0; i < sizeof(strs) / sizeof(const char *); i++) {
 		union {
-			void *p;
+			void       *p;
 			const char *s;
 		} k, v;
-		void **kp, **vp;
+		void      **kp, **vp;
 		const char *ks, *vs;
 
 		kp = (i & 1) ? &k.p : NULL;
@@ -62,21 +58,21 @@ TEST_BEGIN(test_count_insert_search_remove) {
 
 		ks = (i & 1) ? strs[i] : (const char *)NULL;
 		vs = (i & 2) ? strs[i] : (const char *)NULL;
-		expect_ptr_eq((void *)ks, (void *)k.s, "Key mismatch, i=%zu",
-		    i);
-		expect_ptr_eq((void *)vs, (void *)v.s, "Value mismatch, i=%zu",
-		    i);
+		expect_ptr_eq(
+		    (void *)ks, (void *)k.s, "Key mismatch, i=%zu", i);
+		expect_ptr_eq(
+		    (void *)vs, (void *)v.s, "Value mismatch, i=%zu", i);
 	}
 	expect_true(ckh_search(&ckh, missing, NULL, NULL),
 	    "Unexpected ckh_search() success");
 
 	/* Remove. */
-	for (i = 0; i < sizeof(strs)/sizeof(const char *); i++) {
+	for (i = 0; i < sizeof(strs) / sizeof(const char *); i++) {
 		union {
-			void *p;
+			void       *p;
 			const char *s;
 		} k, v;
-		void **kp, **vp;
+		void      **kp, **vp;
 		const char *ks, *vs;
 
 		kp = (i & 1) ? &k.p : NULL;
@@ -88,14 +84,14 @@ TEST_BEGIN(test_count_insert_search_remove) {
 
 		ks = (i & 1) ? strs[i] : (const char *)NULL;
 		vs = (i & 2) ? strs[i] : (const char *)NULL;
-		expect_ptr_eq((void *)ks, (void *)k.s, "Key mismatch, i=%zu",
-		    i);
-		expect_ptr_eq((void *)vs, (void *)v.s, "Value mismatch, i=%zu",
-		    i);
+		expect_ptr_eq(
+		    (void *)ks, (void *)k.s, "Key mismatch, i=%zu", i);
+		expect_ptr_eq(
+		    (void *)vs, (void *)v.s, "Value mismatch, i=%zu", i);
 		expect_zu_eq(ckh_count(&ckh),
-		    sizeof(strs)/sizeof(const char *) - i - 1,
+		    sizeof(strs) / sizeof(const char *) - i - 1,
 		    "ckh_count() should return %zu, but it returned %zu",
-		        sizeof(strs)/sizeof(const char *) - i - 1,
+		    sizeof(strs) / sizeof(const char *) - i - 1,
 		    ckh_count(&ckh));
 	}
 
@@ -106,18 +102,19 @@ TEST_END
 TEST_BEGIN(test_insert_iter_remove) {
 #define NITEMS ZU(1000)
 	tsd_t *tsd;
-	ckh_t ckh;
+	ckh_t  ckh;
 	void **p[NITEMS];
-	void *q, *r;
+	void  *q, *r;
 	size_t i;
 
 	tsd = tsd_fetch();
 
-	expect_false(ckh_new(tsd, &ckh, 2, ckh_pointer_hash,
-	    ckh_pointer_keycomp), "Unexpected ckh_new() error");
+	expect_false(
+	    ckh_new(tsd, &ckh, 2, ckh_pointer_hash, ckh_pointer_keycomp),
+	    "Unexpected ckh_new() error");
 
 	for (i = 0; i < NITEMS; i++) {
-		p[i] = mallocx(i+1, 0);
+		p[i] = mallocx(i + 1, 0);
 		expect_ptr_not_null(p[i], "Unexpected mallocx() failure");
 	}
 
@@ -151,7 +148,7 @@ TEST_BEGIN(test_insert_iter_remove) {
 		}
 
 		{
-			bool seen[NITEMS];
+			bool   seen[NITEMS];
 			size_t tabind;
 
 			memset(seen, 0, sizeof(seen));
@@ -195,8 +192,8 @@ TEST_BEGIN(test_insert_iter_remove) {
 	}
 
 	expect_zu_eq(ckh_count(&ckh), 0,
-	    "ckh_count() should return %zu, but it returned %zu",
-	    ZU(0), ckh_count(&ckh));
+	    "ckh_count() should return %zu, but it returned %zu", ZU(0),
+	    ckh_count(&ckh));
 	ckh_delete(tsd, &ckh);
 #undef NITEMS
 }
@@ -204,8 +201,6 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_new_delete,
-	    test_count_insert_search_remove,
+	return test(test_new_delete, test_count_insert_search_remove,
 	    test_insert_iter_remove);
 }
diff --git a/test/unit/conf.c b/test/unit/conf.c
new file mode 100644
index 00000000..1a1cde7c
--- /dev/null
+++ b/test/unit/conf.c
@@ -0,0 +1,113 @@
+#include "test/jemalloc_test.h"
+
+#include "jemalloc/internal/conf.h"
+
+TEST_BEGIN(test_conf_next_simple) {
+	const char *opts = "key:value";
+	const char *k;
+	size_t      klen;
+	const char *v;
+	size_t      vlen;
+
+	had_conf_error = false;
+	bool end = conf_next(&opts, &k, &klen, &v, &vlen);
+	expect_false(end, "Should not be at end");
+	expect_zu_eq(klen, 3, "Key length should be 3");
+	expect_false(strncmp(k, "key", klen), "Key should be \"key\"");
+	expect_zu_eq(vlen, 5, "Value length should be 5");
+	expect_false(strncmp(v, "value", vlen), "Value should be \"value\"");
+	expect_false(had_conf_error, "Should not have had an error");
+}
+TEST_END
+
+TEST_BEGIN(test_conf_next_multi) {
+	const char *opts = "k1:v1,k2:v2";
+	const char *k;
+	size_t      klen;
+	const char *v;
+	size_t      vlen;
+	bool        end;
+
+	had_conf_error = false;
+
+	end = conf_next(&opts, &k, &klen, &v, &vlen);
+	expect_false(end, "Should not be at end after first pair");
+	expect_zu_eq(klen, 2, "First key length should be 2");
+	expect_false(strncmp(k, "k1", klen), "First key should be \"k1\"");
+	expect_zu_eq(vlen, 2, "First value length should be 2");
+	expect_false(strncmp(v, "v1", vlen), "First value should be \"v1\"");
+
+	end = conf_next(&opts, &k, &klen, &v, &vlen);
+	expect_false(end, "Should not be at end after second pair");
+	expect_zu_eq(klen, 2, "Second key length should be 2");
+	expect_false(strncmp(k, "k2", klen), "Second key should be \"k2\"");
+	expect_zu_eq(vlen, 2, "Second value length should be 2");
+	expect_false(strncmp(v, "v2", vlen), "Second value should be \"v2\"");
+
+	expect_false(had_conf_error, "Should not have had an error");
+}
+TEST_END
+
+TEST_BEGIN(test_conf_next_empty) {
+	const char *opts = "";
+	const char *k;
+	size_t      klen;
+	const char *v;
+	size_t      vlen;
+
+	had_conf_error = false;
+	bool end = conf_next(&opts, &k, &klen, &v, &vlen);
+	expect_true(end, "Empty string should return true (end)");
+	expect_false(had_conf_error, "Empty string should not set error");
+}
+TEST_END
+
+TEST_BEGIN(test_conf_next_missing_value) {
+	const char *opts = "key_only";
+	const char *k;
+	size_t      klen;
+	const char *v;
+	size_t      vlen;
+
+	had_conf_error = false;
+	bool end = conf_next(&opts, &k, &klen, &v, &vlen);
+	expect_true(end, "Key without value should return true (end)");
+	expect_true(had_conf_error, "Key without value should set error");
+}
+TEST_END
+
+TEST_BEGIN(test_conf_next_malformed) {
+	const char *opts = "bad!key:val";
+	const char *k;
+	size_t      klen;
+	const char *v;
+	size_t      vlen;
+
+	had_conf_error = false;
+	bool end = conf_next(&opts, &k, &klen, &v, &vlen);
+	expect_true(end, "Malformed key should return true (end)");
+	expect_true(had_conf_error, "Malformed key should set error");
+}
+TEST_END
+
+TEST_BEGIN(test_conf_next_trailing_comma) {
+	const char *opts = "k:v,";
+	const char *k;
+	size_t      klen;
+	const char *v;
+	size_t      vlen;
+
+	had_conf_error = false;
+	bool end = conf_next(&opts, &k, &klen, &v, &vlen);
+	expect_false(end, "Should parse the first pair successfully");
+	expect_true(had_conf_error,
+	    "Trailing comma should set error");
+}
+TEST_END
+
+int
+main(void) {
+	return test(test_conf_next_simple, test_conf_next_multi,
+	    test_conf_next_empty, test_conf_next_missing_value,
+	    test_conf_next_malformed, test_conf_next_trailing_comma);
+}
diff --git a/test/unit/conf_init_0.c b/test/unit/conf_init_0.c
new file mode 100644
index 00000000..a1f0e63f
--- /dev/null
+++ b/test/unit/conf_init_0.c
@@ -0,0 +1,22 @@
+#include "test/jemalloc_test.h"
+
+TEST_BEGIN(test_default_dirty_decay_ms) {
+#ifdef _WIN32
+	test_skip("not supported on win32");
+#endif
+
+	ssize_t dirty_decay_ms;
+	size_t sz = sizeof(dirty_decay_ms);
+
+	int err = mallctl("opt.dirty_decay_ms", &dirty_decay_ms, &sz, NULL, 0);
+	assert_d_eq(err, 0, "Unexpected mallctl failure");
+	expect_zd_eq(dirty_decay_ms, 10000,
+	    "dirty_decay_ms should be the default (10000)"
+	    " when no global variables are set");
+}
+TEST_END
+
+int
+main(void) {
+	return test(test_default_dirty_decay_ms);
+}
diff --git a/test/unit/conf_init_1.c b/test/unit/conf_init_1.c
new file mode 100644
index 00000000..07aec5dc
--- /dev/null
+++ b/test/unit/conf_init_1.c
@@ -0,0 +1,23 @@
+#include "test/jemalloc_test.h"
+
+const char *malloc_conf = "dirty_decay_ms:1234";
+
+TEST_BEGIN(test_malloc_conf_dirty_decay_ms) {
+#ifdef _WIN32
+	test_skip("not supported on win32");
+#endif
+
+	ssize_t dirty_decay_ms;
+	size_t sz = sizeof(dirty_decay_ms);
+
+	int err = mallctl("opt.dirty_decay_ms", &dirty_decay_ms, &sz, NULL, 0);
+	assert_d_eq(err, 0, "Unexpected mallctl failure");
+	expect_zd_eq(dirty_decay_ms, 1234,
+	    "dirty_decay_ms should be 1234 (set via malloc_conf)");
+}
+TEST_END
+
+int
+main(void) {
+	return test(test_malloc_conf_dirty_decay_ms);
+}
diff --git a/test/unit/conf_init_confirm.c b/test/unit/conf_init_confirm.c
new file mode 100644
index 00000000..a4358359
--- /dev/null
+++ b/test/unit/conf_init_confirm.c
@@ -0,0 +1,39 @@
+#include "test/jemalloc_test.h"
+
+const char *malloc_conf = "dirty_decay_ms:1234,confirm_conf:true";
+
+TEST_BEGIN(test_confirm_conf_two_pass) {
+#ifdef _WIN32
+	test_skip("not supported on win32");
+#endif
+
+	bool confirm_conf;
+	size_t sz = sizeof(confirm_conf);
+
+	int err = mallctl("opt.confirm_conf", &confirm_conf, &sz, NULL, 0);
+	assert_d_eq(err, 0, "Unexpected mallctl failure");
+	expect_true(confirm_conf,
+	    "confirm_conf should be true (processed in pass 1)");
+}
+TEST_END
+
+TEST_BEGIN(test_conf_option_applied_in_second_pass) {
+#ifdef _WIN32
+	test_skip("not supported on win32");
+#endif
+
+	ssize_t dirty_decay_ms;
+	size_t sz = sizeof(dirty_decay_ms);
+
+	int err = mallctl("opt.dirty_decay_ms", &dirty_decay_ms, &sz, NULL, 0);
+	assert_d_eq(err, 0, "Unexpected mallctl failure");
+	expect_zd_eq(dirty_decay_ms, 1234,
+	    "dirty_decay_ms should be 1234 (processed in pass 2)");
+}
+TEST_END
+
+int
+main(void) {
+	return test(test_confirm_conf_two_pass,
+	    test_conf_option_applied_in_second_pass);
+}
diff --git a/test/unit/conf_parse.c b/test/unit/conf_parse.c
new file mode 100644
index 00000000..448cc84a
--- /dev/null
+++ b/test/unit/conf_parse.c
@@ -0,0 +1,89 @@
+#include "test/jemalloc_test.h"
+
+#include "jemalloc/internal/conf.h"
+
+TEST_BEGIN(test_conf_handle_bool_true) {
+	bool result = false;
+	bool err = conf_handle_bool("true", sizeof("true") - 1, &result);
+	expect_false(err, "conf_handle_bool should succeed for \"true\"");
+	expect_true(result, "result should be true");
+}
+TEST_END
+
+TEST_BEGIN(test_conf_handle_bool_false) {
+	bool result = true;
+	bool err = conf_handle_bool("false", sizeof("false") - 1, &result);
+	expect_false(err, "conf_handle_bool should succeed for \"false\"");
+	expect_false(result, "result should be false");
+}
+TEST_END
+
+TEST_BEGIN(test_conf_handle_bool_invalid) {
+	bool result = false;
+	bool err = conf_handle_bool("yes", sizeof("yes") - 1, &result);
+	expect_true(err, "conf_handle_bool should fail for \"yes\"");
+}
+TEST_END
+
+TEST_BEGIN(test_conf_handle_signed_valid) {
+	intmax_t result = 0;
+	bool     err = conf_handle_signed("5000", sizeof("5000") - 1, -1,
+	        INTMAX_MAX, true, false, false, &result);
+	expect_false(err, "Should succeed for valid value");
+	expect_d64_eq((int64_t)result, 5000, "result should be 5000");
+}
+TEST_END
+
+TEST_BEGIN(test_conf_handle_signed_negative) {
+	intmax_t result = 0;
+	bool err = conf_handle_signed("-1", sizeof("-1") - 1, -1, INTMAX_MAX,
+	    true, false, false, &result);
+	expect_false(err, "Should succeed for -1");
+	expect_d64_eq((int64_t)result, -1, "result should be -1");
+}
+TEST_END
+
+TEST_BEGIN(test_conf_handle_signed_out_of_range) {
+	intmax_t result = 0;
+	bool     err = conf_handle_signed(
+            "5000", sizeof("5000") - 1, -1, 4999, true, true, false, &result);
+	expect_true(err, "Should fail for out-of-range value");
+}
+TEST_END
+
+TEST_BEGIN(test_conf_handle_char_p) {
+	char buf[8];
+	bool err;
+
+	/* Normal copy. */
+	err = conf_handle_char_p(
+	    "hello", sizeof("hello") - 1, buf, sizeof(buf));
+	expect_false(err, "Should succeed");
+	expect_str_eq(buf, "hello", "Should copy string");
+
+	/* Truncation. */
+	err = conf_handle_char_p(
+	    "longstring", sizeof("longstring") - 1, buf, sizeof(buf));
+	expect_false(err, "Should succeed even when truncating");
+	expect_str_eq(buf, "longstr", "Should truncate to dest_sz - 1");
+}
+TEST_END
+
+TEST_BEGIN(test_conf_handle_char_p_zero_dest_sz) {
+	char buf[4] = {'X', 'Y', 'Z', '\0'};
+	bool err;
+
+	err = conf_handle_char_p("abc", sizeof("abc") - 1, buf, 0);
+	expect_false(err, "Should succeed for zero-sized destination");
+	expect_c_eq(buf[0], 'X', "Zero-sized destination must not be modified");
+}
+TEST_END
+
+int
+main(void) {
+	return test(test_conf_handle_bool_true, test_conf_handle_bool_false,
+	    test_conf_handle_bool_invalid, test_conf_handle_signed_valid,
+	    test_conf_handle_signed_negative,
+	    test_conf_handle_signed_out_of_range, test_conf_handle_char_p,
+	    test_conf_handle_char_p_zero_dest_sz);
+}
diff --git a/test/unit/counter.c b/test/unit/counter.c
index 277baac1..04100daa 100644
--- a/test/unit/counter.c
+++ b/test/unit/counter.c
@@ -11,7 +11,7 @@ TEST_BEGIN(test_counter_accum) {
 	counter_accum_init(&c, interval);
 
 	tsd_t *tsd = tsd_fetch();
-	bool trigger;
+	bool   trigger;
 	for (unsigned i = 0; i < n; i++) {
 		trigger = counter_accum(tsd_tsdn(tsd), &c, increment);
 		accum += increment;
@@ -39,8 +39,8 @@ static void *
 thd_start(void *varg) {
 	counter_accum_t *c = (counter_accum_t *)varg;
 
-	tsd_t *tsd = tsd_fetch();
-	bool trigger;
+	tsd_t    *tsd = tsd_fetch();
+	bool      trigger;
 	uintptr_t n_triggered = 0;
 	for (unsigned i = 0; i < N_ITER_THD; i++) {
 		trigger = counter_accum(tsd_tsdn(tsd), c, ITER_INCREMENT);
@@ -50,12 +50,11 @@ thd_start(void *varg) {
 	return (void *)n_triggered;
 }
 
-
 TEST_BEGIN(test_counter_mt) {
 	counter_accum_t shared_c;
 	counter_accum_init(&shared_c, interval);
 
-	thd_t thds[N_THDS];
+	thd_t    thds[N_THDS];
 	unsigned i;
 	for (i = 0; i < N_THDS; i++) {
 		thd_create(&thds[i], thd_start, (void *)&shared_c);
@@ -74,7 +73,5 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_counter_accum,
-	    test_counter_mt);
+	return test(test_counter_accum, test_counter_mt);
 }
diff --git a/test/unit/decay.c b/test/unit/decay.c
index bdb6d0a3..10740a85 100644
--- a/test/unit/decay.c
+++ b/test/unit/decay.c
@@ -22,12 +22,11 @@ TEST_BEGIN(test_decay_init) {
 TEST_END
 
 TEST_BEGIN(test_decay_ms_valid) {
-	expect_false(decay_ms_valid(-7),
-	    "Misclassified negative decay as valid");
+	expect_false(
+	    decay_ms_valid(-7), "Misclassified negative decay as valid");
 	expect_true(decay_ms_valid(-1),
 	    "Misclassified -1 (never decay) as invalid decay");
-	expect_true(decay_ms_valid(8943),
-	    "Misclassified valid decay");
+	expect_true(decay_ms_valid(8943), "Misclassified valid decay");
 	if (SSIZE_MAX > NSTIME_SEC_MAX) {
 		expect_false(
 		    decay_ms_valid((ssize_t)(NSTIME_SEC_MAX * KQU(1000) + 39)),
@@ -111,12 +110,12 @@ TEST_BEGIN(test_decay_empty) {
 	assert_false(err, "");
 
 	uint64_t time_between_calls = decay_epoch_duration_ns(&decay) / 5;
-	int nepochs = 0;
+	int      nepochs = 0;
 	for (uint64_t i = 0; i < decay_ns / time_between_calls * 10; i++) {
 		size_t dirty_pages = 0;
 		nstime_init(&curtime, i * time_between_calls);
-		bool epoch_advanced = decay_maybe_advance_epoch(&decay,
-		    &curtime, dirty_pages);
+		bool epoch_advanced = decay_maybe_advance_epoch(
+		    &decay, &curtime, dirty_pages);
 		if (epoch_advanced) {
 			nepochs++;
 			expect_zu_eq(decay_npages_limit_get(&decay), 0,
@@ -158,30 +157,32 @@ TEST_BEGIN(test_decay) {
 	nstime_init(&epochtime, decay_epoch_duration_ns(&decay));
 
 	const size_t dirty_pages_per_epoch = 1000;
-	size_t dirty_pages = 0;
-	uint64_t epoch_ns = decay_epoch_duration_ns(&decay);
-	bool epoch_advanced = false;
+	size_t       dirty_pages = 0;
+	uint64_t     epoch_ns = decay_epoch_duration_ns(&decay);
+	bool         epoch_advanced = false;
 
 	/* Populate backlog with some dirty pages */
 	for (uint64_t i = 0; i < nepoch_init; i++) {
 		nstime_add(&curtime, &epochtime);
 		dirty_pages += dirty_pages_per_epoch;
-		epoch_advanced |= decay_maybe_advance_epoch(&decay, &curtime,
-		    dirty_pages);
+		epoch_advanced |= decay_maybe_advance_epoch(
+		    &decay, &curtime, dirty_pages);
 	}
 	expect_true(epoch_advanced, "Epoch never advanced");
 
 	size_t npages_limit = decay_npages_limit_get(&decay);
-	expect_zu_gt(npages_limit, 0, "npages_limit is incorrectly equal "
+	expect_zu_gt(npages_limit, 0,
+	    "npages_limit is incorrectly equal "
 	    "to zero after dirty pages have been added");
 
 	/* Keep dirty pages unchanged and verify that npages_limit decreases */
 	for (uint64_t i = nepoch_init; i * epoch_ns < decay_ns; ++i) {
 		nstime_add(&curtime, &epochtime);
-		epoch_advanced = decay_maybe_advance_epoch(&decay, &curtime,
-				    dirty_pages);
+		epoch_advanced = decay_maybe_advance_epoch(
+		    &decay, &curtime, dirty_pages);
 		if (epoch_advanced) {
-			size_t npages_limit_new = decay_npages_limit_get(&decay);
+			size_t npages_limit_new = decay_npages_limit_get(
+			    &decay);
 			expect_zu_lt(npages_limit_new, npages_limit,
 			    "napges_limit failed to decay");
 
@@ -189,20 +190,22 @@ TEST_BEGIN(test_decay) {
 		}
 	}
 
-	expect_zu_gt(npages_limit, 0, "npages_limit decayed to zero earlier "
+	expect_zu_gt(npages_limit, 0,
+	    "npages_limit decayed to zero earlier "
 	    "than decay_ms since last dirty page was added");
 
 	/* Completely push all dirty pages out of the backlog */
 	epoch_advanced = false;
 	for (uint64_t i = 0; i < nepoch_init; i++) {
 		nstime_add(&curtime, &epochtime);
-		epoch_advanced |= decay_maybe_advance_epoch(&decay, &curtime,
-		    dirty_pages);
+		epoch_advanced |= decay_maybe_advance_epoch(
+		    &decay, &curtime, dirty_pages);
 	}
 	expect_true(epoch_advanced, "Epoch never advanced");
 
 	npages_limit = decay_npages_limit_get(&decay);
-	expect_zu_eq(npages_limit, 0, "npages_limit didn't decay to 0 after "
+	expect_zu_eq(npages_limit, 0,
+	    "npages_limit didn't decay to 0 after "
 	    "decay_ms since last bump in dirty pages");
 }
 TEST_END
@@ -230,29 +233,29 @@ TEST_BEGIN(test_decay_ns_until_purge) {
 	    "Failed to return unbounded wait time for zero threshold");
 
 	const size_t dirty_pages_per_epoch = 1000;
-	size_t dirty_pages = 0;
-	bool epoch_advanced = false;
+	size_t       dirty_pages = 0;
+	bool         epoch_advanced = false;
 	for (uint64_t i = 0; i < nepoch_init; i++) {
 		nstime_add(&curtime, &epochtime);
 		dirty_pages += dirty_pages_per_epoch;
-		epoch_advanced |= decay_maybe_advance_epoch(&decay, &curtime,
-		    dirty_pages);
+		epoch_advanced |= decay_maybe_advance_epoch(
+		    &decay, &curtime, dirty_pages);
 	}
 	expect_true(epoch_advanced, "Epoch never advanced");
 
-	uint64_t ns_until_purge_all = decay_ns_until_purge(&decay,
-	    dirty_pages, dirty_pages);
+	uint64_t ns_until_purge_all = decay_ns_until_purge(
+	    &decay, dirty_pages, dirty_pages);
 	expect_u64_ge(ns_until_purge_all, decay_ns,
 	    "Incorrectly calculated time to purge all pages");
 
-	uint64_t ns_until_purge_none = decay_ns_until_purge(&decay,
-	    dirty_pages, 0);
+	uint64_t ns_until_purge_none = decay_ns_until_purge(
+	    &decay, dirty_pages, 0);
 	expect_u64_eq(ns_until_purge_none, decay_epoch_duration_ns(&decay) * 2,
 	    "Incorrectly calculated time to purge 0 pages");
 
 	uint64_t npages_threshold = dirty_pages / 2;
-	uint64_t ns_until_purge_half = decay_ns_until_purge(&decay,
-	    dirty_pages, npages_threshold);
+	uint64_t ns_until_purge_half = decay_ns_until_purge(
+	    &decay, dirty_pages, npages_threshold);
 
 	nstime_t waittime;
 	nstime_init(&waittime, ns_until_purge_half);
@@ -263,7 +266,7 @@ TEST_BEGIN(test_decay_ns_until_purge) {
 	expect_zu_lt(npages_limit, dirty_pages,
 	    "npages_limit failed to decrease after waiting");
 	size_t expected = dirty_pages - npages_limit;
-	int deviation = abs((int)expected - (int)(npages_threshold));
+	int    deviation = abs((int)expected - (int)(npages_threshold));
 	expect_d_lt(deviation, (int)(npages_threshold / 2),
 	    "After waiting, number of pages is out of the expected interval "
 	    "[0.5 * npages_threshold .. 1.5 * npages_threshold]");
@@ -272,12 +275,7 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_decay_init,
-	    test_decay_ms_valid,
-	    test_decay_npages_purge_in,
-	    test_decay_maybe_advance_epoch,
-	    test_decay_empty,
-	    test_decay,
-	    test_decay_ns_until_purge);
+	return test(test_decay_init, test_decay_ms_valid,
+	    test_decay_npages_purge_in, test_decay_maybe_advance_epoch,
+	    test_decay_empty, test_decay, test_decay_ns_until_purge);
 }
diff --git a/test/unit/div.c b/test/unit/div.c
index 29aea665..53447f4a 100644
--- a/test/unit/div.c
+++ b/test/unit/div.c
@@ -11,12 +11,12 @@ TEST_BEGIN(test_div_exhaustive) {
 			max = 1000 * 1000;
 		}
 		for (size_t dividend = 0; dividend < 1000 * divisor;
-		    dividend += divisor) {
-			size_t quotient = div_compute(
-			    &div_info, dividend);
+		     dividend += divisor) {
+			size_t quotient = div_compute(&div_info, dividend);
 			expect_zu_eq(dividend, quotient * divisor,
 			    "With divisor = %zu, dividend = %zu, "
-			    "got quotient %zu", divisor, dividend, quotient);
+			    "got quotient %zu",
+			    divisor, dividend, quotient);
 		}
 	}
 }
@@ -24,6 +24,5 @@ TEST_END
 
 int
 main(void) {
-	return test_no_reentrancy(
-	    test_div_exhaustive);
+	return test_no_reentrancy(test_div_exhaustive);
 }
diff --git a/test/unit/double_free.c b/test/unit/double_free.c
index 12122c1b..4bd6ab73 100644
--- a/test/unit/double_free.c
+++ b/test/unit/double_free.c
@@ -4,23 +4,33 @@
 #include "jemalloc/internal/safety_check.h"
 
 bool fake_abort_called;
-void fake_abort(const char *message) {
+void
+fake_abort(const char *message) {
 	(void)message;
 	fake_abort_called = true;
 }
 
-void
-test_large_double_free_pre(void) {
+static void
+test_double_free_pre(void) {
 	safety_check_set_abort(&fake_abort);
 	fake_abort_called = false;
 }
 
-void
-test_large_double_free_post() {
+static void
+test_double_free_post(void) {
 	expect_b_eq(fake_abort_called, true, "Double-free check didn't fire.");
 	safety_check_set_abort(NULL);
 }
 
+static bool
+tcache_enabled(void) {
+	bool   enabled;
+	size_t sz = sizeof(enabled);
+	assert_d_eq(mallctl("thread.tcache.enabled", &enabled, &sz, NULL, 0), 0,
+	    "Unexpected mallctl failure");
+	return enabled;
+}
+
 TEST_BEGIN(test_large_double_free_tcache) {
 	test_skip_if(!config_opt_safety_checks);
 	/*
@@ -29,9 +39,9 @@ TEST_BEGIN(test_large_double_free_tcache) {
 	 */
 	test_skip_if(config_debug);
 
-	test_large_double_free_pre();
+	test_double_free_pre();
 	char *ptr = malloc(SC_LARGE_MINCLASS);
-	bool guarded = extent_is_guarded(tsdn_fetch(), ptr);
+	bool  guarded = extent_is_guarded(tsdn_fetch(), ptr);
 	free(ptr);
 	if (!guarded) {
 		free(ptr);
@@ -44,7 +54,7 @@ TEST_BEGIN(test_large_double_free_tcache) {
 		fake_abort_called = true;
 	}
 	mallctl("thread.tcache.flush", NULL, NULL, NULL, 0);
-	test_large_double_free_post();
+	test_double_free_post();
 }
 TEST_END
 
@@ -52,9 +62,9 @@ TEST_BEGIN(test_large_double_free_no_tcache) {
 	test_skip_if(!config_opt_safety_checks);
 	test_skip_if(config_debug);
 
-	test_large_double_free_pre();
+	test_double_free_pre();
 	char *ptr = mallocx(SC_LARGE_MINCLASS, MALLOCX_TCACHE_NONE);
-	bool guarded = extent_is_guarded(tsdn_fetch(), ptr);
+	bool  guarded = extent_is_guarded(tsdn_fetch(), ptr);
 	dallocx(ptr, MALLOCX_TCACHE_NONE);
 	if (!guarded) {
 		dallocx(ptr, MALLOCX_TCACHE_NONE);
@@ -66,12 +76,66 @@ TEST_BEGIN(test_large_double_free_no_tcache) {
 		 */
 		fake_abort_called = true;
 	}
-	test_large_double_free_post();
+	test_double_free_post();
+}
+TEST_END
+
+TEST_BEGIN(test_small_double_free_tcache) {
+	test_skip_if(!config_debug);
+	test_skip_if(opt_debug_double_free_max_scan == 0);
+	test_skip_if(!tcache_enabled());
+
+	test_double_free_pre();
+	char *ptr = malloc(1);
+	bool  guarded = extent_is_guarded(tsdn_fetch(), ptr);
+	free(ptr);
+	if (!guarded) {
+		free(ptr);
+	} else {
+		/*
+		 * Skip because guarded extents may unguard immediately on
+		 * deallocation, in which case the second free will crash before
+		 * reaching the intended safety check.
+		 */
+		fake_abort_called = true;
+	}
+	mallctl("thread.tcache.flush", NULL, NULL, NULL, 0);
+	test_double_free_post();
+}
+TEST_END
+
+TEST_BEGIN(test_small_double_free_arena) {
+	test_skip_if(!config_debug);
+	test_skip_if(!tcache_enabled());
+
+	test_double_free_pre();
+	/*
+	 * Allocate one more pointer to keep the slab partially used after
+	 * flushing the cache.
+	 */
+	char *ptr1 = malloc(1);
+	char *ptr = malloc(1);
+	bool  guarded = extent_is_guarded(tsdn_fetch(), ptr);
+	free(ptr);
+	if (!guarded) {
+		mallctl("thread.tcache.flush", NULL, NULL, NULL, 0);
+		free(ptr);
+	} else {
+		/*
+		 * Skip because guarded extents may unguard immediately on
+		 * deallocation, in which case the second free will crash before
+		 * reaching the intended safety check.
+		 */
+		fake_abort_called = true;
+	}
+	test_double_free_post();
+	free(ptr1);
 }
 TEST_END
 
 int
 main(void) {
 	return test(test_large_double_free_no_tcache,
-	    test_large_double_free_tcache);
+	    test_large_double_free_tcache, test_small_double_free_tcache,
+	    test_small_double_free_arena);
 }
diff --git a/test/unit/edata_cache.c b/test/unit/edata_cache.c
index af1110a9..16ed58b2 100644
--- a/test/unit/edata_cache.c
+++ b/test/unit/edata_cache.c
@@ -49,16 +49,16 @@ TEST_END
 
 static size_t
 ecf_count(edata_cache_fast_t *ecf) {
-	size_t count = 0;
+	size_t   count = 0;
 	edata_t *cur;
-	ql_foreach(cur, &ecf->list.head, ql_link_inactive) {
+	ql_foreach (cur, &ecf->list.head, ql_link_inactive) {
 		count++;
 	}
 	return count;
 }
 
 TEST_BEGIN(test_edata_cache_fast_simple) {
-	edata_cache_t ec;
+	edata_cache_t      ec;
 	edata_cache_fast_t ecf;
 
 	test_edata_cache_init(&ec);
@@ -96,7 +96,7 @@ TEST_BEGIN(test_edata_cache_fast_simple) {
 TEST_END
 
 TEST_BEGIN(test_edata_cache_fill) {
-	edata_cache_t ec;
+	edata_cache_t      ec;
 	edata_cache_fast_t ecf;
 
 	test_edata_cache_init(&ec);
@@ -179,7 +179,7 @@ TEST_BEGIN(test_edata_cache_fill) {
 TEST_END
 
 TEST_BEGIN(test_edata_cache_disable) {
-	edata_cache_t ec;
+	edata_cache_t      ec;
 	edata_cache_fast_t ecf;
 
 	test_edata_cache_init(&ec);
@@ -198,7 +198,8 @@ TEST_BEGIN(test_edata_cache_disable) {
 
 	expect_zu_eq(0, ecf_count(&ecf), "");
 	expect_zu_eq(EDATA_CACHE_FAST_FILL,
-	    atomic_load_zu(&ec.count, ATOMIC_RELAXED), "Disabling should flush");
+	    atomic_load_zu(&ec.count, ATOMIC_RELAXED),
+	    "Disabling should flush");
 
 	edata_t *edata = edata_cache_fast_get(TSDN_NULL, &ecf);
 	expect_zu_eq(0, ecf_count(&ecf), "");
@@ -218,9 +219,6 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_edata_cache,
-	    test_edata_cache_fast_simple,
-	    test_edata_cache_fill,
-	    test_edata_cache_disable);
+	return test(test_edata_cache, test_edata_cache_fast_simple,
+	    test_edata_cache_fill, test_edata_cache_disable);
 }
diff --git a/test/unit/emitter.c b/test/unit/emitter.c
index ef8f9ff5..dc53b9eb 100644
--- a/test/unit/emitter.c
+++ b/test/unit/emitter.c
@@ -12,9 +12,9 @@ static bool print_escaped = false;
 
 typedef struct buf_descriptor_s buf_descriptor_t;
 struct buf_descriptor_s {
-	char *buf;
+	char  *buf;
 	size_t len;
-	bool mid_quote;
+	bool   mid_quote;
 };
 
 /*
@@ -56,8 +56,8 @@ forwarding_cb(void *buf_descriptor_v, const char *str) {
 		}
 	}
 
-	size_t written = malloc_snprintf(buf_descriptor->buf,
-	    buf_descriptor->len, "%s", str);
+	size_t written = malloc_snprintf(
+	    buf_descriptor->buf, buf_descriptor->len, "%s", str);
 	expect_zu_eq(written, strlen(str), "Buffer overflow!");
 	buf_descriptor->buf += written;
 	buf_descriptor->len -= written;
@@ -66,19 +66,18 @@ forwarding_cb(void *buf_descriptor_v, const char *str) {
 
 static void
 expect_emit_output(void (*emit_fn)(emitter_t *),
-    const char *expected_json_output,
-    const char *expected_json_compact_output,
+    const char *expected_json_output, const char *expected_json_compact_output,
     const char *expected_table_output) {
-	emitter_t emitter;
-	char buf[MALLOC_PRINTF_BUFSIZE];
+	emitter_t        emitter;
+	char             buf[MALLOC_PRINTF_BUFSIZE];
 	buf_descriptor_t buf_descriptor;
 
 	buf_descriptor.buf = buf;
 	buf_descriptor.len = MALLOC_PRINTF_BUFSIZE;
 	buf_descriptor.mid_quote = false;
 
-	emitter_init(&emitter, emitter_output_json, &forwarding_cb,
-	    &buf_descriptor);
+	emitter_init(
+	    &emitter, emitter_output_json, &forwarding_cb, &buf_descriptor);
 	(*emit_fn)(&emitter);
 	expect_str_eq(expected_json_output, buf, "json output failure");
 
@@ -89,24 +88,24 @@ expect_emit_output(void (*emit_fn)(emitter_t *),
 	emitter_init(&emitter, emitter_output_json_compact, &forwarding_cb,
 	    &buf_descriptor);
 	(*emit_fn)(&emitter);
-	expect_str_eq(expected_json_compact_output, buf,
-	    "compact json output failure");
+	expect_str_eq(
+	    expected_json_compact_output, buf, "compact json output failure");
 
 	buf_descriptor.buf = buf;
 	buf_descriptor.len = MALLOC_PRINTF_BUFSIZE;
 	buf_descriptor.mid_quote = false;
 
-	emitter_init(&emitter, emitter_output_table, &forwarding_cb,
-	    &buf_descriptor);
+	emitter_init(
+	    &emitter, emitter_output_table, &forwarding_cb, &buf_descriptor);
 	(*emit_fn)(&emitter);
 	expect_str_eq(expected_table_output, buf, "table output failure");
 }
 
 static void
 emit_dict(emitter_t *emitter) {
-	bool b_false = false;
-	bool b_true = true;
-	int i_123 = 123;
+	bool        b_false = false;
+	bool        b_true = true;
+	int         i_123 = 123;
 	const char *str = "a string";
 
 	emitter_begin(emitter);
@@ -122,48 +121,49 @@ emit_dict(emitter_t *emitter) {
 }
 
 static const char *dict_json =
-"{\n"
-"\t\"foo\": {\n"
-"\t\t\"abc\": false,\n"
-"\t\t\"def\": true,\n"
-"\t\t\"ghi\": 123,\n"
-"\t\t\"jkl\": \"a string\"\n"
-"\t}\n"
-"}\n";
+    "{\n"
+    "\t\"foo\": {\n"
+    "\t\t\"abc\": false,\n"
+    "\t\t\"def\": true,\n"
+    "\t\t\"ghi\": 123,\n"
+    "\t\t\"jkl\": \"a string\"\n"
+    "\t}\n"
+    "}\n";
 static const char *dict_json_compact =
-"{"
-	"\"foo\":{"
-		"\"abc\":false,"
-		"\"def\":true,"
-		"\"ghi\":123,"
-		"\"jkl\":\"a string\""
-	"}"
-"}";
+    "{"
+    "\"foo\":{"
+    "\"abc\":false,"
+    "\"def\":true,"
+    "\"ghi\":123,"
+    "\"jkl\":\"a string\""
+    "}"
+    "}";
 static const char *dict_table =
-"This is the foo table:\n"
-"  ABC: false\n"
-"  DEF: true\n"
-"  GHI: 123 (note_key1: \"a string\")\n"
-"  JKL: \"a string\" (note_key2: false)\n";
+    "This is the foo table:\n"
+    "  ABC: false\n"
+    "  DEF: true\n"
+    "  GHI: 123 (note_key1: \"a string\")\n"
+    "  JKL: \"a string\" (note_key2: false)\n";
 
 static void
 emit_table_printf(emitter_t *emitter) {
 	emitter_begin(emitter);
 	emitter_table_printf(emitter, "Table note 1\n");
-	emitter_table_printf(emitter, "Table note 2 %s\n",
-	    "with format string");
+	emitter_table_printf(
+	    emitter, "Table note 2 %s\n", "with format string");
 	emitter_end(emitter);
 }
 
 static const char *table_printf_json =
-"{\n"
-"}\n";
+    "{\n"
+    "}\n";
 static const char *table_printf_json_compact = "{}";
 static const char *table_printf_table =
-"Table note 1\n"
-"Table note 2 with format string\n";
+    "Table note 1\n"
+    "Table note 2 with format string\n";
 
-static void emit_nested_dict(emitter_t *emitter) {
+static void
+emit_nested_dict(emitter_t *emitter) {
 	int val = 123;
 	emitter_begin(emitter);
 	emitter_dict_begin(emitter, "json1", "Dict 1");
@@ -174,54 +174,65 @@ static void emit_nested_dict(emitter_t *emitter) {
 	emitter_dict_end(emitter); /* Close 3 */
 	emitter_dict_end(emitter); /* Close 1 */
 	emitter_dict_begin(emitter, "json4", "Dict 4");
-	emitter_kv(emitter, "primitive", "Another primitive",
-	    emitter_type_int, &val);
+	emitter_kv(
+	    emitter, "primitive", "Another primitive", emitter_type_int, &val);
 	emitter_dict_end(emitter); /* Close 4 */
 	emitter_end(emitter);
 }
 
 static const char *nested_dict_json =
-"{\n"
-"\t\"json1\": {\n"
-"\t\t\"json2\": {\n"
-"\t\t\t\"primitive\": 123\n"
-"\t\t},\n"
-"\t\t\"json3\": {\n"
-"\t\t}\n"
-"\t},\n"
-"\t\"json4\": {\n"
-"\t\t\"primitive\": 123\n"
-"\t}\n"
-"}\n";
+    "{\n"
+    "\t\"json1\": {\n"
+    "\t\t\"json2\": {\n"
+    "\t\t\t\"primitive\": 123\n"
+    "\t\t},\n"
+    "\t\t\"json3\": {\n"
+    "\t\t}\n"
+    "\t},\n"
+    "\t\"json4\": {\n"
+    "\t\t\"primitive\": 123\n"
+    "\t}\n"
+    "}\n";
 static const char *nested_dict_json_compact =
-"{"
-	"\"json1\":{"
-		"\"json2\":{"
-			"\"primitive\":123"
-		"},"
-		"\"json3\":{"
-		"}"
-	"},"
-	"\"json4\":{"
-		"\"primitive\":123"
-	"}"
-"}";
+    "{"
+    "\"json1\":{"
+    "\"json2\":{"
+    "\"primitive\":123"
+    "},"
+    "\"json3\":{"
+    "}"
+    "},"
+    "\"json4\":{"
+    "\"primitive\":123"
+    "}"
+    "}";
 static const char *nested_dict_table =
-"Dict 1\n"
-"  Dict 2\n"
-"    A primitive: 123\n"
-"  Dict 3\n"
-"Dict 4\n"
-"  Another primitive: 123\n";
+    "Dict 1\n"
+    "  Dict 2\n"
+    "    A primitive: 123\n"
+    "  Dict 3\n"
+    "Dict 4\n"
+    "  Another primitive: 123\n";
 
 static void
 emit_types(emitter_t *emitter) {
-	bool b = false;
-	int i = -123;
-	unsigned u = 123;
-	ssize_t zd = -456;
-	size_t zu = 456;
+	bool        b = false;
+	int         i = -123;
+	unsigned    u = 123;
+	ssize_t     zd = -456;
+	size_t      zu = 456;
 	const char *str = "string";
+	const char *long_str =
+	    "abcdefghijklmnopqrstuvwxyz "
+	    "abcdefghijklmnopqrstuvwxyz "
+	    "abcdefghijklmnopqrstuvwxyz "
+	    "abcdefghijklmnopqrstuvwxyz "
+	    "abcdefghijklmnopqrstuvwxyz "
+	    "abcdefghijklmnopqrstuvwxyz "
+	    "abcdefghijklmnopqrstuvwxyz "
+	    "abcdefghijklmnopqrstuvwxyz "
+	    "abcdefghijklmnopqrstuvwxyz "
+	    "abcdefghijklmnopqrstuvwxyz";
 	uint32_t u32 = 789;
 	uint64_t u64 = 10000000000ULL;
 
@@ -232,8 +243,9 @@ emit_types(emitter_t *emitter) {
 	emitter_kv(emitter, "k4", "K4", emitter_type_ssize, &zd);
 	emitter_kv(emitter, "k5", "K5", emitter_type_size, &zu);
 	emitter_kv(emitter, "k6", "K6", emitter_type_string, &str);
-	emitter_kv(emitter, "k7", "K7", emitter_type_uint32, &u32);
-	emitter_kv(emitter, "k8", "K8", emitter_type_uint64, &u64);
+	emitter_kv(emitter, "k7", "K7", emitter_type_string, &long_str);
+	emitter_kv(emitter, "k8", "K8", emitter_type_uint32, &u32);
+	emitter_kv(emitter, "k9", "K9", emitter_type_uint64, &u64);
 	/*
 	 * We don't test the title type, since it's only used for tables.  It's
 	 * tested in the emitter_table_row tests.
@@ -242,36 +254,66 @@ emit_types(emitter_t *emitter) {
 }
 
 static const char *types_json =
-"{\n"
-"\t\"k1\": false,\n"
-"\t\"k2\": -123,\n"
-"\t\"k3\": 123,\n"
-"\t\"k4\": -456,\n"
-"\t\"k5\": 456,\n"
-"\t\"k6\": \"string\",\n"
-"\t\"k7\": 789,\n"
-"\t\"k8\": 10000000000\n"
-"}\n";
+    "{\n"
+    "\t\"k1\": false,\n"
+    "\t\"k2\": -123,\n"
+    "\t\"k3\": 123,\n"
+    "\t\"k4\": -456,\n"
+    "\t\"k5\": 456,\n"
+    "\t\"k6\": \"string\",\n"
+    "\t\"k7\": \"abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz\",\n"
+    "\t\"k8\": 789,\n"
+    "\t\"k9\": 10000000000\n"
+    "}\n";
 static const char *types_json_compact =
-"{"
-	"\"k1\":false,"
-	"\"k2\":-123,"
-	"\"k3\":123,"
-	"\"k4\":-456,"
-	"\"k5\":456,"
-	"\"k6\":\"string\","
-	"\"k7\":789,"
-	"\"k8\":10000000000"
-"}";
+    "{"
+    "\"k1\":false,"
+    "\"k2\":-123,"
+    "\"k3\":123,"
+    "\"k4\":-456,"
+    "\"k5\":456,"
+    "\"k6\":\"string\","
+    "\"k7\":\"abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz\","
+    "\"k8\":789,"
+    "\"k9\":10000000000"
+    "}";
 static const char *types_table =
-"K1: false\n"
-"K2: -123\n"
-"K3: 123\n"
-"K4: -456\n"
-"K5: 456\n"
-"K6: \"string\"\n"
-"K7: 789\n"
-"K8: 10000000000\n";
+    "K1: false\n"
+    "K2: -123\n"
+    "K3: 123\n"
+    "K4: -456\n"
+    "K5: 456\n"
+    "K6: \"string\"\n"
+    "K7: \"abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz "
+    "abcdefghijklmnopqrstuvwxyz\"\n"
+    "K8: 789\n"
+    "K9: 10000000000\n";
 
 static void
 emit_modal(emitter_t *emitter) {
@@ -294,37 +336,37 @@ emit_modal(emitter_t *emitter) {
 }
 
 const char *modal_json =
-"{\n"
-"\t\"j0\": {\n"
-"\t\t\"j1\": {\n"
-"\t\t\t\"i1\": 123,\n"
-"\t\t\t\"i2\": 123,\n"
-"\t\t\t\"i4\": 123\n"
-"\t\t},\n"
-"\t\t\"i5\": 123,\n"
-"\t\t\"i6\": 123\n"
-"\t}\n"
-"}\n";
+    "{\n"
+    "\t\"j0\": {\n"
+    "\t\t\"j1\": {\n"
+    "\t\t\t\"i1\": 123,\n"
+    "\t\t\t\"i2\": 123,\n"
+    "\t\t\t\"i4\": 123\n"
+    "\t\t},\n"
+    "\t\t\"i5\": 123,\n"
+    "\t\t\"i6\": 123\n"
+    "\t}\n"
+    "}\n";
 const char *modal_json_compact =
-"{"
-	"\"j0\":{"
-		"\"j1\":{"
-			"\"i1\":123,"
-			"\"i2\":123,"
-			"\"i4\":123"
-		"},"
-		"\"i5\":123,"
-		"\"i6\":123"
-	"}"
-"}";
+    "{"
+    "\"j0\":{"
+    "\"j1\":{"
+    "\"i1\":123,"
+    "\"i2\":123,"
+    "\"i4\":123"
+    "},"
+    "\"i5\":123,"
+    "\"i6\":123"
+    "}"
+    "}";
 const char *modal_table =
-"T0\n"
-"  I1: 123\n"
-"  I3: 123\n"
-"  T1\n"
-"    I4: 123\n"
-"    I5: 123\n"
-"  I6: 123\n";
+    "T0\n"
+    "  I1: 123\n"
+    "  I3: 123\n"
+    "  T1\n"
+    "    I4: 123\n"
+    "    I5: 123\n"
+    "  I6: 123\n";
 
 static void
 emit_json_array(emitter_t *emitter) {
@@ -345,121 +387,124 @@ emit_json_array(emitter_t *emitter) {
 	emitter_json_kv(emitter, "bar", emitter_type_int, &ival);
 	emitter_json_kv(emitter, "baz", emitter_type_int, &ival);
 	emitter_json_object_end(emitter); /* Close arr[3]. */
-	emitter_json_array_end(emitter); /* Close arr. */
+	emitter_json_array_end(emitter);  /* Close arr. */
 	emitter_json_object_end(emitter); /* Close dict. */
 	emitter_end(emitter);
 }
 
 static const char *json_array_json =
-"{\n"
-"\t\"dict\": {\n"
-"\t\t\"arr\": [\n"
-"\t\t\t{\n"
-"\t\t\t\t\"foo\": 123\n"
-"\t\t\t},\n"
-"\t\t\t123,\n"
-"\t\t\t123,\n"
-"\t\t\t{\n"
-"\t\t\t\t\"bar\": 123,\n"
-"\t\t\t\t\"baz\": 123\n"
-"\t\t\t}\n"
-"\t\t]\n"
-"\t}\n"
-"}\n";
+    "{\n"
+    "\t\"dict\": {\n"
+    "\t\t\"arr\": [\n"
+    "\t\t\t{\n"
+    "\t\t\t\t\"foo\": 123\n"
+    "\t\t\t},\n"
+    "\t\t\t123,\n"
+    "\t\t\t123,\n"
+    "\t\t\t{\n"
+    "\t\t\t\t\"bar\": 123,\n"
+    "\t\t\t\t\"baz\": 123\n"
+    "\t\t\t}\n"
+    "\t\t]\n"
+    "\t}\n"
+    "}\n";
 static const char *json_array_json_compact =
-"{"
-	"\"dict\":{"
-		"\"arr\":["
-			"{"
-				"\"foo\":123"
-			"},"
-			"123,"
-			"123,"
-			"{"
-				"\"bar\":123,"
-				"\"baz\":123"
-			"}"
-		"]"
-	"}"
-"}";
+    "{"
+    "\"dict\":{"
+    "\"arr\":["
+    "{"
+    "\"foo\":123"
+    "},"
+    "123,"
+    "123,"
+    "{"
+    "\"bar\":123,"
+    "\"baz\":123"
+    "}"
+    "]"
+    "}"
+    "}";
 static const char *json_array_table = "";
 
 static void
 emit_json_nested_array(emitter_t *emitter) {
-	int ival = 123;
+	int   ival = 123;
 	char *sval = "foo";
 	emitter_begin(emitter);
 	emitter_json_array_begin(emitter);
-		emitter_json_array_begin(emitter);
-		emitter_json_value(emitter, emitter_type_int, &ival);
-		emitter_json_value(emitter, emitter_type_string, &sval);
-		emitter_json_value(emitter, emitter_type_int, &ival);
-		emitter_json_value(emitter, emitter_type_string, &sval);
-		emitter_json_array_end(emitter);
-		emitter_json_array_begin(emitter);
-		emitter_json_value(emitter, emitter_type_int, &ival);
-		emitter_json_array_end(emitter);
-		emitter_json_array_begin(emitter);
-		emitter_json_value(emitter, emitter_type_string, &sval);
-		emitter_json_value(emitter, emitter_type_int, &ival);
-		emitter_json_array_end(emitter);
-		emitter_json_array_begin(emitter);
-		emitter_json_array_end(emitter);
+	emitter_json_array_begin(emitter);
+	emitter_json_value(emitter, emitter_type_int, &ival);
+	emitter_json_value(emitter, emitter_type_string, &sval);
+	emitter_json_value(emitter, emitter_type_int, &ival);
+	emitter_json_value(emitter, emitter_type_string, &sval);
+	emitter_json_array_end(emitter);
+	emitter_json_array_begin(emitter);
+	emitter_json_value(emitter, emitter_type_int, &ival);
+	emitter_json_array_end(emitter);
+	emitter_json_array_begin(emitter);
+	emitter_json_value(emitter, emitter_type_string, &sval);
+	emitter_json_value(emitter, emitter_type_int, &ival);
+	emitter_json_array_end(emitter);
+	emitter_json_array_begin(emitter);
+	emitter_json_array_end(emitter);
 	emitter_json_array_end(emitter);
 	emitter_end(emitter);
 }
 
 static const char *json_nested_array_json =
-"{\n"
-"\t[\n"
-"\t\t[\n"
-"\t\t\t123,\n"
-"\t\t\t\"foo\",\n"
-"\t\t\t123,\n"
-"\t\t\t\"foo\"\n"
-"\t\t],\n"
-"\t\t[\n"
-"\t\t\t123\n"
-"\t\t],\n"
-"\t\t[\n"
-"\t\t\t\"foo\",\n"
-"\t\t\t123\n"
-"\t\t],\n"
-"\t\t[\n"
-"\t\t]\n"
-"\t]\n"
-"}\n";
+    "{\n"
+    "\t[\n"
+    "\t\t[\n"
+    "\t\t\t123,\n"
+    "\t\t\t\"foo\",\n"
+    "\t\t\t123,\n"
+    "\t\t\t\"foo\"\n"
+    "\t\t],\n"
+    "\t\t[\n"
+    "\t\t\t123\n"
+    "\t\t],\n"
+    "\t\t[\n"
+    "\t\t\t\"foo\",\n"
+    "\t\t\t123\n"
+    "\t\t],\n"
+    "\t\t[\n"
+    "\t\t]\n"
+    "\t]\n"
+    "}\n";
 static const char *json_nested_array_json_compact =
-"{"
-	"["
-		"["
-			"123,"
-			"\"foo\","
-			"123,"
-			"\"foo\""
-		"],"
-		"["
-			"123"
-		"],"
-		"["
-			"\"foo\","
-			"123"
-		"],"
-		"["
-		"]"
-	"]"
-"}";
+    "{"
+    "["
+    "["
+    "123,"
+    "\"foo\","
+    "123,"
+    "\"foo\""
+    "],"
+    "["
+    "123"
+    "],"
+    "["
+    "\"foo\","
+    "123"
+    "],"
+    "["
+    "]"
+    "]"
+    "}";
 static const char *json_nested_array_table = "";
 
 static void
 emit_table_row(emitter_t *emitter) {
 	emitter_begin(emitter);
 	emitter_row_t row;
-	emitter_col_t abc = {emitter_justify_left, 10, emitter_type_title, {0}, {0, 0}};
+	emitter_col_t abc = {
+	    emitter_justify_left, 10, emitter_type_title, {0}, {0, 0}};
 	abc.str_val = "ABC title";
-	emitter_col_t def = {emitter_justify_right, 15, emitter_type_title, {0}, {0, 0}};
+	emitter_col_t def = {
+	    emitter_justify_right, 15, emitter_type_title, {0}, {0, 0}};
 	def.str_val = "DEF title";
-	emitter_col_t ghi = {emitter_justify_right, 5, emitter_type_title, {0}, {0, 0}};
+	emitter_col_t ghi = {
+	    emitter_justify_right, 5, emitter_type_title, {0}, {0, 0}};
 	ghi.str_val = "GHI";
 
 	emitter_row_init(&row);
@@ -494,21 +539,21 @@ emit_table_row(emitter_t *emitter) {
 }
 
 static const char *table_row_json =
-"{\n"
-"}\n";
+    "{\n"
+    "}\n";
 static const char *table_row_json_compact = "{}";
 static const char *table_row_table =
-"ABC title       DEF title  GHI\n"
-"123                  true  456\n"
-"789                 false 1011\n"
-"\"a string\"          false  ghi\n";
+    "ABC title       DEF title  GHI\n"
+    "123                  true  456\n"
+    "789                 false 1011\n"
+    "\"a string\"          false  ghi\n";
 
-#define GENERATE_TEST(feature)					\
-TEST_BEGIN(test_##feature) {					\
-	expect_emit_output(emit_##feature, feature##_json,	\
-	    feature##_json_compact, feature##_table);		\
-}								\
-TEST_END
+#define GENERATE_TEST(feature)                                                 \
+	TEST_BEGIN(test_##feature) {                                           \
+		expect_emit_output(emit_##feature, feature##_json,             \
+		    feature##_json_compact, feature##_table);                  \
+	}                                                                      \
+	TEST_END
 
 GENERATE_TEST(dict)
 GENERATE_TEST(table_printf)
@@ -521,13 +566,7 @@ GENERATE_TEST(table_row)
 
 int
 main(void) {
-	return test_no_reentrancy(
-	    test_dict,
-	    test_table_printf,
-	    test_nested_dict,
-	    test_types,
-	    test_modal,
-	    test_json_array,
-	    test_json_nested_array,
-	    test_table_row);
+	return test_no_reentrancy(test_dict, test_table_printf,
+	    test_nested_dict, test_types, test_modal, test_json_array,
+	    test_json_nested_array, test_table_row);
 }
diff --git a/test/unit/extent_quantize.c b/test/unit/extent_quantize.c
index e6bbd539..c178240e 100644
--- a/test/unit/extent_quantize.c
+++ b/test/unit/extent_quantize.c
@@ -2,9 +2,9 @@
 
 TEST_BEGIN(test_small_extent_size) {
 	unsigned nbins, i;
-	size_t sz, extent_size;
-	size_t mib[4];
-	size_t miblen = sizeof(mib) / sizeof(size_t);
+	size_t   sz, extent_size;
+	size_t   mib[4];
+	size_t   miblen = sizeof(mib) / sizeof(size_t);
 
 	/*
 	 * Iterate over all small size classes, get their extent sizes, and
@@ -21,25 +21,26 @@ TEST_BEGIN(test_small_extent_size) {
 		mib[2] = i;
 		sz = sizeof(size_t);
 		expect_d_eq(mallctlbymib(mib, miblen, (void *)&extent_size, &sz,
-		    NULL, 0), 0, "Unexpected mallctlbymib failure");
-		expect_zu_eq(extent_size,
-		    sz_psz_quantize_floor(extent_size),
+		                NULL, 0),
+		    0, "Unexpected mallctlbymib failure");
+		expect_zu_eq(extent_size, sz_psz_quantize_floor(extent_size),
 		    "Small extent quantization should be a no-op "
-		    "(extent_size=%zu)", extent_size);
-		expect_zu_eq(extent_size,
-		    sz_psz_quantize_ceil(extent_size),
+		    "(extent_size=%zu)",
+		    extent_size);
+		expect_zu_eq(extent_size, sz_psz_quantize_ceil(extent_size),
 		    "Small extent quantization should be a no-op "
-		    "(extent_size=%zu)", extent_size);
+		    "(extent_size=%zu)",
+		    extent_size);
 	}
 }
 TEST_END
 
 TEST_BEGIN(test_large_extent_size) {
-	bool cache_oblivious;
+	bool     cache_oblivious;
 	unsigned nlextents, i;
-	size_t sz, extent_size_prev, ceil_prev;
-	size_t mib[4];
-	size_t miblen = sizeof(mib) / sizeof(size_t);
+	size_t   sz, extent_size_prev, ceil_prev;
+	size_t   mib[4];
+	size_t   miblen = sizeof(mib) / sizeof(size_t);
 
 	/*
 	 * Iterate over all large size classes, get their extent sizes, and
@@ -48,11 +49,13 @@ TEST_BEGIN(test_large_extent_size) {
 
 	sz = sizeof(bool);
 	expect_d_eq(mallctl("opt.cache_oblivious", (void *)&cache_oblivious,
-	    &sz, NULL, 0), 0, "Unexpected mallctl failure");
+	                &sz, NULL, 0),
+	    0, "Unexpected mallctl failure");
 
 	sz = sizeof(unsigned);
-	expect_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL,
-	    0), 0, "Unexpected mallctl failure");
+	expect_d_eq(
+	    mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, 0), 0,
+	    "Unexpected mallctl failure");
 
 	expect_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0,
 	    "Unexpected mallctlnametomib failure");
@@ -62,20 +65,21 @@ TEST_BEGIN(test_large_extent_size) {
 		mib[2] = i;
 		sz = sizeof(size_t);
 		expect_d_eq(mallctlbymib(mib, miblen, (void *)&lextent_size,
-		    &sz, NULL, 0), 0, "Unexpected mallctlbymib failure");
-		extent_size = cache_oblivious ? lextent_size + PAGE :
-		    lextent_size;
+		                &sz, NULL, 0),
+		    0, "Unexpected mallctlbymib failure");
+		extent_size = cache_oblivious ? lextent_size + PAGE
+		                              : lextent_size;
 		floor = sz_psz_quantize_floor(extent_size);
 		ceil = sz_psz_quantize_ceil(extent_size);
 
 		expect_zu_eq(extent_size, floor,
 		    "Extent quantization should be a no-op for precise size "
-		    "(lextent_size=%zu, extent_size=%zu)", lextent_size,
-		    extent_size);
+		    "(lextent_size=%zu, extent_size=%zu)",
+		    lextent_size, extent_size);
 		expect_zu_eq(extent_size, ceil,
 		    "Extent quantization should be a no-op for precise size "
-		    "(lextent_size=%zu, extent_size=%zu)", lextent_size,
-		    extent_size);
+		    "(lextent_size=%zu, extent_size=%zu)",
+		    lextent_size, extent_size);
 
 		if (i > 0) {
 			expect_zu_eq(extent_size_prev,
@@ -85,23 +89,22 @@ TEST_BEGIN(test_large_extent_size) {
 				expect_zu_eq(ceil_prev, extent_size,
 				    "Ceiling should be a precise size "
 				    "(extent_size_prev=%zu, ceil_prev=%zu, "
-				    "extent_size=%zu)", extent_size_prev,
-				    ceil_prev, extent_size);
+				    "extent_size=%zu)",
+				    extent_size_prev, ceil_prev, extent_size);
 			}
 		}
 		if (i + 1 < nlextents) {
 			extent_size_prev = floor;
-			ceil_prev = sz_psz_quantize_ceil(extent_size +
-			    PAGE);
+			ceil_prev = sz_psz_quantize_ceil(extent_size + PAGE);
 		}
 	}
 }
 TEST_END
 
 TEST_BEGIN(test_monotonic) {
-#define SZ_MAX	ZU(4 * 1024 * 1024)
+#define SZ_MAX ZU(4 * 1024 * 1024)
 	unsigned i;
-	size_t floor_prev, ceil_prev;
+	size_t   floor_prev, ceil_prev;
 
 	floor_prev = 0;
 	ceil_prev = 0;
@@ -117,12 +120,15 @@ TEST_BEGIN(test_monotonic) {
 		    floor, extent_size, ceil);
 		expect_zu_ge(ceil, extent_size,
 		    "Ceiling should be >= (floor=%zu, extent_size=%zu, "
-		    "ceil=%zu)", floor, extent_size, ceil);
+		    "ceil=%zu)",
+		    floor, extent_size, ceil);
 
-		expect_zu_le(floor_prev, floor, "Floor should be monotonic "
+		expect_zu_le(floor_prev, floor,
+		    "Floor should be monotonic "
 		    "(floor_prev=%zu, floor=%zu, extent_size=%zu, ceil=%zu)",
 		    floor_prev, floor, extent_size, ceil);
-		expect_zu_le(ceil_prev, ceil, "Ceiling should be monotonic "
+		expect_zu_le(ceil_prev, ceil,
+		    "Ceiling should be monotonic "
 		    "(floor=%zu, extent_size=%zu, ceil_prev=%zu, ceil=%zu)",
 		    floor, extent_size, ceil_prev, ceil);
 
@@ -135,7 +141,5 @@ TEST_END
 int
 main(void) {
 	return test(
-	    test_small_extent_size,
-	    test_large_extent_size,
-	    test_monotonic);
+	    test_small_extent_size, test_large_extent_size, test_monotonic);
 }
diff --git a/test/unit/fb.c b/test/unit/fb.c
index ad72c75a..26a33fd9 100644
--- a/test/unit/fb.c
+++ b/test/unit/fb.c
@@ -5,21 +5,19 @@
 
 static void
 do_test_init(size_t nbits) {
-	size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t);
+	size_t      sz = FB_NGROUPS(nbits) * sizeof(fb_group_t);
 	fb_group_t *fb = malloc(sz);
 	/* Junk fb's contents. */
 	memset(fb, 99, sz);
 	fb_init(fb, nbits);
 	for (size_t i = 0; i < nbits; i++) {
-		expect_false(fb_get(fb, nbits, i),
-		    "bitmap should start empty");
+		expect_false(fb_get(fb, nbits, i), "bitmap should start empty");
 	}
 	free(fb);
 }
 
 TEST_BEGIN(test_fb_init) {
-#define NB(nbits) \
-	do_test_init(nbits);
+#define NB(nbits) do_test_init(nbits);
 	NBITS_TAB
 #undef NB
 }
@@ -27,7 +25,7 @@ TEST_END
 
 static void
 do_test_get_set_unset(size_t nbits) {
-	size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t);
+	size_t      sz = FB_NGROUPS(nbits) * sizeof(fb_group_t);
 	fb_group_t *fb = malloc(sz);
 	fb_init(fb, nbits);
 	/* Set the bits divisible by 3. */
@@ -56,8 +54,7 @@ do_test_get_set_unset(size_t nbits) {
 }
 
 TEST_BEGIN(test_get_set_unset) {
-#define NB(nbits) \
-	do_test_get_set_unset(nbits);
+#define NB(nbits) do_test_get_set_unset(nbits);
 	NBITS_TAB
 #undef NB
 }
@@ -65,7 +62,7 @@ TEST_END
 
 static ssize_t
 find_3_5_compute(ssize_t i, size_t nbits, bool bit, bool forward) {
-	for(; i < (ssize_t)nbits && i >= 0; i += (forward ? 1 : -1)) {
+	for (; i < (ssize_t)nbits && i >= 0; i += (forward ? 1 : -1)) {
 		bool expected_bit = i % 3 == 0 || i % 5 == 0;
 		if (expected_bit == bit) {
 			return i;
@@ -76,7 +73,7 @@ find_3_5_compute(ssize_t i, size_t nbits, bool bit, bool forward) {
 
 static void
 do_test_search_simple(size_t nbits) {
-	size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t);
+	size_t      sz = FB_NGROUPS(nbits) * sizeof(fb_group_t);
 	fb_group_t *fb = malloc(sz);
 	fb_init(fb, nbits);
 
@@ -96,7 +93,7 @@ do_test_search_simple(size_t nbits) {
 		expect_zu_eq(ffs_compute, ffs_search, "ffs mismatch at %zu", i);
 
 		ssize_t fls_compute = find_3_5_compute(i, nbits, true, false);
-		size_t fls_search = fb_fls(fb, nbits, i);
+		size_t  fls_search = fb_fls(fb, nbits, i);
 		expect_zu_eq(fls_compute, fls_search, "fls mismatch at %zu", i);
 
 		size_t ffu_compute = find_3_5_compute(i, nbits, false, true);
@@ -112,8 +109,7 @@ do_test_search_simple(size_t nbits) {
 }
 
 TEST_BEGIN(test_search_simple) {
-#define NB(nbits) \
-	do_test_search_simple(nbits);
+#define NB(nbits) do_test_search_simple(nbits);
 	NBITS_TAB
 #undef NB
 }
@@ -145,15 +141,17 @@ expect_exhaustive_results(fb_group_t *mostly_full, fb_group_t *mostly_empty,
 		    "mismatch at %zu, %zu", position, special_bit);
 		expect_zd_eq(special_bit, fb_fls(mostly_empty, nbits, position),
 		    "mismatch at %zu, %zu", position, special_bit);
-		expect_zu_eq(position + 1, fb_ffu(mostly_empty, nbits, position),
+		expect_zu_eq(position + 1,
+		    fb_ffu(mostly_empty, nbits, position),
+		    "mismatch at %zu, %zu", position, special_bit);
+		expect_zd_eq(position - 1,
+		    fb_flu(mostly_empty, nbits, position),
 		    "mismatch at %zu, %zu", position, special_bit);
-		expect_zd_eq(position - 1, fb_flu(mostly_empty, nbits,
-		    position), "mismatch at %zu, %zu", position, special_bit);
 
 		expect_zu_eq(position + 1, fb_ffs(mostly_full, nbits, position),
 		    "mismatch at %zu, %zu", position, special_bit);
-		expect_zd_eq(position - 1, fb_fls(mostly_full, nbits,
-		    position), "mismatch at %zu, %zu", position, special_bit);
+		expect_zd_eq(position - 1, fb_fls(mostly_full, nbits, position),
+		    "mismatch at %zu, %zu", position, special_bit);
 		expect_zu_eq(position, fb_ffu(mostly_full, nbits, position),
 		    "mismatch at %zu, %zu", position, special_bit);
 		expect_zd_eq(position, fb_flu(mostly_full, nbits, position),
@@ -162,8 +160,8 @@ expect_exhaustive_results(fb_group_t *mostly_full, fb_group_t *mostly_empty,
 		/* position > special_bit. */
 		expect_zu_eq(nbits, fb_ffs(mostly_empty, nbits, position),
 		    "mismatch at %zu, %zu", position, special_bit);
-		expect_zd_eq(special_bit, fb_fls(mostly_empty, nbits,
-		    position), "mismatch at %zu, %zu", position, special_bit);
+		expect_zd_eq(special_bit, fb_fls(mostly_empty, nbits, position),
+		    "mismatch at %zu, %zu", position, special_bit);
 		expect_zu_eq(position, fb_ffu(mostly_empty, nbits, position),
 		    "mismatch at %zu, %zu", position, special_bit);
 		expect_zd_eq(position, fb_flu(mostly_empty, nbits, position),
@@ -186,7 +184,7 @@ do_test_search_exhaustive(size_t nbits) {
 	if (nbits > 1000) {
 		return;
 	}
-	size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t);
+	size_t      sz = FB_NGROUPS(nbits) * sizeof(fb_group_t);
 	fb_group_t *empty = malloc(sz);
 	fb_init(empty, nbits);
 	fb_group_t *full = malloc(sz);
@@ -209,8 +207,7 @@ do_test_search_exhaustive(size_t nbits) {
 }
 
 TEST_BEGIN(test_search_exhaustive) {
-#define NB(nbits) \
-	do_test_search_exhaustive(nbits);
+#define NB(nbits) do_test_search_exhaustive(nbits);
 	NBITS_TAB
 #undef NB
 }
@@ -222,8 +219,8 @@ TEST_BEGIN(test_range_simple) {
 	 * big enough that usages of things like weirdnum (below) near the
 	 * beginning fit comfortably into the beginning of the bitmap.
 	 */
-	size_t nbits = 64 * 10;
-	size_t ngroups = FB_NGROUPS(nbits);
+	size_t      nbits = 64 * 10;
+	size_t      ngroups = FB_NGROUPS(nbits);
 	fb_group_t *fb = malloc(sizeof(fb_group_t) * ngroups);
 	fb_init(fb, nbits);
 	for (size_t i = 0; i < nbits; i++) {
@@ -255,7 +252,7 @@ TEST_END
 
 static void
 do_test_empty_full_exhaustive(size_t nbits) {
-	size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t);
+	size_t      sz = FB_NGROUPS(nbits) * sizeof(fb_group_t);
 	fb_group_t *empty = malloc(sz);
 	fb_init(empty, nbits);
 	fb_group_t *full = malloc(sz);
@@ -273,15 +270,15 @@ do_test_empty_full_exhaustive(size_t nbits) {
 
 		expect_false(fb_empty(empty, nbits), "error at bit %zu", i);
 		if (nbits != 1) {
-			expect_false(fb_full(empty, nbits),
-			    "error at bit %zu", i);
-			expect_false(fb_empty(full, nbits),
-			    "error at bit %zu", i);
+			expect_false(
+			    fb_full(empty, nbits), "error at bit %zu", i);
+			expect_false(
+			    fb_empty(full, nbits), "error at bit %zu", i);
 		} else {
-			expect_true(fb_full(empty, nbits),
-			    "error at bit %zu", i);
-			expect_true(fb_empty(full, nbits),
-			    "error at bit %zu", i);
+			expect_true(
+			    fb_full(empty, nbits), "error at bit %zu", i);
+			expect_true(
+			    fb_empty(full, nbits), "error at bit %zu", i);
 		}
 		expect_false(fb_full(full, nbits), "error at bit %zu", i);
 
@@ -294,8 +291,7 @@ do_test_empty_full_exhaustive(size_t nbits) {
 }
 
 TEST_BEGIN(test_empty_full) {
-#define NB(nbits) \
-	do_test_empty_full_exhaustive(nbits);
+#define NB(nbits) do_test_empty_full_exhaustive(nbits);
 	NBITS_TAB
 #undef NB
 }
@@ -306,8 +302,8 @@ TEST_END
  * built closely on top of it.
  */
 TEST_BEGIN(test_iter_range_simple) {
-	size_t set_limit = 30;
-	size_t nbits = 100;
+	size_t     set_limit = 30;
+	size_t     nbits = 100;
 	fb_group_t fb[FB_NGROUPS(100)];
 
 	fb_init(fb, nbits);
@@ -318,7 +314,7 @@ TEST_BEGIN(test_iter_range_simple) {
 	 */
 	size_t begin = (size_t)-1;
 	size_t len = (size_t)-1;
-	bool result;
+	bool   result;
 
 	/* A set of checks with only the first set_limit bits *set*. */
 	fb_set_range(fb, nbits, 0, set_limit);
@@ -410,7 +406,6 @@ TEST_BEGIN(test_iter_range_simple) {
 		expect_zu_eq(0, begin, "Incorrect begin at %zu", i);
 		expect_zu_eq(set_limit, len, "Incorrect len at %zu", i);
 	}
-
 }
 TEST_END
 
@@ -426,11 +421,11 @@ fb_iter_simple(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin,
 	ssize_t stride = (forward ? (ssize_t)1 : (ssize_t)-1);
 	ssize_t range_begin = (ssize_t)start;
 	for (; range_begin != (ssize_t)nbits && range_begin != -1;
-	    range_begin += stride) {
+	     range_begin += stride) {
 		if (fb_get(fb, nbits, range_begin) == val) {
 			ssize_t range_end = range_begin;
 			for (; range_end != (ssize_t)nbits && range_end != -1;
-			    range_end += stride) {
+			     range_end += stride) {
 				if (fb_get(fb, nbits, range_end) != val) {
 					break;
 				}
@@ -470,26 +465,26 @@ fb_range_longest_simple(fb_group_t *fb, size_t nbits, bool val) {
 }
 
 static void
-expect_iter_results_at(fb_group_t *fb, size_t nbits, size_t pos,
-    bool val, bool forward) {
-	bool iter_res;
+expect_iter_results_at(
+    fb_group_t *fb, size_t nbits, size_t pos, bool val, bool forward) {
+	bool              iter_res;
 	size_t iter_begin JEMALLOC_CC_SILENCE_INIT(0);
-	size_t iter_len JEMALLOC_CC_SILENCE_INIT(0);
+	size_t iter_len   JEMALLOC_CC_SILENCE_INIT(0);
 	if (val) {
 		if (forward) {
-			iter_res = fb_srange_iter(fb, nbits, pos,
-			    &iter_begin, &iter_len);
+			iter_res = fb_srange_iter(
+			    fb, nbits, pos, &iter_begin, &iter_len);
 		} else {
-			iter_res = fb_srange_riter(fb, nbits, pos,
-			    &iter_begin, &iter_len);
+			iter_res = fb_srange_riter(
+			    fb, nbits, pos, &iter_begin, &iter_len);
 		}
 	} else {
 		if (forward) {
-			iter_res = fb_urange_iter(fb, nbits, pos,
-			    &iter_begin, &iter_len);
+			iter_res = fb_urange_iter(
+			    fb, nbits, pos, &iter_begin, &iter_len);
 		} else {
-			iter_res = fb_urange_riter(fb, nbits, pos,
-			    &iter_begin, &iter_len);
+			iter_res = fb_urange_riter(
+			    fb, nbits, pos, &iter_begin, &iter_len);
 		}
 	}
 
@@ -500,15 +495,15 @@ expect_iter_results_at(fb_group_t *fb, size_t nbits, size_t pos,
 	 */
 	size_t simple_iter_begin = 0;
 	size_t simple_iter_len = 0;
-	simple_iter_res = fb_iter_simple(fb, nbits, pos, &simple_iter_begin,
-	    &simple_iter_len, val, forward);
+	simple_iter_res = fb_iter_simple(
+	    fb, nbits, pos, &simple_iter_begin, &simple_iter_len, val, forward);
 
 	expect_b_eq(iter_res, simple_iter_res, "Result mismatch at %zu", pos);
 	if (iter_res && simple_iter_res) {
 		assert_zu_eq(iter_begin, simple_iter_begin,
 		    "Begin mismatch at %zu", pos);
-		expect_zu_eq(iter_len, simple_iter_len,
-		    "Length mismatch at %zu", pos);
+		expect_zu_eq(
+		    iter_len, simple_iter_len, "Length mismatch at %zu", pos);
 	}
 }
 
@@ -543,7 +538,7 @@ do_test_iter_range_exhaustive(size_t nbits) {
 	if (nbits > 1000) {
 		return;
 	}
-	size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t);
+	size_t      sz = FB_NGROUPS(nbits) * sizeof(fb_group_t);
 	fb_group_t *fb = malloc(sz);
 	fb_init(fb, nbits);
 
@@ -558,7 +553,7 @@ do_test_iter_range_exhaustive(size_t nbits) {
 	expect_iter_results(fb, nbits);
 
 	fb_unset_range(fb, nbits, 0, nbits);
-	fb_set_range(fb, nbits, 0, nbits / 2 == 0 ? 1: nbits / 2);
+	fb_set_range(fb, nbits, 0, nbits / 2 == 0 ? 1 : nbits / 2);
 	expect_iter_results(fb, nbits);
 
 	free(fb);
@@ -569,8 +564,7 @@ do_test_iter_range_exhaustive(size_t nbits) {
  * computation.
  */
 TEST_BEGIN(test_iter_range_exhaustive) {
-#define NB(nbits) \
-	do_test_iter_range_exhaustive(nbits);
+#define NB(nbits) do_test_iter_range_exhaustive(nbits);
 	NBITS_TAB
 #undef NB
 }
@@ -581,8 +575,8 @@ TEST_END
  * returns the number of set bits in [scount_start, scount_end).
  */
 static size_t
-scount_contiguous(size_t set_start, size_t set_end, size_t scount_start,
-    size_t scount_end) {
+scount_contiguous(
+    size_t set_start, size_t set_end, size_t scount_start, size_t scount_end) {
 	/* No overlap. */
 	if (set_end <= scount_start || scount_end <= set_start) {
 		return 0;
@@ -611,8 +605,8 @@ scount_contiguous(size_t set_start, size_t set_end, size_t scount_start,
 }
 
 static size_t
-ucount_contiguous(size_t set_start, size_t set_end, size_t ucount_start,
-    size_t ucount_end) {
+ucount_contiguous(
+    size_t set_start, size_t set_end, size_t ucount_start, size_t ucount_end) {
 	/* No overlap. */
 	if (set_end <= ucount_start || ucount_end <= set_start) {
 		return ucount_end - ucount_start;
@@ -641,34 +635,33 @@ ucount_contiguous(size_t set_start, size_t set_end, size_t ucount_start,
 }
 
 static void
-expect_count_match_contiguous(fb_group_t *fb, size_t nbits, size_t set_start,
-    size_t set_end) {
+expect_count_match_contiguous(
+    fb_group_t *fb, size_t nbits, size_t set_start, size_t set_end) {
 	for (size_t i = 0; i < nbits; i++) {
 		for (size_t j = i + 1; j <= nbits; j++) {
 			size_t cnt = j - i;
-			size_t scount_expected = scount_contiguous(set_start,
-			    set_end, i, j);
+			size_t scount_expected = scount_contiguous(
+			    set_start, set_end, i, j);
 			size_t scount_computed = fb_scount(fb, nbits, i, cnt);
 			expect_zu_eq(scount_expected, scount_computed,
 			    "fb_scount error with nbits=%zu, start=%zu, "
 			    "cnt=%zu, with bits set in [%zu, %zu)",
 			    nbits, i, cnt, set_start, set_end);
 
-			size_t ucount_expected = ucount_contiguous(set_start,
-			    set_end, i, j);
+			size_t ucount_expected = ucount_contiguous(
+			    set_start, set_end, i, j);
 			size_t ucount_computed = fb_ucount(fb, nbits, i, cnt);
 			assert_zu_eq(ucount_expected, ucount_computed,
 			    "fb_ucount error with nbits=%zu, start=%zu, "
 			    "cnt=%zu, with bits set in [%zu, %zu)",
 			    nbits, i, cnt, set_start, set_end);
-
 		}
 	}
 }
 
 static void
 do_test_count_contiguous(size_t nbits) {
-	size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t);
+	size_t      sz = FB_NGROUPS(nbits) * sizeof(fb_group_t);
 	fb_group_t *fb = malloc(sz);
 
 	fb_init(fb, nbits);
@@ -688,7 +681,7 @@ do_test_count_contiguous(size_t nbits) {
 }
 
 TEST_BEGIN(test_count_contiguous_simple) {
-	enum {nbits = 300};
+	enum { nbits = 300 };
 	fb_group_t fb[FB_NGROUPS(nbits)];
 	fb_init(fb, nbits);
 	/* Just an arbitrary number. */
@@ -718,10 +711,10 @@ TEST_BEGIN(test_count_contiguous_simple) {
 TEST_END
 
 TEST_BEGIN(test_count_contiguous) {
-#define NB(nbits) \
-	/* This test is *particularly* slow in debug builds. */ \
-	if ((!config_debug && nbits < 300) || nbits < 150) { \
-		do_test_count_contiguous(nbits); \
+#define NB(nbits)                                                              \
+	/* This test is *particularly* slow in debug builds. */                \
+	if ((!config_debug && nbits < 300) || nbits < 150) {                   \
+		do_test_count_contiguous(nbits);                               \
 	}
 	NBITS_TAB
 #undef NB
@@ -729,15 +722,15 @@ TEST_BEGIN(test_count_contiguous) {
 TEST_END
 
 static void
-expect_count_match_alternating(fb_group_t *fb_even, fb_group_t *fb_odd,
-    size_t nbits) {
+expect_count_match_alternating(
+    fb_group_t *fb_even, fb_group_t *fb_odd, size_t nbits) {
 	for (size_t i = 0; i < nbits; i++) {
 		for (size_t j = i + 1; j <= nbits; j++) {
 			size_t cnt = j - i;
 			size_t odd_scount = cnt / 2
 			    + (size_t)(cnt % 2 == 1 && i % 2 == 1);
-			size_t odd_scount_computed = fb_scount(fb_odd, nbits,
-			    i, j - i);
+			size_t odd_scount_computed = fb_scount(
+			    fb_odd, nbits, i, j - i);
 			assert_zu_eq(odd_scount, odd_scount_computed,
 			    "fb_scount error with nbits=%zu, start=%zu, "
 			    "cnt=%zu, with alternating bits set.",
@@ -745,8 +738,8 @@ expect_count_match_alternating(fb_group_t *fb_even, fb_group_t *fb_odd,
 
 			size_t odd_ucount = cnt / 2
 			    + (size_t)(cnt % 2 == 1 && i % 2 == 0);
-			size_t odd_ucount_computed = fb_ucount(fb_odd, nbits,
-			    i, j - i);
+			size_t odd_ucount_computed = fb_ucount(
+			    fb_odd, nbits, i, j - i);
 			assert_zu_eq(odd_ucount, odd_ucount_computed,
 			    "fb_ucount error with nbits=%zu, start=%zu, "
 			    "cnt=%zu, with alternating bits set.",
@@ -754,8 +747,8 @@ expect_count_match_alternating(fb_group_t *fb_even, fb_group_t *fb_odd,
 
 			size_t even_scount = cnt / 2
 			    + (size_t)(cnt % 2 == 1 && i % 2 == 0);
-			size_t even_scount_computed = fb_scount(fb_even, nbits,
-			    i, j - i);
+			size_t even_scount_computed = fb_scount(
+			    fb_even, nbits, i, j - i);
 			assert_zu_eq(even_scount, even_scount_computed,
 			    "fb_scount error with nbits=%zu, start=%zu, "
 			    "cnt=%zu, with alternating bits set.",
@@ -763,8 +756,8 @@ expect_count_match_alternating(fb_group_t *fb_even, fb_group_t *fb_odd,
 
 			size_t even_ucount = cnt / 2
 			    + (size_t)(cnt % 2 == 1 && i % 2 == 1);
-			size_t even_ucount_computed = fb_ucount(fb_even, nbits,
-			    i, j - i);
+			size_t even_ucount_computed = fb_ucount(
+			    fb_even, nbits, i, j - i);
 			assert_zu_eq(even_ucount, even_ucount_computed,
 			    "fb_ucount error with nbits=%zu, start=%zu, "
 			    "cnt=%zu, with alternating bits set.",
@@ -778,7 +771,7 @@ do_test_count_alternating(size_t nbits) {
 	if (nbits > 1000) {
 		return;
 	}
-	size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t);
+	size_t      sz = FB_NGROUPS(nbits) * sizeof(fb_group_t);
 	fb_group_t *fb_even = malloc(sz);
 	fb_group_t *fb_odd = malloc(sz);
 
@@ -800,8 +793,7 @@ do_test_count_alternating(size_t nbits) {
 }
 
 TEST_BEGIN(test_count_alternating) {
-#define NB(nbits) \
-	do_test_count_alternating(nbits);
+#define NB(nbits) do_test_count_alternating(nbits);
 	NBITS_TAB
 #undef NB
 }
@@ -809,8 +801,9 @@ TEST_END
 
 static void
 do_test_bit_op(size_t nbits, bool (*op)(bool a, bool b),
-    void (*fb_op)(fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, size_t nbits)) {
-	size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t);
+    void (*fb_op)(
+        fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, size_t nbits)) {
+	size_t      sz = FB_NGROUPS(nbits) * sizeof(fb_group_t);
 	fb_group_t *fb1 = malloc(sz);
 	fb_group_t *fb2 = malloc(sz);
 	fb_group_t *fb_result = malloc(sz);
@@ -853,8 +846,10 @@ do_test_bit_op(size_t nbits, bool (*op)(bool a, bool b),
 		bool bit2 = ((prng2 & (1ULL << (i % 64))) != 0);
 
 		/* Original bitmaps shouldn't change. */
-		expect_b_eq(bit1, fb_get(fb1, nbits, i), "difference at bit %zu", i);
-		expect_b_eq(bit2, fb_get(fb2, nbits, i), "difference at bit %zu", i);
+		expect_b_eq(
+		    bit1, fb_get(fb1, nbits, i), "difference at bit %zu", i);
+		expect_b_eq(
+		    bit2, fb_get(fb2, nbits, i), "difference at bit %zu", i);
 
 		/* New one should be bitwise and. */
 		expect_b_eq(op(bit1, bit2), fb_get(fb_result, nbits, i),
@@ -883,8 +878,7 @@ do_test_bit_and(size_t nbits) {
 }
 
 TEST_BEGIN(test_bit_and) {
-#define NB(nbits) \
-	do_test_bit_and(nbits);
+#define NB(nbits) do_test_bit_and(nbits);
 	NBITS_TAB
 #undef NB
 }
@@ -901,8 +895,7 @@ do_test_bit_or(size_t nbits) {
 }
 
 TEST_BEGIN(test_bit_or) {
-#define NB(nbits) \
-	do_test_bit_or(nbits);
+#define NB(nbits) do_test_bit_or(nbits);
 	NBITS_TAB
 #undef NB
 }
@@ -915,8 +908,8 @@ binary_not(bool a, bool b) {
 }
 
 static void
-fb_bit_not_shim(fb_group_t *dst, fb_group_t *src1, fb_group_t *src2,
-    size_t nbits) {
+fb_bit_not_shim(
+    fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, size_t nbits) {
 	(void)src2;
 	fb_bit_not(dst, src1, nbits);
 }
@@ -927,8 +920,7 @@ do_test_bit_not(size_t nbits) {
 }
 
 TEST_BEGIN(test_bit_not) {
-#define NB(nbits) \
-	do_test_bit_not(nbits);
+#define NB(nbits) do_test_bit_not(nbits);
 	NBITS_TAB
 #undef NB
 }
@@ -936,19 +928,9 @@ TEST_END
 
 int
 main(void) {
-	return test_no_reentrancy(
-	    test_fb_init,
-	    test_get_set_unset,
-	    test_search_simple,
-	    test_search_exhaustive,
-	    test_range_simple,
-	    test_empty_full,
-	    test_iter_range_simple,
-	    test_iter_range_exhaustive,
-	    test_count_contiguous_simple,
-	    test_count_contiguous,
-	    test_count_alternating,
-	    test_bit_and,
-	    test_bit_or,
-	    test_bit_not);
+	return test_no_reentrancy(test_fb_init, test_get_set_unset,
+	    test_search_simple, test_search_exhaustive, test_range_simple,
+	    test_empty_full, test_iter_range_simple, test_iter_range_exhaustive,
+	    test_count_contiguous_simple, test_count_contiguous,
+	    test_count_alternating, test_bit_and, test_bit_or, test_bit_not);
 }
diff --git a/test/unit/fork.c b/test/unit/fork.c
index 4137423f..60675b77 100644
--- a/test/unit/fork.c
+++ b/test/unit/fork.c
@@ -1,7 +1,7 @@
 #include "test/jemalloc_test.h"
 
 #ifndef _WIN32
-#include <sys/wait.h>
+#	include <sys/wait.h>
 #endif
 
 #ifndef _WIN32
@@ -13,8 +13,10 @@ wait_for_child_exit(int pid) {
 			test_fail("Unexpected waitpid() failure.");
 		}
 		if (WIFSIGNALED(status)) {
-			test_fail("Unexpected child termination due to "
-			    "signal %d", WTERMSIG(status));
+			test_fail(
+			    "Unexpected child termination due to "
+			    "signal %d",
+			    WTERMSIG(status));
 			break;
 		}
 		if (WIFEXITED(status)) {
@@ -35,7 +37,7 @@ TEST_BEGIN(test_fork) {
 
 	/* Set up a manually managed arena for test. */
 	unsigned arena_ind;
-	size_t sz = sizeof(unsigned);
+	size_t   sz = sizeof(unsigned);
 	expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0),
 	    0, "Unexpected mallctl() failure");
 
@@ -43,8 +45,8 @@ TEST_BEGIN(test_fork) {
 	unsigned old_arena_ind;
 	sz = sizeof(old_arena_ind);
 	expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz,
-	    (void *)&arena_ind, sizeof(arena_ind)), 0,
-	    "Unexpected mallctl() failure");
+	                (void *)&arena_ind, sizeof(arena_ind)),
+	    0, "Unexpected mallctl() failure");
 
 	p = malloc(1);
 	expect_ptr_not_null(p, "Unexpected malloc() failure");
@@ -95,7 +97,7 @@ do_fork_thd(void *arg) {
 
 #ifndef _WIN32
 static void
-do_test_fork_multithreaded() {
+do_test_fork_multithreaded(void) {
 	thd_t child;
 	thd_create(&child, do_fork_thd, NULL);
 	do_fork_thd(NULL);
@@ -135,7 +137,5 @@ TEST_END
 
 int
 main(void) {
-	return test_no_reentrancy(
-	    test_fork,
-	    test_fork_multithreaded);
+	return test_no_reentrancy(test_fork, test_fork_multithreaded);
 }
diff --git a/test/unit/fxp.c b/test/unit/fxp.c
index 27f10976..02020efe 100644
--- a/test/unit/fxp.c
+++ b/test/unit/fxp.c
@@ -28,7 +28,7 @@ fxp_close(fxp_t a, fxp_t b) {
 static fxp_t
 xparse_fxp(const char *str) {
 	fxp_t result;
-	bool err = fxp_parse(&result, str, NULL);
+	bool  err = fxp_parse(&result, str, NULL);
 	assert_false(err, "Invalid fxp string: %s", str);
 	return result;
 }
@@ -36,14 +36,14 @@ xparse_fxp(const char *str) {
 static void
 expect_parse_accurate(const char *str, const char *parse_str) {
 	double true_val = strtod(str, NULL);
-	fxp_t fxp_val;
-	char *end;
-	bool err = fxp_parse(&fxp_val, parse_str, &end);
+	fxp_t  fxp_val;
+	char  *end;
+	bool   err = fxp_parse(&fxp_val, parse_str, &end);
 	expect_false(err, "Unexpected parse failure");
-	expect_ptr_eq(parse_str + strlen(str), end,
-	    "Didn't parse whole string");
-	expect_true(double_close(fxp2double(fxp_val), true_val),
-	    "Misparsed %s", str);
+	expect_ptr_eq(
+	    parse_str + strlen(str), end, "Didn't parse whole string");
+	expect_true(
+	    double_close(fxp2double(fxp_val), true_val), "Misparsed %s", str);
 }
 
 static void
@@ -100,12 +100,12 @@ static void
 expect_parse_failure(const char *str) {
 	fxp_t result = FXP_INIT_INT(333);
 	char *end = (void *)0x123;
-	bool err = fxp_parse(&result, str, &end);
+	bool  err = fxp_parse(&result, str, &end);
 	expect_true(err, "Expected a parse error on: %s", str);
-	expect_ptr_eq((void *)0x123, end,
-	    "Parse error shouldn't change results");
-	expect_u32_eq(result, FXP_INIT_INT(333),
-	    "Parse error shouldn't change results");
+	expect_ptr_eq(
+	    (void *)0x123, end, "Parse error shouldn't change results");
+	expect_u32_eq(
+	    result, FXP_INIT_INT(333), "Parse error shouldn't change results");
 }
 
 TEST_BEGIN(test_parse_invalid) {
@@ -129,7 +129,6 @@ expect_init_percent(unsigned percent, const char *str) {
 	    "Expect representations of FXP_INIT_PERCENT(%u) and "
 	    "fxp_parse(\"%s\") to be equal; got %x and %x",
 	    percent, str, result_init, result_parse);
-
 }
 
 /*
@@ -145,12 +144,12 @@ TEST_BEGIN(test_init_percent) {
 TEST_END
 
 static void
-expect_add(const char *astr, const char *bstr, const char* resultstr) {
+expect_add(const char *astr, const char *bstr, const char *resultstr) {
 	fxp_t a = xparse_fxp(astr);
 	fxp_t b = xparse_fxp(bstr);
 	fxp_t result = xparse_fxp(resultstr);
-	expect_true(fxp_close(fxp_add(a, b), result),
-	    "Expected %s + %s == %s", astr, bstr, resultstr);
+	expect_true(fxp_close(fxp_add(a, b), result), "Expected %s + %s == %s",
+	    astr, bstr, resultstr);
 }
 
 TEST_BEGIN(test_add_simple) {
@@ -164,12 +163,12 @@ TEST_BEGIN(test_add_simple) {
 TEST_END
 
 static void
-expect_sub(const char *astr, const char *bstr, const char* resultstr) {
+expect_sub(const char *astr, const char *bstr, const char *resultstr) {
 	fxp_t a = xparse_fxp(astr);
 	fxp_t b = xparse_fxp(bstr);
 	fxp_t result = xparse_fxp(resultstr);
-	expect_true(fxp_close(fxp_sub(a, b), result),
-	    "Expected %s - %s == %s", astr, bstr, resultstr);
+	expect_true(fxp_close(fxp_sub(a, b), result), "Expected %s - %s == %s",
+	    astr, bstr, resultstr);
 }
 
 TEST_BEGIN(test_sub_simple) {
@@ -183,12 +182,12 @@ TEST_BEGIN(test_sub_simple) {
 TEST_END
 
 static void
-expect_mul(const char *astr, const char *bstr, const char* resultstr) {
+expect_mul(const char *astr, const char *bstr, const char *resultstr) {
 	fxp_t a = xparse_fxp(astr);
 	fxp_t b = xparse_fxp(bstr);
 	fxp_t result = xparse_fxp(resultstr);
-	expect_true(fxp_close(fxp_mul(a, b), result),
-	    "Expected %s * %s == %s", astr, bstr, resultstr);
+	expect_true(fxp_close(fxp_mul(a, b), result), "Expected %s * %s == %s",
+	    astr, bstr, resultstr);
 }
 
 TEST_BEGIN(test_mul_simple) {
@@ -202,12 +201,12 @@ TEST_BEGIN(test_mul_simple) {
 TEST_END
 
 static void
-expect_div(const char *astr, const char *bstr, const char* resultstr) {
+expect_div(const char *astr, const char *bstr, const char *resultstr) {
 	fxp_t a = xparse_fxp(astr);
 	fxp_t b = xparse_fxp(bstr);
 	fxp_t result = xparse_fxp(resultstr);
-	expect_true(fxp_close(fxp_div(a, b), result),
-	    "Expected %s / %s == %s", astr, bstr, resultstr);
+	expect_true(fxp_close(fxp_div(a, b), result), "Expected %s / %s == %s",
+	    astr, bstr, resultstr);
 }
 
 TEST_BEGIN(test_div_simple) {
@@ -223,11 +222,11 @@ TEST_END
 
 static void
 expect_round(const char *str, uint32_t rounded_down, uint32_t rounded_nearest) {
-	fxp_t fxp = xparse_fxp(str);
+	fxp_t    fxp = xparse_fxp(str);
 	uint32_t fxp_rounded_down = fxp_round_down(fxp);
 	uint32_t fxp_rounded_nearest = fxp_round_nearest(fxp);
-	expect_u32_eq(rounded_down, fxp_rounded_down,
-	    "Mistake rounding %s down", str);
+	expect_u32_eq(
+	    rounded_down, fxp_rounded_down, "Mistake rounding %s down", str);
 	expect_u32_eq(rounded_nearest, fxp_rounded_nearest,
 	    "Mistake rounding %s to nearest", str);
 }
@@ -248,11 +247,11 @@ TEST_END
 
 static void
 expect_mul_frac(size_t a, const char *fracstr, size_t expected) {
-	fxp_t frac = xparse_fxp(fracstr);
+	fxp_t  frac = xparse_fxp(fracstr);
 	size_t result = fxp_mul_frac(a, frac);
 	expect_true(double_close(expected, result),
-	    "Expected %zu * %s == %zu (fracmul); got %zu", a, fracstr,
-	    expected, result);
+	    "Expected %zu * %s == %zu (fracmul); got %zu", a, fracstr, expected,
+	    result);
 }
 
 TEST_BEGIN(test_mul_frac_simple) {
@@ -273,7 +272,7 @@ TEST_END
 static void
 expect_print(const char *str) {
 	fxp_t fxp = xparse_fxp(str);
-	char buf[FXP_BUF_SIZE];
+	char  buf[FXP_BUF_SIZE];
 	fxp_print(fxp, buf);
 	expect_d_eq(0, strcmp(str, buf), "Couldn't round-trip print %s", str);
 }
@@ -298,33 +297,32 @@ TEST_BEGIN(test_print_simple) {
 TEST_END
 
 TEST_BEGIN(test_stress) {
-	const char *numbers[] = {
-		"0.0", "0.1", "0.2", "0.3", "0.4",
-		"0.5", "0.6", "0.7", "0.8", "0.9",
+	const char *numbers[] = {"0.0", "0.1", "0.2", "0.3", "0.4", "0.5",
+	    "0.6", "0.7", "0.8", "0.9",
 
-		"1.0", "1.1", "1.2", "1.3", "1.4",
-		"1.5", "1.6", "1.7", "1.8", "1.9",
+	    "1.0", "1.1", "1.2", "1.3", "1.4", "1.5", "1.6", "1.7", "1.8",
+	    "1.9",
 
-		"2.0", "2.1", "2.2", "2.3", "2.4",
-		"2.5", "2.6", "2.7", "2.8", "2.9",
+	    "2.0", "2.1", "2.2", "2.3", "2.4", "2.5", "2.6", "2.7", "2.8",
+	    "2.9",
 
-		"17.0", "17.1", "17.2", "17.3", "17.4",
-		"17.5", "17.6", "17.7", "17.8", "17.9",
+	    "17.0", "17.1", "17.2", "17.3", "17.4", "17.5", "17.6", "17.7",
+	    "17.8", "17.9",
 
-		"18.0", "18.1", "18.2", "18.3", "18.4",
-		"18.5", "18.6", "18.7", "18.8", "18.9",
+	    "18.0", "18.1", "18.2", "18.3", "18.4", "18.5", "18.6", "18.7",
+	    "18.8", "18.9",
 
-		"123.0", "123.1", "123.2", "123.3", "123.4",
-		"123.5", "123.6", "123.7", "123.8", "123.9",
+	    "123.0", "123.1", "123.2", "123.3", "123.4", "123.5", "123.6",
+	    "123.7", "123.8", "123.9",
 
-		"124.0", "124.1", "124.2", "124.3", "124.4",
-		"124.5", "124.6", "124.7", "124.8", "124.9",
+	    "124.0", "124.1", "124.2", "124.3", "124.4", "124.5", "124.6",
+	    "124.7", "124.8", "124.9",
 
-		"125.0", "125.1", "125.2", "125.3", "125.4",
-		"125.5", "125.6", "125.7", "125.8", "125.9"};
-	size_t numbers_len = sizeof(numbers)/sizeof(numbers[0]);
+	    "125.0", "125.1", "125.2", "125.3", "125.4", "125.5", "125.6",
+	    "125.7", "125.8", "125.9"};
+	size_t      numbers_len = sizeof(numbers) / sizeof(numbers[0]);
 	for (size_t i = 0; i < numbers_len; i++) {
-		fxp_t fxp_a = xparse_fxp(numbers[i]);
+		fxp_t  fxp_a = xparse_fxp(numbers[i]);
 		double double_a = strtod(numbers[i], NULL);
 
 		uint32_t fxp_rounded_down = fxp_round_down(fxp_a);
@@ -338,37 +336,35 @@ TEST_BEGIN(test_stress) {
 		    "Incorrectly rounded-to-nearest %s", numbers[i]);
 
 		for (size_t j = 0; j < numbers_len; j++) {
-			fxp_t fxp_b = xparse_fxp(numbers[j]);
+			fxp_t  fxp_b = xparse_fxp(numbers[j]);
 			double double_b = strtod(numbers[j], NULL);
 
-			fxp_t fxp_sum = fxp_add(fxp_a, fxp_b);
+			fxp_t  fxp_sum = fxp_add(fxp_a, fxp_b);
 			double double_sum = double_a + double_b;
 			expect_true(
 			    double_close(fxp2double(fxp_sum), double_sum),
 			    "Miscomputed %s + %s", numbers[i], numbers[j]);
 
 			if (double_a > double_b) {
-				fxp_t fxp_diff = fxp_sub(fxp_a, fxp_b);
+				fxp_t  fxp_diff = fxp_sub(fxp_a, fxp_b);
 				double double_diff = double_a - double_b;
-				expect_true(
-				    double_close(fxp2double(fxp_diff),
-				    double_diff),
+				expect_true(double_close(fxp2double(fxp_diff),
+				                double_diff),
 				    "Miscomputed %s - %s", numbers[i],
 				    numbers[j]);
 			}
 
-			fxp_t fxp_prod = fxp_mul(fxp_a, fxp_b);
+			fxp_t  fxp_prod = fxp_mul(fxp_a, fxp_b);
 			double double_prod = double_a * double_b;
 			expect_true(
 			    double_close(fxp2double(fxp_prod), double_prod),
 			    "Miscomputed %s * %s", numbers[i], numbers[j]);
 
 			if (double_b != 0.0) {
-				fxp_t fxp_quot = fxp_div(fxp_a, fxp_b);
+				fxp_t  fxp_quot = fxp_div(fxp_a, fxp_b);
 				double double_quot = double_a / double_b;
-				expect_true(
-				    double_close(fxp2double(fxp_quot),
-				    double_quot),
+				expect_true(double_close(fxp2double(fxp_quot),
+				                double_quot),
 				    "Miscomputed %s / %s", numbers[i],
 				    numbers[j]);
 			}
@@ -379,16 +375,8 @@ TEST_END
 
 int
 main(void) {
-	return test_no_reentrancy(
-	    test_parse_valid,
-	    test_parse_invalid,
-	    test_init_percent,
-	    test_add_simple,
-	    test_sub_simple,
-	    test_mul_simple,
-	    test_div_simple,
-	    test_round_simple,
-	    test_mul_frac_simple,
-	    test_print_simple,
-	    test_stress);
+	return test_no_reentrancy(test_parse_valid, test_parse_invalid,
+	    test_init_percent, test_add_simple, test_sub_simple,
+	    test_mul_simple, test_div_simple, test_round_simple,
+	    test_mul_frac_simple, test_print_simple, test_stress);
 }
diff --git a/test/unit/hash.c b/test/unit/hash.c
index 49f08238..e39110fc 100644
--- a/test/unit/hash.c
+++ b/test/unit/hash.c
@@ -39,30 +39,38 @@ typedef enum {
 static int
 hash_variant_bits(hash_variant_t variant) {
 	switch (variant) {
-	case hash_variant_x86_32: return 32;
-	case hash_variant_x86_128: return 128;
-	case hash_variant_x64_128: return 128;
-	default: not_reached();
+	case hash_variant_x86_32:
+		return 32;
+	case hash_variant_x86_128:
+		return 128;
+	case hash_variant_x64_128:
+		return 128;
+	default:
+		not_reached();
 	}
 }
 
 static const char *
 hash_variant_string(hash_variant_t variant) {
 	switch (variant) {
-	case hash_variant_x86_32: return "hash_x86_32";
-	case hash_variant_x86_128: return "hash_x86_128";
-	case hash_variant_x64_128: return "hash_x64_128";
-	default: not_reached();
+	case hash_variant_x86_32:
+		return "hash_x86_32";
+	case hash_variant_x86_128:
+		return "hash_x86_128";
+	case hash_variant_x64_128:
+		return "hash_x64_128";
+	default:
+		not_reached();
 	}
 }
 
-#define KEY_SIZE	256
+#define KEY_SIZE 256
 static void
 hash_variant_verify_key(hash_variant_t variant, uint8_t *key) {
 	const int hashbytes = hash_variant_bits(variant) / 8;
 	const int hashes_size = hashbytes * 256;
-	VARIABLE_ARRAY(uint8_t, hashes, hashes_size);
-	VARIABLE_ARRAY(uint8_t, final, hashbytes);
+	VARIABLE_ARRAY_UNSAFE(uint8_t, hashes, hashes_size);
+	VARIABLE_ARRAY_UNSAFE(uint8_t, final, hashbytes);
 	unsigned i;
 	uint32_t computed, expected;
 
@@ -79,20 +87,24 @@ hash_variant_verify_key(hash_variant_t variant, uint8_t *key) {
 		switch (variant) {
 		case hash_variant_x86_32: {
 			uint32_t out;
-			out = hash_x86_32(key, i, 256-i);
-			memcpy(&hashes[i*hashbytes], &out, hashbytes);
+			out = hash_x86_32(key, i, 256 - i);
+			memcpy(&hashes[i * hashbytes], &out, hashbytes);
 			break;
-		} case hash_variant_x86_128: {
+		}
+		case hash_variant_x86_128: {
 			uint64_t out[2];
-			hash_x86_128(key, i, 256-i, out);
-			memcpy(&hashes[i*hashbytes], out, hashbytes);
+			hash_x86_128(key, i, 256 - i, out);
+			memcpy(&hashes[i * hashbytes], out, hashbytes);
 			break;
-		} case hash_variant_x64_128: {
+		}
+		case hash_variant_x64_128: {
 			uint64_t out[2];
-			hash_x64_128(key, i, 256-i, out);
-			memcpy(&hashes[i*hashbytes], out, hashbytes);
+			hash_x64_128(key, i, 256 - i, out);
+			memcpy(&hashes[i * hashbytes], out, hashbytes);
 			break;
-		} default: not_reached();
+		}
+		default:
+			not_reached();
 		}
 	}
 
@@ -102,33 +114,50 @@ hash_variant_verify_key(hash_variant_t variant, uint8_t *key) {
 		uint32_t out = hash_x86_32(hashes, hashes_size, 0);
 		memcpy(final, &out, sizeof(out));
 		break;
-	} case hash_variant_x86_128: {
+	}
+	case hash_variant_x86_128: {
 		uint64_t out[2];
 		hash_x86_128(hashes, hashes_size, 0, out);
 		memcpy(final, out, sizeof(out));
 		break;
-	} case hash_variant_x64_128: {
+	}
+	case hash_variant_x64_128: {
 		uint64_t out[2];
 		hash_x64_128(hashes, hashes_size, 0, out);
 		memcpy(final, out, sizeof(out));
 		break;
-	} default: not_reached();
+	}
+	default:
+		not_reached();
 	}
 
-	computed = (final[0] << 0) | (final[1] << 8) | (final[2] << 16) |
-	    (final[3] << 24);
+	computed = ((uint32_t) final[0] << 0) | ((uint32_t) final[1] << 8)
+	    | ((uint32_t) final[2] << 16) | ((uint32_t) final[3] << 24);
 
 	switch (variant) {
 #ifdef JEMALLOC_BIG_ENDIAN
-	case hash_variant_x86_32: expected = 0x6213303eU; break;
-	case hash_variant_x86_128: expected = 0x266820caU; break;
-	case hash_variant_x64_128: expected = 0xcc622b6fU; break;
+	case hash_variant_x86_32:
+		expected = 0x6213303eU;
+		break;
+	case hash_variant_x86_128:
+		expected = 0x266820caU;
+		break;
+	case hash_variant_x64_128:
+		expected = 0xcc622b6fU;
+		break;
 #else
-	case hash_variant_x86_32: expected = 0xb0f57ee3U; break;
-	case hash_variant_x86_128: expected = 0xb3ece62aU; break;
-	case hash_variant_x64_128: expected = 0x6384ba69U; break;
+	case hash_variant_x86_32:
+		expected = 0xb0f57ee3U;
+		break;
+	case hash_variant_x86_128:
+		expected = 0xb3ece62aU;
+		break;
+	case hash_variant_x64_128:
+		expected = 0x6384ba69U;
+		break;
 #endif
-	default: not_reached();
+	default:
+		not_reached();
 	}
 
 	expect_u32_eq(computed, expected,
@@ -138,8 +167,8 @@ hash_variant_verify_key(hash_variant_t variant, uint8_t *key) {
 
 static void
 hash_variant_verify(hash_variant_t variant) {
-#define MAX_ALIGN	16
-	uint8_t key[KEY_SIZE + (MAX_ALIGN - 1)];
+#define MAX_ALIGN 16
+	uint8_t  key[KEY_SIZE + (MAX_ALIGN - 1)];
 	unsigned i;
 
 	for (i = 0; i < MAX_ALIGN; i++) {
@@ -166,8 +195,5 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_hash_x86_32,
-	    test_hash_x86_128,
-	    test_hash_x64_128);
+	return test(test_hash_x86_32, test_hash_x86_128, test_hash_x64_128);
 }
diff --git a/test/unit/hook.c b/test/unit/hook.c
index 16a6f1b0..3a6b3c13 100644
--- a/test/unit/hook.c
+++ b/test/unit/hook.c
@@ -2,19 +2,19 @@
 
 #include "jemalloc/internal/hook.h"
 
-static void *arg_extra;
-static int arg_type;
-static void *arg_result;
-static void *arg_address;
-static size_t arg_old_usize;
-static size_t arg_new_usize;
+static void     *arg_extra;
+static int       arg_type;
+static void     *arg_result;
+static void     *arg_address;
+static size_t    arg_old_usize;
+static size_t    arg_new_usize;
 static uintptr_t arg_result_raw;
 static uintptr_t arg_args_raw[4];
 
 static int call_count = 0;
 
 static void
-reset_args() {
+reset_args(void) {
 	arg_extra = NULL;
 	arg_type = 12345;
 	arg_result = NULL;
@@ -40,7 +40,7 @@ alloc_free_size(size_t sz) {
  * allocation scenarios.
  */
 static void
-be_reentrant() {
+be_reentrant(void) {
 	/* Let's make sure the tcache is non-empty if enabled. */
 	alloc_free_size(1);
 	alloc_free_size(1024);
@@ -71,13 +71,13 @@ set_args_raw(uintptr_t *args_raw, int nargs) {
 
 static void
 expect_args_raw(uintptr_t *args_raw_expected, int nargs) {
-	int cmp = memcmp(args_raw_expected, arg_args_raw,
-	    sizeof(uintptr_t) * nargs);
+	int cmp = memcmp(
+	    args_raw_expected, arg_args_raw, sizeof(uintptr_t) * nargs);
 	expect_d_eq(cmp, 0, "Raw args mismatch");
 }
 
 static void
-reset() {
+reset(void) {
 	call_count = 0;
 	reset_args();
 }
@@ -95,8 +95,8 @@ test_alloc_hook(void *extra, hook_alloc_t type, void *result,
 }
 
 static void
-test_dalloc_hook(void *extra, hook_dalloc_t type, void *address,
-    uintptr_t args_raw[3]) {
+test_dalloc_hook(
+    void *extra, hook_dalloc_t type, void *address, uintptr_t args_raw[3]) {
 	call_count++;
 	arg_extra = extra;
 	arg_type = (int)type;
@@ -122,16 +122,15 @@ test_expand_hook(void *extra, hook_expand_t type, void *address,
 
 TEST_BEGIN(test_hooks_basic) {
 	/* Just verify that the record their arguments correctly. */
-	hooks_t hooks = {
-		&test_alloc_hook, &test_dalloc_hook, &test_expand_hook,
-		(void *)111};
-	void *handle = hook_install(TSDN_NULL, &hooks);
+	hooks_t hooks = {&test_alloc_hook, &test_dalloc_hook, &test_expand_hook,
+	    (void *)111};
+	void   *handle = hook_install(TSDN_NULL, &hooks);
 	uintptr_t args_raw[4] = {10, 20, 30, 40};
 
 	/* Alloc */
 	reset_args();
-	hook_invoke_alloc(hook_alloc_posix_memalign, (void *)222, 333,
-	    args_raw);
+	hook_invoke_alloc(
+	    hook_alloc_posix_memalign, (void *)222, 333, args_raw);
 	expect_ptr_eq(arg_extra, (void *)111, "Passed wrong user pointer");
 	expect_d_eq((int)hook_alloc_posix_memalign, arg_type,
 	    "Passed wrong alloc type");
@@ -142,18 +141,18 @@ TEST_BEGIN(test_hooks_basic) {
 	/* Dalloc */
 	reset_args();
 	hook_invoke_dalloc(hook_dalloc_sdallocx, (void *)222, args_raw);
-	expect_d_eq((int)hook_dalloc_sdallocx, arg_type,
-	    "Passed wrong dalloc type");
+	expect_d_eq(
+	    (int)hook_dalloc_sdallocx, arg_type, "Passed wrong dalloc type");
 	expect_ptr_eq((void *)111, arg_extra, "Passed wrong user pointer");
 	expect_ptr_eq((void *)222, arg_address, "Passed wrong address");
 	expect_args_raw(args_raw, 3);
 
 	/* Expand */
 	reset_args();
-	hook_invoke_expand(hook_expand_xallocx, (void *)222, 333, 444, 555,
-	    args_raw);
-	expect_d_eq((int)hook_expand_xallocx, arg_type,
-	    "Passed wrong expand type");
+	hook_invoke_expand(
+	    hook_expand_xallocx, (void *)222, 333, 444, 555, args_raw);
+	expect_d_eq(
+	    (int)hook_expand_xallocx, arg_type, "Passed wrong expand type");
 	expect_ptr_eq((void *)111, arg_extra, "Passed wrong user pointer");
 	expect_ptr_eq((void *)222, arg_address, "Passed wrong address");
 	expect_zu_eq(333, arg_old_usize, "Passed wrong old usize");
@@ -205,7 +204,7 @@ TEST_END
 
 TEST_BEGIN(test_hooks_remove) {
 	hooks_t hooks = {&test_alloc_hook, NULL, NULL, NULL};
-	void *handle = hook_install(TSDN_NULL, &hooks);
+	void   *handle = hook_install(TSDN_NULL, &hooks);
 	expect_ptr_ne(handle, NULL, "Hook installation failed");
 	call_count = 0;
 	uintptr_t args_raw[4] = {10, 20, 30, 40};
@@ -216,14 +215,13 @@ TEST_BEGIN(test_hooks_remove) {
 	hook_remove(TSDN_NULL, handle);
 	hook_invoke_alloc(hook_alloc_malloc, NULL, 0, NULL);
 	expect_d_eq(call_count, 0, "Hook invoked after removal");
-
 }
 TEST_END
 
 TEST_BEGIN(test_hooks_alloc_simple) {
 	/* "Simple" in the sense that we're not in a realloc variant. */
 	hooks_t hooks = {&test_alloc_hook, NULL, NULL, (void *)123};
-	void *handle = hook_install(TSDN_NULL, &hooks);
+	void   *handle = hook_install(TSDN_NULL, &hooks);
 	expect_ptr_ne(handle, NULL, "Hook installation failed");
 
 	/* Stop malloc from being optimized away. */
@@ -237,8 +235,8 @@ TEST_BEGIN(test_hooks_alloc_simple) {
 	expect_ptr_eq(arg_extra, (void *)123, "Wrong extra");
 	expect_d_eq(arg_type, (int)hook_alloc_malloc, "Wrong hook type");
 	expect_ptr_eq(ptr, arg_result, "Wrong result");
-	expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw,
-	    "Wrong raw result");
+	expect_u64_eq(
+	    (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result");
 	expect_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument");
 	free(ptr);
 
@@ -247,11 +245,11 @@ TEST_BEGIN(test_hooks_alloc_simple) {
 	err = posix_memalign((void **)&ptr, 1024, 1);
 	expect_d_eq(call_count, 1, "Hook not called");
 	expect_ptr_eq(arg_extra, (void *)123, "Wrong extra");
-	expect_d_eq(arg_type, (int)hook_alloc_posix_memalign,
-	    "Wrong hook type");
+	expect_d_eq(
+	    arg_type, (int)hook_alloc_posix_memalign, "Wrong hook type");
 	expect_ptr_eq(ptr, arg_result, "Wrong result");
-	expect_u64_eq((uintptr_t)err, (uintptr_t)arg_result_raw,
-	    "Wrong raw result");
+	expect_u64_eq(
+	    (uintptr_t)err, (uintptr_t)arg_result_raw, "Wrong raw result");
 	expect_u64_eq((uintptr_t)&ptr, arg_args_raw[0], "Wrong argument");
 	expect_u64_eq((uintptr_t)1024, arg_args_raw[1], "Wrong argument");
 	expect_u64_eq((uintptr_t)1, arg_args_raw[2], "Wrong argument");
@@ -262,11 +260,10 @@ TEST_BEGIN(test_hooks_alloc_simple) {
 	ptr = aligned_alloc(1024, 1);
 	expect_d_eq(call_count, 1, "Hook not called");
 	expect_ptr_eq(arg_extra, (void *)123, "Wrong extra");
-	expect_d_eq(arg_type, (int)hook_alloc_aligned_alloc,
-	    "Wrong hook type");
+	expect_d_eq(arg_type, (int)hook_alloc_aligned_alloc, "Wrong hook type");
 	expect_ptr_eq(ptr, arg_result, "Wrong result");
-	expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw,
-	    "Wrong raw result");
+	expect_u64_eq(
+	    (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result");
 	expect_u64_eq((uintptr_t)1024, arg_args_raw[0], "Wrong argument");
 	expect_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong argument");
 	free(ptr);
@@ -278,8 +275,8 @@ TEST_BEGIN(test_hooks_alloc_simple) {
 	expect_ptr_eq(arg_extra, (void *)123, "Wrong extra");
 	expect_d_eq(arg_type, (int)hook_alloc_calloc, "Wrong hook type");
 	expect_ptr_eq(ptr, arg_result, "Wrong result");
-	expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw,
-	    "Wrong raw result");
+	expect_u64_eq(
+	    (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result");
 	expect_u64_eq((uintptr_t)11, arg_args_raw[0], "Wrong argument");
 	expect_u64_eq((uintptr_t)13, arg_args_raw[1], "Wrong argument");
 	free(ptr);
@@ -292,8 +289,8 @@ TEST_BEGIN(test_hooks_alloc_simple) {
 	expect_ptr_eq(arg_extra, (void *)123, "Wrong extra");
 	expect_d_eq(arg_type, (int)hook_alloc_memalign, "Wrong hook type");
 	expect_ptr_eq(ptr, arg_result, "Wrong result");
-	expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw,
-	    "Wrong raw result");
+	expect_u64_eq(
+	    (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result");
 	expect_u64_eq((uintptr_t)1024, arg_args_raw[0], "Wrong argument");
 	expect_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong argument");
 	free(ptr);
@@ -307,12 +304,26 @@ TEST_BEGIN(test_hooks_alloc_simple) {
 	expect_ptr_eq(arg_extra, (void *)123, "Wrong extra");
 	expect_d_eq(arg_type, (int)hook_alloc_valloc, "Wrong hook type");
 	expect_ptr_eq(ptr, arg_result, "Wrong result");
-	expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw,
-	    "Wrong raw result");
+	expect_u64_eq(
+	    (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result");
 	expect_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument");
 	free(ptr);
 #endif /* JEMALLOC_OVERRIDE_VALLOC */
 
+	/* pvalloc */
+#ifdef JEMALLOC_OVERRIDE_PVALLOC
+	reset();
+	ptr = pvalloc(1);
+	expect_d_eq(call_count, 1, "Hook not called");
+	expect_ptr_eq(arg_extra, (void *)123, "Wrong extra");
+	expect_d_eq(arg_type, (int)hook_alloc_pvalloc, "Wrong hook type");
+	expect_ptr_eq(ptr, arg_result, "Wrong result");
+	expect_u64_eq(
+	    (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result");
+	expect_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument");
+	free(ptr);
+#endif /* JEMALLOC_OVERRIDE_PVALLOC */
+
 	/* mallocx */
 	reset();
 	ptr = mallocx(1, MALLOCX_LG_ALIGN(10));
@@ -320,11 +331,11 @@ TEST_BEGIN(test_hooks_alloc_simple) {
 	expect_ptr_eq(arg_extra, (void *)123, "Wrong extra");
 	expect_d_eq(arg_type, (int)hook_alloc_mallocx, "Wrong hook type");
 	expect_ptr_eq(ptr, arg_result, "Wrong result");
-	expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw,
-	    "Wrong raw result");
+	expect_u64_eq(
+	    (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result");
 	expect_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument");
-	expect_u64_eq((uintptr_t)MALLOCX_LG_ALIGN(10), arg_args_raw[1],
-	    "Wrong flags");
+	expect_u64_eq(
+	    (uintptr_t)MALLOCX_LG_ALIGN(10), arg_args_raw[1], "Wrong flags");
 	free(ptr);
 
 	hook_remove(TSDN_NULL, handle);
@@ -334,7 +345,7 @@ TEST_END
 TEST_BEGIN(test_hooks_dalloc_simple) {
 	/* "Simple" in the sense that we're not in a realloc variant. */
 	hooks_t hooks = {NULL, &test_dalloc_hook, NULL, (void *)123};
-	void *handle = hook_install(TSDN_NULL, &hooks);
+	void   *handle = hook_install(TSDN_NULL, &hooks);
 	expect_ptr_ne(handle, NULL, "Hook installation failed");
 
 	void *volatile ptr;
@@ -358,8 +369,8 @@ TEST_BEGIN(test_hooks_dalloc_simple) {
 	expect_d_eq(arg_type, (int)hook_dalloc_dallocx, "Wrong hook type");
 	expect_ptr_eq(ptr, arg_address, "Wrong pointer freed");
 	expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg");
-	expect_u64_eq((uintptr_t)MALLOCX_TCACHE_NONE, arg_args_raw[1],
-	    "Wrong raw arg");
+	expect_u64_eq(
+	    (uintptr_t)MALLOCX_TCACHE_NONE, arg_args_raw[1], "Wrong raw arg");
 
 	/* sdallocx() */
 	reset();
@@ -371,8 +382,8 @@ TEST_BEGIN(test_hooks_dalloc_simple) {
 	expect_ptr_eq(ptr, arg_address, "Wrong pointer freed");
 	expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg");
 	expect_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong raw arg");
-	expect_u64_eq((uintptr_t)MALLOCX_TCACHE_NONE, arg_args_raw[2],
-	    "Wrong raw arg");
+	expect_u64_eq(
+	    (uintptr_t)MALLOCX_TCACHE_NONE, arg_args_raw[2], "Wrong raw arg");
 
 	hook_remove(TSDN_NULL, handle);
 }
@@ -381,7 +392,7 @@ TEST_END
 TEST_BEGIN(test_hooks_expand_simple) {
 	/* "Simple" in the sense that we're not in a realloc variant. */
 	hooks_t hooks = {NULL, NULL, &test_expand_hook, (void *)123};
-	void *handle = hook_install(TSDN_NULL, &hooks);
+	void   *handle = hook_install(TSDN_NULL, &hooks);
 	expect_ptr_ne(handle, NULL, "Hook installation failed");
 
 	void *volatile ptr;
@@ -407,9 +418,9 @@ TEST_BEGIN(test_hooks_expand_simple) {
 TEST_END
 
 TEST_BEGIN(test_hooks_realloc_as_malloc_or_free) {
-	hooks_t hooks = {&test_alloc_hook, &test_dalloc_hook,
-		&test_expand_hook, (void *)123};
-	void *handle = hook_install(TSDN_NULL, &hooks);
+	hooks_t hooks = {&test_alloc_hook, &test_dalloc_hook, &test_expand_hook,
+	    (void *)123};
+	void   *handle = hook_install(TSDN_NULL, &hooks);
 	expect_ptr_ne(handle, NULL, "Hook installation failed");
 
 	void *volatile ptr;
@@ -421,8 +432,8 @@ TEST_BEGIN(test_hooks_realloc_as_malloc_or_free) {
 	expect_ptr_eq(arg_extra, (void *)123, "Wrong extra");
 	expect_d_eq(arg_type, (int)hook_alloc_realloc, "Wrong hook type");
 	expect_ptr_eq(ptr, arg_result, "Wrong result");
-	expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw,
-	    "Wrong raw result");
+	expect_u64_eq(
+	    (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result");
 	expect_u64_eq((uintptr_t)NULL, arg_args_raw[0], "Wrong argument");
 	expect_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong argument");
 	free(ptr);
@@ -434,14 +445,11 @@ TEST_BEGIN(test_hooks_realloc_as_malloc_or_free) {
 		realloc(ptr, 0);
 		expect_d_eq(call_count, 1, "Hook not called");
 		expect_ptr_eq(arg_extra, (void *)123, "Wrong extra");
-		expect_d_eq(arg_type, (int)hook_dalloc_realloc,
-		    "Wrong hook type");
-		expect_ptr_eq(ptr, arg_address,
-		    "Wrong pointer freed");
-		expect_u64_eq((uintptr_t)ptr, arg_args_raw[0],
-		    "Wrong raw arg");
-		expect_u64_eq((uintptr_t)0, arg_args_raw[1],
-		    "Wrong raw arg");
+		expect_d_eq(
+		    arg_type, (int)hook_dalloc_realloc, "Wrong hook type");
+		expect_ptr_eq(ptr, arg_address, "Wrong pointer freed");
+		expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg");
+		expect_u64_eq((uintptr_t)0, arg_args_raw[1], "Wrong raw arg");
 	}
 
 	/* realloc(NULL, 0) as malloc(0) */
@@ -451,8 +459,8 @@ TEST_BEGIN(test_hooks_realloc_as_malloc_or_free) {
 	expect_ptr_eq(arg_extra, (void *)123, "Wrong extra");
 	expect_d_eq(arg_type, (int)hook_alloc_realloc, "Wrong hook type");
 	expect_ptr_eq(ptr, arg_result, "Wrong result");
-	expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw,
-	    "Wrong raw result");
+	expect_u64_eq(
+	    (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result");
 	expect_u64_eq((uintptr_t)NULL, arg_args_raw[0], "Wrong argument");
 	expect_u64_eq((uintptr_t)0, arg_args_raw[1], "Wrong argument");
 	free(ptr);
@@ -464,9 +472,9 @@ TEST_END
 static void
 do_realloc_test(void *(*ralloc)(void *, size_t, int), int flags,
     int expand_type, int dalloc_type) {
-	hooks_t hooks = {&test_alloc_hook, &test_dalloc_hook,
-		&test_expand_hook, (void *)123};
-	void *handle = hook_install(TSDN_NULL, &hooks);
+	hooks_t hooks = {&test_alloc_hook, &test_dalloc_hook, &test_expand_hook,
+	    (void *)123};
+	void   *handle = hook_install(TSDN_NULL, &hooks);
 	expect_ptr_ne(handle, NULL, "Hook installation failed");
 
 	void *volatile ptr;
@@ -482,8 +490,8 @@ do_realloc_test(void *(*ralloc)(void *, size_t, int), int flags,
 	expect_ptr_eq(arg_extra, (void *)123, "Wrong extra");
 	expect_d_eq(arg_type, expand_type, "Wrong hook type");
 	expect_ptr_eq(ptr, arg_address, "Wrong address");
-	expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw,
-	    "Wrong raw result");
+	expect_u64_eq(
+	    (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result");
 	expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong argument");
 	expect_u64_eq((uintptr_t)130, arg_args_raw[1], "Wrong argument");
 	free(ptr);
@@ -508,11 +516,11 @@ do_realloc_test(void *(*ralloc)(void *, size_t, int), int flags,
 	}
 	expect_ptr_eq(arg_extra, (void *)123, "Wrong extra");
 	expect_ptr_eq(ptr2, arg_address, "Wrong address");
-	expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw,
-	    "Wrong raw result");
+	expect_u64_eq(
+	    (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result");
 	expect_u64_eq((uintptr_t)ptr2, arg_args_raw[0], "Wrong argument");
-	expect_u64_eq((uintptr_t)2 * 1024 * 1024, arg_args_raw[1],
-	    "Wrong argument");
+	expect_u64_eq(
+	    (uintptr_t)2 * 1024 * 1024, arg_args_raw[1], "Wrong argument");
 	free(ptr);
 
 	/* Realloc with move, small. */
@@ -526,8 +534,8 @@ do_realloc_test(void *(*ralloc)(void *, size_t, int), int flags,
 	expect_d_eq(arg_type, dalloc_type, "Wrong hook type");
 	expect_ptr_eq(ptr, arg_address, "Wrong address");
 	expect_ptr_eq(ptr2, arg_result, "Wrong address");
-	expect_u64_eq((uintptr_t)ptr2, (uintptr_t)arg_result_raw,
-	    "Wrong raw result");
+	expect_u64_eq(
+	    (uintptr_t)ptr2, (uintptr_t)arg_result_raw, "Wrong raw result");
 	expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong argument");
 	expect_u64_eq((uintptr_t)128, arg_args_raw[1], "Wrong argument");
 	free(ptr2);
@@ -543,11 +551,11 @@ do_realloc_test(void *(*ralloc)(void *, size_t, int), int flags,
 	expect_d_eq(arg_type, dalloc_type, "Wrong hook type");
 	expect_ptr_eq(ptr, arg_address, "Wrong address");
 	expect_ptr_eq(ptr2, arg_result, "Wrong address");
-	expect_u64_eq((uintptr_t)ptr2, (uintptr_t)arg_result_raw,
-	    "Wrong raw result");
+	expect_u64_eq(
+	    (uintptr_t)ptr2, (uintptr_t)arg_result_raw, "Wrong raw result");
 	expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong argument");
-	expect_u64_eq((uintptr_t)2 * 1024 * 1024, arg_args_raw[1],
-	    "Wrong argument");
+	expect_u64_eq(
+	    (uintptr_t)2 * 1024 * 1024, arg_args_raw[1], "Wrong argument");
 	free(ptr2);
 
 	hook_remove(TSDN_NULL, handle);
@@ -559,8 +567,8 @@ realloc_wrapper(void *ptr, size_t size, UNUSED int flags) {
 }
 
 TEST_BEGIN(test_hooks_realloc) {
-	do_realloc_test(&realloc_wrapper, 0, hook_expand_realloc,
-	    hook_dalloc_realloc);
+	do_realloc_test(
+	    &realloc_wrapper, 0, hook_expand_realloc, hook_dalloc_realloc);
 }
 TEST_END
 
@@ -573,14 +581,9 @@ TEST_END
 int
 main(void) {
 	/* We assert on call counts. */
-	return test_no_reentrancy(
-	    test_hooks_basic,
-	    test_hooks_null,
-	    test_hooks_remove,
-	    test_hooks_alloc_simple,
-	    test_hooks_dalloc_simple,
-	    test_hooks_expand_simple,
-	    test_hooks_realloc_as_malloc_or_free,
-	    test_hooks_realloc,
+	return test_no_reentrancy(test_hooks_basic, test_hooks_null,
+	    test_hooks_remove, test_hooks_alloc_simple,
+	    test_hooks_dalloc_simple, test_hooks_expand_simple,
+	    test_hooks_realloc_as_malloc_or_free, test_hooks_realloc,
 	    test_hooks_rallocx);
 }
diff --git a/test/unit/hpa.c b/test/unit/hpa.c
index dfd57f39..9c4253cd 100644
--- a/test/unit/hpa.c
+++ b/test/unit/hpa.c
@@ -5,7 +5,7 @@
 
 #define SHARD_IND 111
 
-#define ALLOC_MAX (HUGEPAGE / 4)
+#define ALLOC_MAX (HUGEPAGE)
 
 typedef struct test_data_s test_data_t;
 struct test_data_s {
@@ -13,30 +13,89 @@ struct test_data_s {
 	 * Must be the first member -- we convert back and forth between the
 	 * test_data_t and the hpa_shard_t;
 	 */
-	hpa_shard_t shard;
+	hpa_shard_t   shard;
 	hpa_central_t central;
-	base_t *base;
+	base_t       *base;
 	edata_cache_t shard_edata_cache;
 
 	emap_t emap;
 };
 
 static hpa_shard_opts_t test_hpa_shard_opts_default = {
-	/* slab_max_alloc */
-	ALLOC_MAX,
-	/* hugification threshold */
-	HUGEPAGE,
-	/* dirty_mult */
-	FXP_INIT_PERCENT(25),
-	/* deferral_allowed */
-	false,
-	/* hugify_delay_ms */
-	10 * 1000,
-};
+    /* slab_max_alloc */
+    ALLOC_MAX,
+    /* hugification_threshold */
+    HUGEPAGE,
+    /* dirty_mult */
+    FXP_INIT_PERCENT(25),
+    /* deferral_allowed */
+    false,
+    /* hugify_delay_ms */
+    10 * 1000,
+    /* hugify_sync */
+    false,
+    /* min_purge_interval_ms */
+    5 * 1000,
+    /* experimental_max_purge_nhp */
+    -1,
+    /* purge_threshold */
+    1,
+    /* min_purge_delay_ms */
+    0,
+    /* hugify_style */
+    hpa_hugify_style_lazy};
+
+static hpa_shard_opts_t test_hpa_shard_opts_purge = {
+    /* slab_max_alloc */
+    HUGEPAGE,
+    /* hugification_threshold */
+    0.9 * HUGEPAGE,
+    /* dirty_mult */
+    FXP_INIT_PERCENT(11),
+    /* deferral_allowed */
+    true,
+    /* hugify_delay_ms */
+    0,
+    /* hugify_sync */
+    false,
+    /* min_purge_interval_ms */
+    5 * 1000,
+    /* experimental_max_purge_nhp */
+    -1,
+    /* purge_threshold */
+    1,
+    /* min_purge_delay_ms */
+    0,
+    /* hugify_style */
+    hpa_hugify_style_lazy};
+
+static hpa_shard_opts_t test_hpa_shard_opts_aggressive = {
+    /* slab_max_alloc */
+    HUGEPAGE,
+    /* hugification_threshold */
+    0.9 * HUGEPAGE,
+    /* dirty_mult */
+    FXP_INIT_PERCENT(11),
+    /* deferral_allowed */
+    true,
+    /* hugify_delay_ms */
+    0,
+    /* hugify_sync */
+    false,
+    /* min_purge_interval_ms */
+    5,
+    /* experimental_max_purge_nhp */
+    -1,
+    /* purge_threshold */
+    HUGEPAGE - 5 * PAGE,
+    /* min_purge_delay_ms */
+    10,
+    /* hugify_style */
+    hpa_hugify_style_eager};
 
 static hpa_shard_t *
-create_test_data(hpa_hooks_t *hooks, hpa_shard_opts_t *opts) {
-	bool err;
+create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) {
+	bool    err;
 	base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND,
 	    &ehooks_default_extent_hooks, /* metadata_use_hooks */ true);
 	assert_ptr_not_null(base, "");
@@ -54,10 +113,12 @@ create_test_data(hpa_hooks_t *hooks, hpa_shard_opts_t *opts) {
 
 	err = hpa_central_init(&test_data->central, test_data->base, hooks);
 	assert_false(err, "");
-
-	err = hpa_shard_init(&test_data->shard, &test_data->central,
+	sec_opts_t sec_opts;
+	sec_opts.nshards = 0;
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	err = hpa_shard_init(tsdn, &test_data->shard, &test_data->central,
 	    &test_data->emap, test_data->base, &test_data->shard_edata_cache,
-	    SHARD_IND, opts);
+	    SHARD_IND, opts, &sec_opts);
 	assert_false(err, "");
 
 	return (hpa_shard_t *)test_data;
@@ -73,8 +134,8 @@ destroy_test_data(hpa_shard_t *shard) {
 TEST_BEGIN(test_alloc_max) {
 	test_skip_if(!hpa_supported());
 
-	hpa_shard_t *shard = create_test_data(&hpa_hooks_default,
-	    &test_hpa_shard_opts_default);
+	hpa_shard_t *shard = create_test_data(
+	    &hpa_hooks_default, &test_hpa_shard_opts_default);
 	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
 
 	edata_t *edata;
@@ -82,12 +143,25 @@ TEST_BEGIN(test_alloc_max) {
 	/* Small max */
 	bool deferred_work_generated = false;
 	edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false, false,
-	    false, &deferred_work_generated);
+	    /* frequent_reuse */ false, &deferred_work_generated);
 	expect_ptr_not_null(edata, "Allocation of small max failed");
+
 	edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX + PAGE, PAGE, false,
-	    false, false, &deferred_work_generated);
+	    false, /* frequent_reuse */ false, &deferred_work_generated);
 	expect_ptr_null(edata, "Allocation of larger than small max succeeded");
 
+	edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false, false,
+	    /* frequent_reuse */ true, &deferred_work_generated);
+	expect_ptr_not_null(edata, "Allocation of frequent reused failed");
+
+	edata = pai_alloc(tsdn, &shard->pai, HUGEPAGE, PAGE, false, false,
+	    /* frequent_reuse */ true, &deferred_work_generated);
+	expect_ptr_not_null(edata, "Allocation of frequent reused failed");
+
+	edata = pai_alloc(tsdn, &shard->pai, HUGEPAGE + PAGE, PAGE, false,
+	    false, /* frequent_reuse */ true, &deferred_work_generated);
+	expect_ptr_null(edata, "Allocation of larger than hugepage succeeded");
+
 	destroy_test_data(shard);
 }
 TEST_END
@@ -95,8 +169,8 @@ TEST_END
 typedef struct mem_contents_s mem_contents_t;
 struct mem_contents_s {
 	uintptr_t my_addr;
-	size_t size;
-	edata_t *my_edata;
+	size_t    size;
+	edata_t  *my_edata;
 	rb_node(mem_contents_t) link;
 };
 
@@ -106,8 +180,7 @@ mem_contents_cmp(const mem_contents_t *a, const mem_contents_t *b) {
 }
 
 typedef rb_tree(mem_contents_t) mem_tree_t;
-rb_gen(static, mem_tree_, mem_tree_t, mem_contents_t, link,
-    mem_contents_cmp);
+rb_gen(static, mem_tree_, mem_tree_t, mem_contents_t, link, mem_contents_cmp);
 
 static void
 node_assert_ordered(mem_contents_t *a, mem_contents_t *b) {
@@ -153,14 +226,14 @@ node_remove(mem_tree_t *tree, edata_t *edata) {
 TEST_BEGIN(test_stress) {
 	test_skip_if(!hpa_supported());
 
-	hpa_shard_t *shard = create_test_data(&hpa_hooks_default,
-	    &test_hpa_shard_opts_default);
+	hpa_shard_t *shard = create_test_data(
+	    &hpa_hooks_default, &test_hpa_shard_opts_default);
 
 	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
 
 	const size_t nlive_edatas_max = 500;
-	size_t nlive_edatas = 0;
-	edata_t **live_edatas = calloc(nlive_edatas_max, sizeof(edata_t *));
+	size_t       nlive_edatas = 0;
+	edata_t    **live_edatas = calloc(nlive_edatas_max, sizeof(edata_t *));
 	/*
 	 * Nothing special about this constant; we're only fixing it for
 	 * consistency across runs.
@@ -186,13 +259,14 @@ TEST_BEGIN(test_stress) {
 			 */
 			size_t npages_min = 1;
 			size_t npages_max = ALLOC_MAX / PAGE;
-			size_t npages = npages_min + prng_range_zu(&prng_state,
-			    npages_max - npages_min);
+			size_t npages = npages_min
+			    + prng_range_zu(
+			        &prng_state, npages_max - npages_min);
 			edata_t *edata = pai_alloc(tsdn, &shard->pai,
 			    npages * PAGE, PAGE, false, false, false,
 			    &deferred_work_generated);
-			assert_ptr_not_null(edata,
-			    "Unexpected allocation failure");
+			assert_ptr_not_null(
+			    edata, "Unexpected allocation failure");
 			live_edatas[nlive_edatas] = edata;
 			nlive_edatas++;
 			node_insert(&tree, edata, npages);
@@ -201,7 +275,8 @@ TEST_BEGIN(test_stress) {
 			if (nlive_edatas == 0) {
 				continue;
 			}
-			size_t victim = prng_range_zu(&prng_state, nlive_edatas);
+			size_t victim = prng_range_zu(
+			    &prng_state, nlive_edatas);
 			edata_t *to_free = live_edatas[victim];
 			live_edatas[victim] = live_edatas[nlive_edatas - 1];
 			nlive_edatas--;
@@ -226,8 +301,8 @@ TEST_BEGIN(test_stress) {
 	for (size_t i = 0; i < nlive_edatas; i++) {
 		edata_t *to_free = live_edatas[i];
 		node_remove(&tree, to_free);
-		pai_dalloc(tsdn, &shard->pai, to_free,
-		    &deferred_work_generated);
+		pai_dalloc(
+		    tsdn, &shard->pai, to_free, &deferred_work_generated);
 	}
 	hpa_shard_destroy(tsdn, shard);
 
@@ -236,84 +311,6 @@ TEST_BEGIN(test_stress) {
 }
 TEST_END
 
-static void
-expect_contiguous(edata_t **edatas, size_t nedatas) {
-	for (size_t i = 0; i < nedatas; i++) {
-		size_t expected = (size_t)edata_base_get(edatas[0])
-		    + i * PAGE;
-		expect_zu_eq(expected, (size_t)edata_base_get(edatas[i]),
-		    "Mismatch at index %zu", i);
-	}
-}
-
-TEST_BEGIN(test_alloc_dalloc_batch) {
-	test_skip_if(!hpa_supported());
-
-	hpa_shard_t *shard = create_test_data(&hpa_hooks_default,
-	    &test_hpa_shard_opts_default);
-	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
-
-	bool deferred_work_generated = false;
-
-	enum {NALLOCS = 8};
-
-	edata_t *allocs[NALLOCS];
-	/*
-	 * Allocate a mix of ways; first half from regular alloc, second half
-	 * from alloc_batch.
-	 */
-	for (size_t i = 0; i < NALLOCS / 2; i++) {
-		allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE,
-		    /* zero */ false, /* guarded */ false,
-		    /* frequent_reuse */ false, &deferred_work_generated);
-		expect_ptr_not_null(allocs[i], "Unexpected alloc failure");
-	}
-	edata_list_active_t allocs_list;
-	edata_list_active_init(&allocs_list);
-	size_t nsuccess = pai_alloc_batch(tsdn, &shard->pai, PAGE, NALLOCS / 2,
-	    &allocs_list, &deferred_work_generated);
-	expect_zu_eq(NALLOCS / 2, nsuccess, "Unexpected oom");
-	for (size_t i = NALLOCS / 2; i < NALLOCS; i++) {
-		allocs[i] = edata_list_active_first(&allocs_list);
-		edata_list_active_remove(&allocs_list, allocs[i]);
-	}
-
-	/*
-	 * Should have allocated them contiguously, despite the differing
-	 * methods used.
-	 */
-	void *orig_base = edata_base_get(allocs[0]);
-	expect_contiguous(allocs, NALLOCS);
-
-	/*
-	 * Batch dalloc the first half, individually deallocate the second half.
-	 */
-	for (size_t i = 0; i < NALLOCS / 2; i++) {
-		edata_list_active_append(&allocs_list, allocs[i]);
-	}
-	pai_dalloc_batch(tsdn, &shard->pai, &allocs_list,
-	    &deferred_work_generated);
-	for (size_t i = NALLOCS / 2; i < NALLOCS; i++) {
-		pai_dalloc(tsdn, &shard->pai, allocs[i],
-		    &deferred_work_generated);
-	}
-
-	/* Reallocate (individually), and ensure reuse and contiguity. */
-	for (size_t i = 0; i < NALLOCS; i++) {
-		allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE,
-		    /* zero */ false, /* guarded */ false, /* frequent_reuse */
-		    false, &deferred_work_generated);
-		expect_ptr_not_null(allocs[i], "Unexpected alloc failure.");
-	}
-	void *new_base = edata_base_get(allocs[0]);
-	expect_ptr_eq(orig_base, new_base,
-	    "Failed to reuse the allocated memory.");
-	expect_contiguous(allocs, NALLOCS);
-
-	destroy_test_data(shard);
-}
-TEST_END
-
 static uintptr_t defer_bump_ptr = HUGEPAGE * 123;
 static void *
 defer_test_map(size_t size) {
@@ -328,24 +325,36 @@ defer_test_unmap(void *ptr, size_t size) {
 	(void)size;
 }
 
-static bool defer_purge_called = false;
+static size_t ndefer_purge_calls = 0;
+static size_t npurge_size = 0;
 static void
 defer_test_purge(void *ptr, size_t size) {
 	(void)ptr;
-	(void)size;
-	defer_purge_called = true;
+	npurge_size = size;
+	++ndefer_purge_calls;
 }
 
-static bool defer_hugify_called = false;
-static void
-defer_test_hugify(void *ptr, size_t size) {
-	defer_hugify_called = true;
+static bool defer_vectorized_purge_called = false;
+static bool
+defer_vectorized_purge(void *vec, size_t vlen, size_t nbytes) {
+	(void)vec;
+	(void)nbytes;
+	++ndefer_purge_calls;
+	defer_vectorized_purge_called = true;
+	return false;
 }
 
-static bool defer_dehugify_called = false;
+static size_t ndefer_hugify_calls = 0;
+static bool
+defer_test_hugify(void *ptr, size_t size, bool sync) {
+	++ndefer_hugify_calls;
+	return false;
+}
+
+static size_t ndefer_dehugify_calls = 0;
 static void
 defer_test_dehugify(void *ptr, size_t size) {
-	defer_dehugify_called = true;
+	++ndefer_dehugify_calls;
 }
 
 static nstime_t defer_curtime;
@@ -370,6 +379,7 @@ TEST_BEGIN(test_defer_time) {
 	hooks.dehugify = &defer_test_dehugify;
 	hooks.curtime = &defer_test_curtime;
 	hooks.ms_since = &defer_test_ms_since;
+	hooks.vectorized_purge = &defer_vectorized_purge;
 
 	hpa_shard_opts_t opts = test_hpa_shard_opts_default;
 	opts.deferral_allowed = true;
@@ -379,7 +389,7 @@ TEST_BEGIN(test_defer_time) {
 	bool deferred_work_generated = false;
 
 	nstime_init(&defer_curtime, 0);
-	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	tsdn_t  *tsdn = tsd_tsdn(tsd_fetch());
 	edata_t *edatas[HUGEPAGE_PAGES];
 	for (int i = 0; i < (int)HUGEPAGE_PAGES; i++) {
 		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
@@ -387,29 +397,29 @@ TEST_BEGIN(test_defer_time) {
 		expect_ptr_not_null(edatas[i], "Unexpected null edata");
 	}
 	hpa_shard_do_deferred_work(tsdn, shard);
-	expect_false(defer_hugify_called, "Hugified too early");
+	expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early");
 
 	/* Hugification delay is set to 10 seconds in options. */
 	nstime_init2(&defer_curtime, 11, 0);
 	hpa_shard_do_deferred_work(tsdn, shard);
-	expect_true(defer_hugify_called, "Failed to hugify");
+	expect_zu_eq(1, ndefer_hugify_calls, "Failed to hugify");
 
-	defer_hugify_called = false;
+	ndefer_hugify_calls = 0;
 
 	/* Purge.  Recall that dirty_mult is .25. */
 	for (int i = 0; i < (int)HUGEPAGE_PAGES / 2; i++) {
-		pai_dalloc(tsdn, &shard->pai, edatas[i],
-		    &deferred_work_generated);
+		pai_dalloc(
+		    tsdn, &shard->pai, edatas[i], &deferred_work_generated);
 	}
 
 	hpa_shard_do_deferred_work(tsdn, shard);
 
-	expect_false(defer_hugify_called, "Hugified too early");
-	expect_true(defer_dehugify_called, "Should have dehugified");
-	expect_true(defer_purge_called, "Should have purged");
-	defer_hugify_called = false;
-	defer_dehugify_called = false;
-	defer_purge_called = false;
+	expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early");
+	expect_zu_eq(1, ndefer_dehugify_calls, "Should have dehugified");
+	expect_zu_eq(1, ndefer_purge_calls, "Should have purged");
+	ndefer_hugify_calls = 0;
+	ndefer_dehugify_calls = 0;
+	ndefer_purge_calls = 0;
 
 	/*
 	 * Refill the page.  We now meet the hugification threshold; we should
@@ -424,19 +434,1016 @@ TEST_BEGIN(test_defer_time) {
 	 * We would be ineligible for hugification, had we not already met the
 	 * threshold before dipping below it.
 	 */
-	pai_dalloc(tsdn, &shard->pai, edatas[0],
-	    &deferred_work_generated);
+	pai_dalloc(tsdn, &shard->pai, edatas[0], &deferred_work_generated);
 	/* Wait for the threshold again. */
 	nstime_init2(&defer_curtime, 22, 0);
 	hpa_shard_do_deferred_work(tsdn, shard);
-	expect_true(defer_hugify_called, "Hugified too early");
-	expect_false(defer_dehugify_called, "Unexpected dehugify");
-	expect_false(defer_purge_called, "Unexpected purge");
+	expect_zu_eq(1, ndefer_hugify_calls, "Failed to hugify");
+	expect_zu_eq(0, ndefer_dehugify_calls, "Unexpected dehugify");
+	expect_zu_eq(0, ndefer_purge_calls, "Unexpected purge");
+	ndefer_hugify_calls = 0;
 
 	destroy_test_data(shard);
 }
 TEST_END
 
+TEST_BEGIN(test_purge_no_infinite_loop) {
+	test_skip_if(!hpa_supported());
+
+	hpa_shard_t *shard = create_test_data(
+	    &hpa_hooks_default, &test_hpa_shard_opts_purge);
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+
+	/*
+	 * This is not arbitrary value, it is chosen to met hugification
+	 * criteria for huge page and at the same time do not allow hugify page
+	 * without triggering a purge.
+	 */
+	const size_t npages = test_hpa_shard_opts_purge.hugification_threshold
+	        / PAGE
+	    + 1;
+	const size_t size = npages * PAGE;
+
+	bool     deferred_work_generated = false;
+	edata_t *edata = pai_alloc(tsdn, &shard->pai, size, PAGE,
+	    /* zero */ false, /* guarded */ false, /* frequent_reuse */ false,
+	    &deferred_work_generated);
+	expect_ptr_not_null(edata, "Unexpected alloc failure");
+
+	hpa_shard_do_deferred_work(tsdn, shard);
+
+	/* hpa_shard_do_deferred_work should not stuck in a purging loop */
+
+	destroy_test_data(shard);
+}
+TEST_END
+
+TEST_BEGIN(test_no_min_purge_interval) {
+	test_skip_if(!hpa_supported());
+
+	hpa_hooks_t hooks;
+	hooks.map = &defer_test_map;
+	hooks.unmap = &defer_test_unmap;
+	hooks.purge = &defer_test_purge;
+	hooks.hugify = &defer_test_hugify;
+	hooks.dehugify = &defer_test_dehugify;
+	hooks.curtime = &defer_test_curtime;
+	hooks.ms_since = &defer_test_ms_since;
+	hooks.vectorized_purge = &defer_vectorized_purge;
+
+	hpa_shard_opts_t opts = test_hpa_shard_opts_default;
+	opts.deferral_allowed = true;
+	opts.min_purge_interval_ms = 0;
+
+	hpa_shard_t *shard = create_test_data(&hooks, &opts);
+
+	bool deferred_work_generated = false;
+
+	nstime_init(&defer_curtime, 0);
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+
+	edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, false,
+	    false, &deferred_work_generated);
+	expect_ptr_not_null(edata, "Unexpected null edata");
+	pai_dalloc(tsdn, &shard->pai, edata, &deferred_work_generated);
+	hpa_shard_do_deferred_work(tsdn, shard);
+
+	/*
+	 * Strict minimum purge interval is not set, we should purge as long as
+	 * we have dirty pages.
+	 */
+	expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early");
+	expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early");
+	expect_zu_eq(1, ndefer_purge_calls, "Expect purge");
+	ndefer_purge_calls = 0;
+
+	destroy_test_data(shard);
+}
+TEST_END
+
+TEST_BEGIN(test_min_purge_interval) {
+	test_skip_if(!hpa_supported());
+
+	hpa_hooks_t hooks;
+	hooks.map = &defer_test_map;
+	hooks.unmap = &defer_test_unmap;
+	hooks.purge = &defer_test_purge;
+	hooks.hugify = &defer_test_hugify;
+	hooks.dehugify = &defer_test_dehugify;
+	hooks.curtime = &defer_test_curtime;
+	hooks.ms_since = &defer_test_ms_since;
+	hooks.vectorized_purge = &defer_vectorized_purge;
+
+	hpa_shard_opts_t opts = test_hpa_shard_opts_default;
+	opts.deferral_allowed = true;
+
+	hpa_shard_t *shard = create_test_data(&hooks, &opts);
+
+	bool deferred_work_generated = false;
+
+	nstime_init(&defer_curtime, 0);
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+
+	edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, false,
+	    false, &deferred_work_generated);
+	expect_ptr_not_null(edata, "Unexpected null edata");
+	pai_dalloc(tsdn, &shard->pai, edata, &deferred_work_generated);
+	hpa_shard_do_deferred_work(tsdn, shard);
+
+	/*
+	 * We have a slab with dirty page and no active pages, but
+	 * opt.min_purge_interval_ms didn't pass yet.
+	 */
+	expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early");
+	expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early");
+	expect_zu_eq(0, ndefer_purge_calls, "Purged too early");
+
+	/* Minumum purge interval is set to 5 seconds in options. */
+	nstime_init2(&defer_curtime, 6, 0);
+	hpa_shard_do_deferred_work(tsdn, shard);
+
+	/* Now we should purge, but nothing else. */
+	expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early");
+	expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early");
+	expect_zu_eq(1, ndefer_purge_calls, "Expect purge");
+	ndefer_purge_calls = 0;
+
+	destroy_test_data(shard);
+}
+TEST_END
+
+TEST_BEGIN(test_purge) {
+	test_skip_if(!hpa_supported());
+
+	hpa_hooks_t hooks;
+	hooks.map = &defer_test_map;
+	hooks.unmap = &defer_test_unmap;
+	hooks.purge = &defer_test_purge;
+	hooks.hugify = &defer_test_hugify;
+	hooks.dehugify = &defer_test_dehugify;
+	hooks.curtime = &defer_test_curtime;
+	hooks.ms_since = &defer_test_ms_since;
+	hooks.vectorized_purge = &defer_vectorized_purge;
+
+	hpa_shard_opts_t opts = test_hpa_shard_opts_default;
+	opts.deferral_allowed = true;
+
+	hpa_shard_t *shard = create_test_data(&hooks, &opts);
+
+	bool deferred_work_generated = false;
+
+	nstime_init(&defer_curtime, 0);
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	enum { NALLOCS = 8 * HUGEPAGE_PAGES };
+	edata_t *edatas[NALLOCS];
+	for (int i = 0; i < NALLOCS; i++) {
+		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+		    false, false, &deferred_work_generated);
+		expect_ptr_not_null(edatas[i], "Unexpected null edata");
+	}
+	/* Deallocate 3 hugepages out of 8. */
+	for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) {
+		pai_dalloc(
+		    tsdn, &shard->pai, edatas[i], &deferred_work_generated);
+	}
+	nstime_init2(&defer_curtime, 6, 0);
+	hpa_shard_do_deferred_work(tsdn, shard);
+
+	expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early");
+	expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early");
+	/*
+	 * Expect only 2 purges, because opt.dirty_mult is set to 0.25 and we still
+	 * have 5 active hugepages (1 / 5 = 0.2 < 0.25).
+	 */
+	expect_zu_eq(2, ndefer_purge_calls, "Expect purges");
+	ndefer_purge_calls = 0;
+
+	nstime_init2(&defer_curtime, 12, 0);
+	hpa_shard_do_deferred_work(tsdn, shard);
+
+	/*
+	 * We are still having 5 active hugepages and now they are
+	 * matching hugification criteria long enough to actually hugify them.
+	 */
+	expect_zu_eq(5, ndefer_hugify_calls, "Expect hugification");
+	ndefer_hugify_calls = 0;
+	expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early");
+	/*
+	 * We still have completely dirty hugepage, but we are below
+	 * opt.dirty_mult.
+	 */
+	expect_zu_eq(0, ndefer_purge_calls, "Purged too early");
+	ndefer_purge_calls = 0;
+
+	destroy_test_data(shard);
+}
+TEST_END
+
+TEST_BEGIN(test_experimental_max_purge_nhp) {
+	test_skip_if(!hpa_supported());
+
+	hpa_hooks_t hooks;
+	hooks.map = &defer_test_map;
+	hooks.unmap = &defer_test_unmap;
+	hooks.purge = &defer_test_purge;
+	hooks.hugify = &defer_test_hugify;
+	hooks.dehugify = &defer_test_dehugify;
+	hooks.curtime = &defer_test_curtime;
+	hooks.ms_since = &defer_test_ms_since;
+	hooks.vectorized_purge = &defer_vectorized_purge;
+
+	hpa_shard_opts_t opts = test_hpa_shard_opts_default;
+	opts.deferral_allowed = true;
+	opts.experimental_max_purge_nhp = 1;
+
+	hpa_shard_t *shard = create_test_data(&hooks, &opts);
+
+	bool deferred_work_generated = false;
+
+	nstime_init(&defer_curtime, 0);
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	enum { NALLOCS = 8 * HUGEPAGE_PAGES };
+	edata_t *edatas[NALLOCS];
+	for (int i = 0; i < NALLOCS; i++) {
+		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+		    false, false, &deferred_work_generated);
+		expect_ptr_not_null(edatas[i], "Unexpected null edata");
+	}
+	/* Deallocate 3 hugepages out of 8. */
+	for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) {
+		pai_dalloc(
+		    tsdn, &shard->pai, edatas[i], &deferred_work_generated);
+	}
+	nstime_init2(&defer_curtime, 6, 0);
+	hpa_shard_do_deferred_work(tsdn, shard);
+
+	expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early");
+	expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early");
+	/*
+	 * Expect only one purge call, because opts.experimental_max_purge_nhp
+	 * is set to 1.
+	 */
+	expect_zu_eq(1, ndefer_purge_calls, "Expect purges");
+	ndefer_purge_calls = 0;
+
+	nstime_init2(&defer_curtime, 12, 0);
+	hpa_shard_do_deferred_work(tsdn, shard);
+
+	expect_zu_eq(5, ndefer_hugify_calls, "Expect hugification");
+	ndefer_hugify_calls = 0;
+	expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early");
+	/* We still above the limit for dirty pages. */
+	expect_zu_eq(1, ndefer_purge_calls, "Expect purge");
+	ndefer_purge_calls = 0;
+
+	nstime_init2(&defer_curtime, 18, 0);
+	hpa_shard_do_deferred_work(tsdn, shard);
+
+	expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early");
+	expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early");
+	/* Finally, we are below the limit, no purges are expected. */
+	expect_zu_eq(0, ndefer_purge_calls, "Purged too early");
+
+	destroy_test_data(shard);
+}
+TEST_END
+
+TEST_BEGIN(test_vectorized_opt_eq_zero) {
+	test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0));
+
+	hpa_hooks_t hooks;
+	hooks.map = &defer_test_map;
+	hooks.unmap = &defer_test_unmap;
+	hooks.purge = &defer_test_purge;
+	hooks.hugify = &defer_test_hugify;
+	hooks.dehugify = &defer_test_dehugify;
+	hooks.curtime = &defer_test_curtime;
+	hooks.ms_since = &defer_test_ms_since;
+	hooks.vectorized_purge = &defer_vectorized_purge;
+
+	hpa_shard_opts_t opts = test_hpa_shard_opts_default;
+	opts.deferral_allowed = true;
+	opts.min_purge_interval_ms = 0;
+
+	defer_vectorized_purge_called = false;
+	ndefer_purge_calls = 0;
+
+	hpa_shard_t *shard = create_test_data(&hooks, &opts);
+	bool         deferred_work_generated = false;
+	nstime_init(&defer_curtime, 0);
+	tsdn_t  *tsdn = tsd_tsdn(tsd_fetch());
+	edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, false,
+	    false, &deferred_work_generated);
+	expect_ptr_not_null(edata, "Unexpected null edata");
+	pai_dalloc(tsdn, &shard->pai, edata, &deferred_work_generated);
+	hpa_shard_do_deferred_work(tsdn, shard);
+
+	expect_false(defer_vectorized_purge_called, "No vec purge");
+	expect_zu_eq(1, ndefer_purge_calls, "Expect purge");
+
+	destroy_test_data(shard);
+}
+TEST_END
+
+TEST_BEGIN(test_starts_huge) {
+	test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0)
+	    || !config_stats);
+
+	hpa_hooks_t hooks;
+	hooks.map = &defer_test_map;
+	hooks.unmap = &defer_test_unmap;
+	hooks.purge = &defer_test_purge;
+	hooks.hugify = &defer_test_hugify;
+	hooks.dehugify = &defer_test_dehugify;
+	hooks.curtime = &defer_test_curtime;
+	hooks.ms_since = &defer_test_ms_since;
+	hooks.vectorized_purge = &defer_vectorized_purge;
+
+	hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive;
+	opts.deferral_allowed = true;
+	opts.min_purge_delay_ms = 10;
+	opts.min_purge_interval_ms = 0;
+
+	defer_vectorized_purge_called = false;
+	ndefer_purge_calls = 0;
+
+	hpa_shard_t *shard = create_test_data(&hooks, &opts);
+	bool         deferred_work_generated = false;
+	nstime_init2(&defer_curtime, 100, 0);
+
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	enum { NALLOCS = 2 * HUGEPAGE_PAGES };
+	edata_t *edatas[NALLOCS];
+	for (int i = 0; i < NALLOCS; i++) {
+		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+		    false, false, &deferred_work_generated);
+		expect_ptr_not_null(edatas[i], "Unexpected null edata");
+	}
+	/* Deallocate 75%  */
+	int pages_to_deallocate = (int)(0.75 * NALLOCS);
+	for (int i = 0; i < pages_to_deallocate; i++) {
+		pai_dalloc(
+		    tsdn, &shard->pai, edatas[i], &deferred_work_generated);
+	}
+
+	/*
+	 * While there is enough to purge as we have one empty page and that
+	 * one meets the threshold,  we need to respect the delay, so no purging
+	 * should happen yet.
+	 */
+	hpa_shard_do_deferred_work(tsdn, shard);
+	expect_zu_eq(0, ndefer_purge_calls, "Purged too early, delay==10ms");
+
+	nstime_iadd(&defer_curtime, opts.min_purge_delay_ms * 1000 * 1000);
+	/* Now, enough time has passed, so we expect to purge */
+	hpa_shard_do_deferred_work(tsdn, shard);
+	expect_zu_eq(1, ndefer_purge_calls, "Expected purge");
+
+	/*
+	 * We purged one hugepage, so we expect to have one non-full page and it
+	 * should have half of the other dirty.
+	 */
+	psset_stats_t *stat = &shard->psset.stats;
+	expect_zu_eq(
+	    stat->empty_slabs[1].npageslabs, 0, "Expected zero huge slabs");
+	expect_zu_eq(stat->empty_slabs[0].npageslabs, 1, "Expected 1 nh slab");
+	expect_zu_eq(stat->full_slabs[0].npageslabs, 0, "");
+	expect_zu_eq(stat->full_slabs[1].npageslabs, 0, "");
+	expect_zu_eq(
+	    stat->merged.ndirty, HUGEPAGE_PAGES / 2, "One HP half dirty");
+
+	/*
+	 * We now allocate one more PAGE than a half the hugepage because we
+	 * want to make sure that one more hugepage is needed.
+	 */
+	deferred_work_generated = false;
+	const size_t HALF = HUGEPAGE_PAGES / 2;
+	edatas[1] = pai_alloc(tsdn, &shard->pai, PAGE * (HALF + 1), PAGE, false,
+	    false, false, &deferred_work_generated);
+	expect_ptr_not_null(edatas[1], "Unexpected null edata");
+	expect_false(deferred_work_generated, "No page is purgable");
+
+	expect_zu_eq(stat->empty_slabs[1].npageslabs, 0, "");
+	expect_zu_eq(stat->empty_slabs[0].npageslabs, 0, "");
+	expect_zu_eq(stat->full_slabs[0].npageslabs, 0, "");
+	expect_zu_eq(stat->full_slabs[1].npageslabs, 0, "");
+
+	/*
+	 * We expect that all inactive bytes on the second page are counted as
+	 * dirty (this is because the page was huge and empty when we purged
+	 * it, thus, it is assumed to come back as huge, thus all the bytes are
+	 * counted as touched).
+	 */
+	expect_zu_eq(stat->merged.ndirty, 2 * HALF - 1,
+	    "2nd page is huge because it was empty and huge when purged");
+	expect_zu_eq(stat->merged.nactive, HALF + (HALF + 1), "1st + 2nd");
+
+	nstime_iadd(&defer_curtime, opts.min_purge_delay_ms * 1000 * 1000);
+	pai_dalloc(tsdn, &shard->pai, edatas[1], &deferred_work_generated);
+	expect_true(deferred_work_generated, "");
+	expect_zu_eq(stat->merged.ndirty, 3 * HALF, "1st + 2nd");
+
+	/*
+	 * Deallocate last allocation and confirm that page is empty again, and
+	 * once new minimum delay is reached, page should be purged.
+	 */
+	ndefer_purge_calls = 0;
+	nstime_iadd(&defer_curtime, opts.min_purge_delay_ms * 1000 * 1000);
+	hpa_shard_do_deferred_work(tsdn, shard);
+	expect_zu_eq(1, ndefer_purge_calls, "");
+	expect_zu_eq(stat->merged.ndirty, HALF, "2nd cleared as it was empty");
+	ndefer_purge_calls = 0;
+
+	/* Deallocate all the rest, but leave only two active */
+	for (int i = pages_to_deallocate; i < NALLOCS - 2; ++i) {
+		pai_dalloc(
+		    tsdn, &shard->pai, edatas[i], &deferred_work_generated);
+	}
+
+	/*
+	 * With prior pai_dalloc our last page becomes purgable, however we
+	 * still want to respect the delay.  Thus, it is not time to purge yet.
+	 */
+	hpa_shard_do_deferred_work(tsdn, shard);
+	expect_true(deferred_work_generated, "Above limit, but not time yet");
+	expect_zu_eq(0, ndefer_purge_calls, "");
+
+	/*
+	 * Finally, we move the time ahead, and we confirm that purge happens
+	 * and that we have exactly two active base pages and none dirty.
+	 */
+	nstime_iadd(&defer_curtime, opts.min_purge_delay_ms * 1000 * 1000);
+	hpa_shard_do_deferred_work(tsdn, shard);
+	expect_true(deferred_work_generated, "Above limit, but not time yet");
+	expect_zu_eq(1, ndefer_purge_calls, "");
+	expect_zu_eq(stat->merged.ndirty, 0, "Purged all");
+	expect_zu_eq(stat->merged.nactive, 2, "1st only");
+
+	ndefer_purge_calls = 0;
+	destroy_test_data(shard);
+}
+TEST_END
+
+TEST_BEGIN(test_start_huge_purge_empty_only) {
+	test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0)
+	    || !config_stats);
+
+	hpa_hooks_t hooks;
+	hooks.map = &defer_test_map;
+	hooks.unmap = &defer_test_unmap;
+	hooks.purge = &defer_test_purge;
+	hooks.hugify = &defer_test_hugify;
+	hooks.dehugify = &defer_test_dehugify;
+	hooks.curtime = &defer_test_curtime;
+	hooks.ms_since = &defer_test_ms_since;
+	hooks.vectorized_purge = &defer_vectorized_purge;
+
+	hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive;
+	opts.deferral_allowed = true;
+	opts.purge_threshold = HUGEPAGE;
+	opts.min_purge_delay_ms = 0;
+	opts.hugify_style = hpa_hugify_style_eager;
+	opts.min_purge_interval_ms = 0;
+
+	ndefer_purge_calls = 0;
+	npurge_size = 0;
+	hpa_shard_t *shard = create_test_data(&hooks, &opts);
+	bool         deferred_work_generated = false;
+	nstime_init(&defer_curtime, 10 * 1000 * 1000);
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	enum { NALLOCS = 2 * HUGEPAGE_PAGES };
+	edata_t *edatas[NALLOCS];
+	for (int i = 0; i < NALLOCS; i++) {
+		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+		    false, false, &deferred_work_generated);
+		expect_ptr_not_null(edatas[i], "Unexpected null edata");
+	}
+	/* Deallocate all from the first and one PAGE from the second HP. */
+	for (int i = 0; i < NALLOCS / 2 + 1; i++) {
+		pai_dalloc(
+		    tsdn, &shard->pai, edatas[i], &deferred_work_generated);
+	}
+	hpa_shard_do_deferred_work(tsdn, shard);
+	expect_true(deferred_work_generated, "");
+	expect_zu_eq(1, ndefer_purge_calls, "Should purge, delay==0ms");
+	expect_zu_eq(HUGEPAGE, npurge_size, "Purge whole folio");
+	expect_zu_eq(shard->psset.stats.merged.ndirty, 1, "");
+	expect_zu_eq(shard->psset.stats.merged.nactive, HUGEPAGE_PAGES - 1, "");
+
+	ndefer_purge_calls = 0;
+	npurge_size = 0;
+	hpa_shard_do_deferred_work(tsdn, shard);
+	expect_zu_eq(0, ndefer_purge_calls, "Should not purge anything");
+
+	/* Allocate and free 2*PAGE so that it spills into second page again */
+	edatas[0] = pai_alloc(tsdn, &shard->pai, 2 * PAGE, PAGE, false, false,
+	    false, &deferred_work_generated);
+	pai_dalloc(tsdn, &shard->pai, edatas[0], &deferred_work_generated);
+	expect_true(deferred_work_generated, "");
+	hpa_shard_do_deferred_work(tsdn, shard);
+	expect_zu_eq(1, ndefer_purge_calls, "Should purge, delay==0ms");
+	expect_zu_eq(HUGEPAGE, npurge_size, "Purge whole folio");
+
+	ndefer_purge_calls = 0;
+	destroy_test_data(shard);
+}
+TEST_END
+
+TEST_BEGIN(test_assume_huge_purge_fully) {
+	test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0)
+	    || !config_stats);
+
+	hpa_hooks_t hooks;
+	hooks.map = &defer_test_map;
+	hooks.unmap = &defer_test_unmap;
+	hooks.purge = &defer_test_purge;
+	hooks.hugify = &defer_test_hugify;
+	hooks.dehugify = &defer_test_dehugify;
+	hooks.curtime = &defer_test_curtime;
+	hooks.ms_since = &defer_test_ms_since;
+	hooks.vectorized_purge = &defer_vectorized_purge;
+
+	hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive;
+	opts.deferral_allowed = true;
+	opts.purge_threshold = PAGE;
+	opts.hugification_threshold = HUGEPAGE;
+	opts.min_purge_delay_ms = 0;
+	opts.min_purge_interval_ms = 0;
+	opts.hugify_style = hpa_hugify_style_eager;
+	opts.dirty_mult = FXP_INIT_PERCENT(1);
+
+	ndefer_purge_calls = 0;
+	hpa_shard_t *shard = create_test_data(&hooks, &opts);
+	bool         deferred_work_generated = false;
+	nstime_init(&defer_curtime, 10 * 1000 * 1000);
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	enum { NALLOCS = HUGEPAGE_PAGES };
+	edata_t *edatas[NALLOCS];
+	for (int i = 0; i < NALLOCS; i++) {
+		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+		    false, false, &deferred_work_generated);
+		expect_ptr_not_null(edatas[i], "Unexpected null edata");
+	}
+	/* Deallocate all */
+	for (int i = 0; i < NALLOCS; i++) {
+		pai_dalloc(
+		    tsdn, &shard->pai, edatas[i], &deferred_work_generated);
+	}
+	hpa_shard_do_deferred_work(tsdn, shard);
+	expect_true(deferred_work_generated, "");
+	expect_zu_eq(1, ndefer_purge_calls, "Should purge, delay==0ms");
+
+	/* Stats should say no active */
+	expect_zu_eq(shard->psset.stats.merged.nactive, 0, "");
+	expect_zu_eq(
+	    shard->psset.stats.empty_slabs[0].npageslabs, 1, "Non huge");
+	npurge_size = 0;
+	edatas[0] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, false,
+	    false, &deferred_work_generated);
+	expect_ptr_not_null(edatas[0], "Unexpected null edata");
+	expect_zu_eq(shard->psset.stats.merged.nactive, 1, "");
+	expect_zu_eq(shard->psset.stats.slabs[1].npageslabs, 1, "Huge nonfull");
+	pai_dalloc(tsdn, &shard->pai, edatas[0], &deferred_work_generated);
+	expect_true(deferred_work_generated, "");
+	ndefer_purge_calls = 0;
+	npurge_size = 0;
+	hpa_shard_do_deferred_work(tsdn, shard);
+	expect_zu_eq(1, ndefer_purge_calls, "Should purge, delay==0ms");
+	expect_zu_eq(HUGEPAGE, npurge_size, "Should purge full folio");
+
+	/* Now allocate all, free 10%, alloc 5%, assert non-huge */
+	for (int i = 0; i < NALLOCS; i++) {
+		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+		    false, false, &deferred_work_generated);
+		expect_ptr_not_null(edatas[i], "Unexpected null edata");
+	}
+	int ten_pct = NALLOCS / 10;
+	for (int i = 0; i < ten_pct; i++) {
+		pai_dalloc(
+		    tsdn, &shard->pai, edatas[i], &deferred_work_generated);
+	}
+	ndefer_purge_calls = 0;
+	npurge_size = 0;
+	hpa_shard_do_deferred_work(tsdn, shard);
+	expect_zu_eq(1, ndefer_purge_calls, "Should purge, delay==0ms");
+	expect_zu_eq(
+	    ten_pct * PAGE, npurge_size, "Should purge 10 percent of pages");
+
+	for (int i = 0; i < ten_pct / 2; i++) {
+		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+		    false, false, &deferred_work_generated);
+		expect_ptr_not_null(edatas[i], "Unexpected null edata");
+	}
+	expect_zu_eq(
+	    shard->psset.stats.slabs[0].npageslabs, 1, "Nonhuge nonfull");
+	expect_zu_eq(shard->psset.stats.merged.ndirty, 0, "No dirty");
+
+	npurge_size = 0;
+	ndefer_purge_calls = 0;
+	destroy_test_data(shard);
+}
+TEST_END
+
+TEST_BEGIN(test_eager_with_purge_threshold) {
+	test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0));
+
+	hpa_hooks_t hooks;
+	hooks.map = &defer_test_map;
+	hooks.unmap = &defer_test_unmap;
+	hooks.purge = &defer_test_purge;
+	hooks.hugify = &defer_test_hugify;
+	hooks.dehugify = &defer_test_dehugify;
+	hooks.curtime = &defer_test_curtime;
+	hooks.ms_since = &defer_test_ms_since;
+	hooks.vectorized_purge = &defer_vectorized_purge;
+
+	const size_t     THRESHOLD = 10;
+	hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive;
+	opts.deferral_allowed = true;
+	opts.purge_threshold = THRESHOLD * PAGE;
+	opts.min_purge_delay_ms = 0;
+	opts.hugify_style = hpa_hugify_style_eager;
+	opts.dirty_mult = FXP_INIT_PERCENT(0);
+
+	ndefer_purge_calls = 0;
+	hpa_shard_t *shard = create_test_data(&hooks, &opts);
+	bool         deferred_work_generated = false;
+	nstime_init(&defer_curtime, 10 * 1000 * 1000);
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	enum { NALLOCS = HUGEPAGE_PAGES };
+	edata_t *edatas[NALLOCS];
+	for (int i = 0; i < NALLOCS; i++) {
+		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+		    false, false, &deferred_work_generated);
+		expect_ptr_not_null(edatas[i], "Unexpected null edata");
+	}
+	/* Deallocate less then threshold PAGEs. */
+	for (size_t i = 0; i < THRESHOLD - 1; i++) {
+		pai_dalloc(
+		    tsdn, &shard->pai, edatas[i], &deferred_work_generated);
+	}
+	hpa_shard_do_deferred_work(tsdn, shard);
+	expect_false(deferred_work_generated, "No page is purgable");
+	expect_zu_eq(0, ndefer_purge_calls, "Should not purge yet");
+	/* Deallocate one more page to meet the threshold */
+	pai_dalloc(
+	    tsdn, &shard->pai, edatas[THRESHOLD - 1], &deferred_work_generated);
+	hpa_shard_do_deferred_work(tsdn, shard);
+	expect_zu_eq(1, ndefer_purge_calls, "Should purge");
+	expect_zu_eq(shard->psset.stats.merged.ndirty, 0, "");
+
+	ndefer_purge_calls = 0;
+	destroy_test_data(shard);
+}
+TEST_END
+
+TEST_BEGIN(test_delay_when_not_allowed_deferral) {
+	test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0));
+
+	hpa_hooks_t hooks;
+	hooks.map = &defer_test_map;
+	hooks.unmap = &defer_test_unmap;
+	hooks.purge = &defer_test_purge;
+	hooks.hugify = &defer_test_hugify;
+	hooks.dehugify = &defer_test_dehugify;
+	hooks.curtime = &defer_test_curtime;
+	hooks.ms_since = &defer_test_ms_since;
+	hooks.vectorized_purge = &defer_vectorized_purge;
+
+	const uint64_t   DELAY_NS = 100 * 1000 * 1000;
+	hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive;
+	opts.deferral_allowed = false;
+	opts.purge_threshold = HUGEPAGE - 2 * PAGE;
+	opts.min_purge_delay_ms = DELAY_NS / (1000 * 1000);
+	opts.hugify_style = hpa_hugify_style_lazy;
+	opts.min_purge_interval_ms = 0;
+
+	hpa_shard_t *shard = create_test_data(&hooks, &opts);
+	bool         deferred_work_generated = false;
+	nstime_init2(&defer_curtime, 100, 0);
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	enum { NALLOCS = HUGEPAGE_PAGES };
+	edata_t *edatas[NALLOCS];
+	ndefer_purge_calls = 0;
+	for (int i = 0; i < NALLOCS; i++) {
+		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+		    false, false, &deferred_work_generated);
+		expect_ptr_not_null(edatas[i], "Unexpected null edata");
+	}
+	/* Deallocate all */
+	for (int i = 0; i < NALLOCS; i++) {
+		pai_dalloc(
+		    tsdn, &shard->pai, edatas[i], &deferred_work_generated);
+	}
+	/* curtime = 100.0s */
+	hpa_shard_do_deferred_work(tsdn, shard);
+	expect_true(deferred_work_generated, "");
+	expect_zu_eq(0, ndefer_purge_calls, "Too early");
+
+	nstime_iadd(&defer_curtime, DELAY_NS - 1);
+	/* This activity will take the curtime=100.1 and reset purgability */
+	for (int i = 0; i < NALLOCS; i++) {
+		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+		    false, false, &deferred_work_generated);
+		expect_ptr_not_null(edatas[i], "Unexpected null edata");
+	}
+	/* Dealloc all but 2 pages, purgable delay_ns later*/
+	for (int i = 0; i < NALLOCS - 2; i++) {
+		pai_dalloc(
+		    tsdn, &shard->pai, edatas[i], &deferred_work_generated);
+	}
+
+	nstime_iadd(&defer_curtime, DELAY_NS);
+	pai_dalloc(
+	    tsdn, &shard->pai, edatas[NALLOCS - 1], &deferred_work_generated);
+	expect_true(ndefer_purge_calls > 0, "Should have purged");
+
+	ndefer_purge_calls = 0;
+	destroy_test_data(shard);
+}
+TEST_END
+
+TEST_BEGIN(test_deferred_until_time) {
+	test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0));
+
+	hpa_hooks_t hooks;
+	hooks.map = &defer_test_map;
+	hooks.unmap = &defer_test_unmap;
+	hooks.purge = &defer_test_purge;
+	hooks.hugify = &defer_test_hugify;
+	hooks.dehugify = &defer_test_dehugify;
+	hooks.curtime = &defer_test_curtime;
+	hooks.ms_since = &defer_test_ms_since;
+	hooks.vectorized_purge = &defer_vectorized_purge;
+
+	hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive;
+	opts.deferral_allowed = true;
+	opts.purge_threshold = PAGE;
+	opts.min_purge_delay_ms = 1000;
+	opts.hugification_threshold = HUGEPAGE / 2;
+	opts.dirty_mult = FXP_INIT_PERCENT(10);
+	opts.hugify_style = hpa_hugify_style_none;
+	opts.min_purge_interval_ms = 500;
+	opts.hugify_delay_ms = 3000;
+
+	hpa_shard_t *shard = create_test_data(&hooks, &opts);
+	bool         deferred_work_generated = false;
+	/* Current time = 10ms */
+	nstime_init(&defer_curtime, 10 * 1000 * 1000);
+
+	/* Allocate one huge page */
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	enum { NALLOCS = HUGEPAGE_PAGES };
+	edata_t *edatas[NALLOCS];
+	ndefer_purge_calls = 0;
+	for (int i = 0; i < NALLOCS; i++) {
+		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+		    false, false, &deferred_work_generated);
+		expect_ptr_not_null(edatas[i], "Unexpected null edata");
+	}
+	/* Deallocate 25% */
+	for (int i = 0; i < NALLOCS / 4; i++) {
+		pai_dalloc(
+		    tsdn, &shard->pai, edatas[i], &deferred_work_generated);
+	}
+	expect_true(deferred_work_generated, "We should hugify and purge");
+
+	/* Current time = 300ms, purge_eligible at 300ms + 1000ms */
+	nstime_init(&defer_curtime, 300UL * 1000 * 1000);
+	for (int i = NALLOCS / 4; i < NALLOCS; i++) {
+		pai_dalloc(
+		    tsdn, &shard->pai, edatas[i], &deferred_work_generated);
+	}
+	expect_true(deferred_work_generated, "Purge work generated");
+	hpa_shard_do_deferred_work(tsdn, shard);
+	expect_zu_eq(0, ndefer_purge_calls, "not time for purging yet");
+
+	/* Current time = 900ms, purge_eligible at 1300ms */
+	nstime_init(&defer_curtime, 900UL * 1000 * 1000);
+	uint64_t until_ns = pai_time_until_deferred_work(tsdn, &shard->pai);
+	expect_u64_eq(until_ns, BACKGROUND_THREAD_DEFERRED_MIN,
+	    "First pass did not happen");
+
+	/* Fake that first pass happened more than min_purge_interval_ago */
+	nstime_init(&shard->last_purge, 350UL * 1000 * 1000);
+	shard->stats.npurge_passes = 1;
+	until_ns = pai_time_until_deferred_work(tsdn, &shard->pai);
+	expect_u64_eq(until_ns, BACKGROUND_THREAD_DEFERRED_MIN,
+	    "No need to heck anything it is more than interval");
+
+	nstime_init(&shard->last_purge, 900UL * 1000 * 1000);
+	nstime_init(&defer_curtime, 1000UL * 1000 * 1000);
+	/* Next purge expected at 900ms + min_purge_interval = 1400ms */
+	uint64_t expected_ms = 1400 - 1000;
+	until_ns = pai_time_until_deferred_work(tsdn, &shard->pai);
+	expect_u64_eq(expected_ms, until_ns / (1000 * 1000), "Next in 400ms");
+	destroy_test_data(shard);
+}
+TEST_END
+
+TEST_BEGIN(test_eager_no_hugify_on_threshold) {
+	test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0)
+	    || !config_stats);
+
+	hpa_hooks_t hooks;
+	hooks.map = &defer_test_map;
+	hooks.unmap = &defer_test_unmap;
+	hooks.purge = &defer_test_purge;
+	hooks.hugify = &defer_test_hugify;
+	hooks.dehugify = &defer_test_dehugify;
+	hooks.curtime = &defer_test_curtime;
+	hooks.ms_since = &defer_test_ms_since;
+	hooks.vectorized_purge = &defer_vectorized_purge;
+
+	hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive;
+	opts.deferral_allowed = true;
+	opts.purge_threshold = PAGE;
+	opts.min_purge_delay_ms = 0;
+	opts.hugification_threshold = HUGEPAGE * 0.9;
+	opts.dirty_mult = FXP_INIT_PERCENT(10);
+	opts.hugify_style = hpa_hugify_style_eager;
+	opts.min_purge_interval_ms = 0;
+	opts.hugify_delay_ms = 0;
+
+	hpa_shard_t *shard = create_test_data(&hooks, &opts);
+	bool         deferred_work_generated = false;
+	/* Current time = 10ms */
+	nstime_init(&defer_curtime, 10 * 1000 * 1000);
+
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	/* First allocation makes the page huge */
+	enum { NALLOCS = HUGEPAGE_PAGES };
+	edata_t *edatas[NALLOCS];
+	ndefer_purge_calls = 0;
+	for (int i = 0; i < NALLOCS; i++) {
+		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+		    false, false, &deferred_work_generated);
+		expect_ptr_not_null(edatas[i], "Unexpected null edata");
+	}
+	ndefer_hugify_calls = 0;
+	hpa_shard_do_deferred_work(tsdn, shard);
+	expect_zu_eq(ndefer_hugify_calls, 0, "No hugify needed - eager");
+	expect_zu_eq(shard->psset.stats.full_slabs[1].npageslabs, 1,
+	    "Page should be full-huge");
+
+	/* Deallocate 25% */
+	for (int i = 0; i < NALLOCS / 4; i++) {
+		pai_dalloc(
+		    tsdn, &shard->pai, edatas[i], &deferred_work_generated);
+	}
+	expect_true(deferred_work_generated, "purge is needed");
+	ndefer_purge_calls = 0;
+	hpa_shard_do_deferred_work(tsdn, shard);
+	expect_zu_eq(ndefer_hugify_calls, 0, "No hugify needed - eager");
+	expect_zu_eq(ndefer_purge_calls, 1, "Purge should have happened");
+
+	/* Allocate 20% again, so that we are above hugification threshold */
+	ndefer_purge_calls = 0;
+	nstime_iadd(&defer_curtime, 800UL * 1000 * 1000);
+	for (int i = 0; i < NALLOCS / 4 - 1; i++) {
+		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+		    false, false, &deferred_work_generated);
+		expect_ptr_not_null(edatas[i], "Unexpected null edata");
+	}
+	hpa_shard_do_deferred_work(tsdn, shard);
+	expect_zu_eq(0, ndefer_purge_calls, "no purging needed");
+	expect_zu_eq(ndefer_hugify_calls, 0, "no hugify - eager");
+	destroy_test_data(shard);
+}
+TEST_END
+
+TEST_BEGIN(test_hpa_hugify_style_none_huge_no_syscall) {
+	test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0));
+
+	hpa_hooks_t hooks;
+	hooks.map = &defer_test_map;
+	hooks.unmap = &defer_test_unmap;
+	hooks.purge = &defer_test_purge;
+	hooks.hugify = &defer_test_hugify;
+	hooks.dehugify = &defer_test_dehugify;
+	hooks.curtime = &defer_test_curtime;
+	hooks.ms_since = &defer_test_ms_since;
+	hooks.vectorized_purge = &defer_vectorized_purge;
+
+	hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive;
+	opts.deferral_allowed = true;
+	opts.purge_threshold = PAGE;
+	opts.min_purge_delay_ms = 0;
+	opts.hugification_threshold = HUGEPAGE * 0.25;
+	opts.dirty_mult = FXP_INIT_PERCENT(10);
+	opts.hugify_style = hpa_hugify_style_none;
+	opts.min_purge_interval_ms = 0;
+	opts.hugify_delay_ms = 0;
+
+	hpa_shard_t *shard = create_test_data(&hooks, &opts);
+	bool         deferred_work_generated = false;
+	/* Current time = 10ms */
+	nstime_init(&defer_curtime, 10 * 1000 * 1000);
+
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	enum { NALLOCS = HUGEPAGE_PAGES };
+	edata_t *edatas[NALLOCS];
+	ndefer_purge_calls = 0;
+	for (int i = 0; i < NALLOCS / 2; i++) {
+		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+		    false, false, &deferred_work_generated);
+		expect_ptr_not_null(edatas[i], "Unexpected null edata");
+	}
+	hpdata_t *ps = psset_pick_alloc(&shard->psset, PAGE);
+	expect_false(
+	    hpdata_huge_get(ps), "style=none, thp=madvise, should be non-huge");
+
+	ndefer_hugify_calls = 0;
+	ndefer_purge_calls = 0;
+	hpa_shard_do_deferred_work(tsdn, shard);
+	expect_zu_eq(ndefer_hugify_calls, 0, "Hugify none, no syscall");
+	ps = psset_pick_alloc(&shard->psset, PAGE);
+	expect_ptr_not_null(ps, "Unexpected null page");
+	expect_false(
+	    hpdata_huge_get(ps), "style=none, thp=madvise, should be non-huge");
+
+	destroy_test_data(shard);
+}
+TEST_END
+
+TEST_BEGIN(test_experimental_hpa_enforce_hugify) {
+	test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0)
+	    || !config_stats);
+
+	bool old_opt_value = opt_experimental_hpa_enforce_hugify;
+	opt_experimental_hpa_enforce_hugify = true;
+
+	hpa_hooks_t hooks;
+	hooks.map = &defer_test_map;
+	hooks.unmap = &defer_test_unmap;
+	hooks.purge = &defer_test_purge;
+	hooks.hugify = &defer_test_hugify;
+	hooks.dehugify = &defer_test_dehugify;
+	hooks.curtime = &defer_test_curtime;
+	hooks.ms_since = &defer_test_ms_since;
+	hooks.vectorized_purge = &defer_vectorized_purge;
+
+	/* Use eager so hugify would normally not be made on threshold */
+	hpa_shard_opts_t opts = test_hpa_shard_opts_default;
+	opts.hugify_style = hpa_hugify_style_eager;
+	opts.deferral_allowed = true;
+	opts.hugify_delay_ms = 0;
+	opts.min_purge_interval_ms = 0;
+	opts.hugification_threshold = 0.9 * HUGEPAGE;
+
+	ndefer_hugify_calls = 0;
+	ndefer_dehugify_calls = 0;
+	ndefer_purge_calls = 0;
+
+	hpa_shard_t *shard = create_test_data(&hooks, &opts);
+	bool         deferred_work_generated = false;
+	nstime_init2(&defer_curtime, 100, 0);
+
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	enum { NALLOCS = HUGEPAGE_PAGES * 95 / 100 };
+	edata_t *edatas[NALLOCS];
+	for (int i = 0; i < NALLOCS; i++) {
+		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+		    false, false, &deferred_work_generated);
+		expect_ptr_not_null(edatas[i], "Unexpected null edata");
+	}
+
+	ndefer_hugify_calls = 0;
+	hpa_shard_do_deferred_work(tsdn, shard);
+	expect_zu_eq(ndefer_hugify_calls, 0, "Page was already huge");
+
+	ndefer_hugify_calls = 0;
+	ndefer_dehugify_calls = 0;
+	ndefer_purge_calls = 0;
+
+	/* Deallocate half to trigger purge */
+	for (int i = 0; i < NALLOCS / 2; i++) {
+		pai_dalloc(
+		    tsdn, &shard->pai, edatas[i], &deferred_work_generated);
+	}
+
+	hpa_shard_do_deferred_work(tsdn, shard);
+	/*
+	 * Enforce hugify should have triggered dehugify syscall during purge
+	 * when the page is huge and not empty.
+	 */
+	expect_zu_ge(ndefer_dehugify_calls, 1,
+	    "Should have triggered dehugify syscall with eager style");
+
+	for (int i = 0; i < NALLOCS / 2; i++) {
+		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+		    false, false, &deferred_work_generated);
+		expect_ptr_not_null(edatas[i], "Unexpected null edata");
+	}
+	ndefer_hugify_calls = 0;
+	hpa_shard_do_deferred_work(tsdn, shard);
+	expect_zu_eq(ndefer_hugify_calls, 1, "");
+
+	opt_experimental_hpa_enforce_hugify = old_opt_value;
+	destroy_test_data(shard);
+}
+TEST_END
+
 int
 main(void) {
 	/*
@@ -451,9 +1458,14 @@ main(void) {
 	(void)mem_tree_iter;
 	(void)mem_tree_reverse_iter;
 	(void)mem_tree_destroy;
-	return test_no_reentrancy(
-	    test_alloc_max,
-	    test_stress,
-	    test_alloc_dalloc_batch,
-	    test_defer_time);
+	return test_no_reentrancy(test_alloc_max, test_stress, test_defer_time,
+	    test_purge_no_infinite_loop, test_no_min_purge_interval,
+	    test_min_purge_interval, test_purge,
+	    test_experimental_max_purge_nhp, test_vectorized_opt_eq_zero,
+	    test_starts_huge, test_start_huge_purge_empty_only,
+	    test_assume_huge_purge_fully, test_eager_with_purge_threshold,
+	    test_delay_when_not_allowed_deferral, test_deferred_until_time,
+	    test_eager_no_hugify_on_threshold,
+	    test_hpa_hugify_style_none_huge_no_syscall,
+	    test_experimental_hpa_enforce_hugify);
 }
diff --git a/test/unit/hpa.sh b/test/unit/hpa.sh
new file mode 100644
index 00000000..22451f1d
--- /dev/null
+++ b/test/unit/hpa.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+export MALLOC_CONF="process_madvise_max_batch:0,experimental_hpa_start_huge_if_thp_always:false"
diff --git a/test/unit/hpa_background_thread.c b/test/unit/hpa_background_thread.c
index 81c25612..80cf2fed 100644
--- a/test/unit/hpa_background_thread.c
+++ b/test/unit/hpa_background_thread.c
@@ -1,8 +1,33 @@
 #include "test/jemalloc_test.h"
 #include "test/sleep.h"
 
+TEST_BEGIN(test_hpa_background_thread_a0_initialized) {
+	/*
+	 * Arena 0 has dedicated initialization path.  We'd like to make sure
+	 * deferral_allowed value initialized correctly from the start of the
+	 * application.
+	 */
+	test_skip_if(!config_stats);
+	test_skip_if(!hpa_supported());
+	test_skip_if(!have_background_thread);
+	test_skip_if(san_guard_enabled());
+
+	bool   enabled = false;
+	size_t sz = sizeof(enabled);
+	int err = mallctl("background_thread", (void *)&enabled, &sz, NULL, 0);
+	expect_d_eq(err, 0, "Unexpected mallctl() failure");
+	expect_true(enabled, "Background thread should be enabled");
+
+	arena_t *a0 = arena_get(TSDN_NULL, 0, false);
+	expect_ptr_ne(a0, NULL, "");
+	bool deferral_allowed = a0->pa_shard.hpa_shard.opts.deferral_allowed;
+	expect_true(deferral_allowed,
+	    "Should have deferral_allowed option enabled for arena #0");
+}
+TEST_END
+
 static void
-sleep_for_background_thread_interval() {
+sleep_for_background_thread_interval(void) {
 	/*
 	 * The sleep interval set in our .sh file is 50ms.  So it likely will
 	 * run if we sleep for four times that.
@@ -11,9 +36,9 @@ sleep_for_background_thread_interval() {
 }
 
 static unsigned
-create_arena() {
+create_arena(void) {
 	unsigned arena_ind;
-	size_t sz;
+	size_t   sz;
 
 	sz = sizeof(unsigned);
 	expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 2),
@@ -23,17 +48,17 @@ create_arena() {
 
 static size_t
 get_empty_ndirty(unsigned arena_ind) {
-	int err;
-	size_t ndirty_huge;
-	size_t ndirty_nonhuge;
+	int      err;
+	size_t   ndirty_huge;
+	size_t   ndirty_nonhuge;
 	uint64_t epoch = 1;
-	size_t sz = sizeof(epoch);
-	err = je_mallctl("epoch", (void *)&epoch, &sz, (void *)&epoch,
-	    sizeof(epoch));
+	size_t   sz = sizeof(epoch);
+	err = je_mallctl(
+	    "epoch", (void *)&epoch, &sz, (void *)&epoch, sizeof(epoch));
 	expect_d_eq(0, err, "Unexpected mallctl() failure");
 
 	size_t mib[6];
-	size_t miblen = sizeof(mib)/sizeof(mib[0]);
+	size_t miblen = sizeof(mib) / sizeof(mib[0]);
 	err = mallctlnametomib(
 	    "stats.arenas.0.hpa_shard.empty_slabs.ndirty_nonhuge", mib,
 	    &miblen);
@@ -45,8 +70,7 @@ get_empty_ndirty(unsigned arena_ind) {
 	expect_d_eq(0, err, "Unexpected mallctlbymib() failure");
 
 	err = mallctlnametomib(
-	    "stats.arenas.0.hpa_shard.empty_slabs.ndirty_huge", mib,
-	    &miblen);
+	    "stats.arenas.0.hpa_shard.empty_slabs.ndirty_huge", mib, &miblen);
 	expect_d_eq(0, err, "Unexpected mallctlnametomib() failure");
 
 	sz = sizeof(ndirty_huge);
@@ -60,20 +84,20 @@ get_empty_ndirty(unsigned arena_ind) {
 static void
 set_background_thread_enabled(bool enabled) {
 	int err;
-	err = je_mallctl("background_thread", NULL, NULL, &enabled,
-	    sizeof(enabled));
+	err = je_mallctl(
+	    "background_thread", NULL, NULL, &enabled, sizeof(enabled));
 	expect_d_eq(0, err, "Unexpected mallctl failure");
 }
 
 static void
 wait_until_thread_is_enabled(unsigned arena_id) {
-	tsd_t* tsd = tsd_fetch();
+	tsd_t *tsd = tsd_fetch();
 
 	bool sleeping = false;
-	int iterations = 0;
+	int  iterations = 0;
 	do {
-		background_thread_info_t *info =
-		    background_thread_info_get(arena_id);
+		background_thread_info_t *info = background_thread_info_get(
+		    arena_id);
 		malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
 		malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
 		sleeping = background_thread_indefinite_sleep(info);
@@ -83,7 +107,34 @@ wait_until_thread_is_enabled(unsigned arena_id) {
 }
 
 static void
-expect_purging(unsigned arena_ind, bool expect_deferred) {
+expect_purging(unsigned arena_ind) {
+	size_t empty_ndirty = get_empty_ndirty(arena_ind);
+	expect_zu_eq(0, empty_ndirty, "Expected arena to start unused.");
+
+	void *ptrs[2];
+	ptrs[0] = mallocx(PAGE, MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena_ind));
+	ptrs[1] = mallocx(PAGE, MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena_ind));
+
+	empty_ndirty = get_empty_ndirty(arena_ind);
+	expect_zu_eq(0, empty_ndirty, "All pages should be active");
+
+	dallocx(ptrs[0], MALLOCX_TCACHE_NONE);
+	expect_true(empty_ndirty == 0 || empty_ndirty == 1,
+	    "Unexpected extra dirty page count: %zu", empty_ndirty);
+
+	/*
+	 * Wait for at least hpa_min_purge_interval_ms to trigger purge on next
+	 * deallocation.
+	 */
+	sleep_for_background_thread_interval();
+
+	dallocx(ptrs[1], MALLOCX_TCACHE_NONE);
+	empty_ndirty = get_empty_ndirty(arena_ind);
+	expect_zu_eq(0, empty_ndirty, "There are should be no dirty pages");
+}
+
+static void
+expect_deferred_purging(unsigned arena_ind) {
 	size_t empty_ndirty;
 
 	empty_ndirty = get_empty_ndirty(arena_ind);
@@ -97,26 +148,20 @@ expect_purging(unsigned arena_ind, bool expect_deferred) {
 	 */
 	bool observed_dirty_page = false;
 	for (int i = 0; i < 10; i++) {
-		void *ptr = mallocx(PAGE,
-		    MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena_ind));
+		void *ptr = mallocx(
+		    PAGE, MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena_ind));
 		empty_ndirty = get_empty_ndirty(arena_ind);
 		expect_zu_eq(0, empty_ndirty, "All pages should be active");
 		dallocx(ptr, MALLOCX_TCACHE_NONE);
 		empty_ndirty = get_empty_ndirty(arena_ind);
-		if (expect_deferred) {
-			expect_true(empty_ndirty == 0 || empty_ndirty == 1 ||
-			    opt_prof, "Unexpected extra dirty page count: %zu",
-			    empty_ndirty);
-		} else {
-			assert_zu_eq(0, empty_ndirty,
-			    "Saw dirty pages without deferred purging");
-		}
+		expect_true(empty_ndirty == 0 || empty_ndirty == 1 || opt_prof,
+		    "Unexpected extra dirty page count: %zu", empty_ndirty);
 		if (empty_ndirty > 0) {
 			observed_dirty_page = true;
 			break;
 		}
 	}
-	expect_b_eq(expect_deferred, observed_dirty_page, "");
+	expect_true(observed_dirty_page, "");
 
 	/*
 	 * Under high concurrency / heavy test load (e.g. using run_test.sh),
@@ -124,8 +169,8 @@ expect_purging(unsigned arena_ind, bool expect_deferred) {
 	 * time.  Retry 100 times max before bailing out.
 	 */
 	unsigned retry = 0;
-	while ((empty_ndirty = get_empty_ndirty(arena_ind)) > 0 &&
-	    expect_deferred && (retry++ < 100)) {
+	while ((empty_ndirty = get_empty_ndirty(arena_ind)) > 0
+	    && (retry++ < 100)) {
 		sleep_for_background_thread_interval();
 	}
 
@@ -144,7 +189,7 @@ TEST_BEGIN(test_hpa_background_thread_purges) {
 	 * Our .sh sets dirty mult to 0, so all dirty pages should get purged
 	 * any time any thread frees.
 	 */
-	expect_purging(arena_ind, /* expect_deferred */ true);
+	expect_deferred_purging(arena_ind);
 }
 TEST_END
 
@@ -158,11 +203,11 @@ TEST_BEGIN(test_hpa_background_thread_enable_disable) {
 	unsigned arena_ind = create_arena();
 
 	set_background_thread_enabled(false);
-	expect_purging(arena_ind, false);
+	expect_purging(arena_ind);
 
 	set_background_thread_enabled(true);
 	wait_until_thread_is_enabled(arena_ind);
-	expect_purging(arena_ind, true);
+	expect_deferred_purging(arena_ind);
 }
 TEST_END
 
@@ -183,6 +228,12 @@ main(void) {
 		opt_background_thread = true;
 	}
 	return test_no_reentrancy(
+	    /*
+	     * Unfortunately, order of tests is important here.  We need to
+	     * make sure arena #0 initialized correctly, before we start
+	     * turning background thread on and off in other tests.
+	     */
+	    test_hpa_background_thread_a0_initialized,
 	    test_hpa_background_thread_purges,
 	    test_hpa_background_thread_enable_disable);
 }
diff --git a/test/unit/hpa_background_thread.sh b/test/unit/hpa_background_thread.sh
index 65a56a08..5c85d48b 100644
--- a/test/unit/hpa_background_thread.sh
+++ b/test/unit/hpa_background_thread.sh
@@ -1,4 +1,4 @@
 #!/bin/sh
 
-export MALLOC_CONF="hpa_dirty_mult:0,hpa_min_purge_interval_ms:50,hpa_sec_nshards:0"
+export MALLOC_CONF="hpa_dirty_mult:0,hpa_min_purge_interval_ms:50,hpa_sec_nshards:0,experimental_hpa_start_huge_if_thp_always:false"
 
diff --git a/test/unit/hpa_sec_integration.c b/test/unit/hpa_sec_integration.c
new file mode 100644
index 00000000..c54cdc0c
--- /dev/null
+++ b/test/unit/hpa_sec_integration.c
@@ -0,0 +1,239 @@
+#include "test/jemalloc_test.h"
+
+#include "jemalloc/internal/hpa.h"
+#include "jemalloc/internal/nstime.h"
+
+#define SHARD_IND 111
+
+#define ALLOC_MAX (HUGEPAGE)
+
+typedef struct test_data_s test_data_t;
+struct test_data_s {
+	/*
+	 * Must be the first member -- we convert back and forth between the
+	 * test_data_t and the hpa_shard_t;
+	 */
+	hpa_shard_t   shard;
+	hpa_central_t central;
+	base_t       *base;
+	edata_cache_t shard_edata_cache;
+
+	emap_t emap;
+};
+
+static hpa_shard_opts_t test_hpa_shard_opts = {
+    /* slab_max_alloc */
+    HUGEPAGE,
+    /* hugification_threshold */
+    0.9 * HUGEPAGE,
+    /* dirty_mult */
+    FXP_INIT_PERCENT(10),
+    /* deferral_allowed */
+    true,
+    /* hugify_delay_ms */
+    0,
+    /* hugify_sync */
+    false,
+    /* min_purge_interval_ms */
+    5,
+    /* experimental_max_purge_nhp */
+    -1,
+    /* purge_threshold */
+    PAGE,
+    /* min_purge_delay_ms */
+    10,
+    /* hugify_style */
+    hpa_hugify_style_lazy};
+
+static hpa_shard_t *
+create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts,
+    const sec_opts_t *sec_opts) {
+	bool    err;
+	base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND,
+	    &ehooks_default_extent_hooks, /* metadata_use_hooks */ true);
+	assert_ptr_not_null(base, "");
+
+	test_data_t *test_data = malloc(sizeof(test_data_t));
+	assert_ptr_not_null(test_data, "");
+
+	test_data->base = base;
+
+	err = edata_cache_init(&test_data->shard_edata_cache, base);
+	assert_false(err, "");
+
+	err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false);
+	assert_false(err, "");
+
+	err = hpa_central_init(&test_data->central, test_data->base, hooks);
+	assert_false(err, "");
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	err = hpa_shard_init(tsdn, &test_data->shard, &test_data->central,
+	    &test_data->emap, test_data->base, &test_data->shard_edata_cache,
+	    SHARD_IND, opts, sec_opts);
+	assert_false(err, "");
+
+	return (hpa_shard_t *)test_data;
+}
+
+static void
+destroy_test_data(hpa_shard_t *shard) {
+	test_data_t *test_data = (test_data_t *)shard;
+	base_delete(TSDN_NULL, test_data->base);
+	free(test_data);
+}
+
+static uintptr_t defer_bump_ptr = HUGEPAGE * 123;
+static void *
+defer_test_map(size_t size) {
+	void *result = (void *)defer_bump_ptr;
+	defer_bump_ptr += size;
+	return result;
+}
+
+static void
+defer_test_unmap(void *ptr, size_t size) {
+	(void)ptr;
+	(void)size;
+}
+
+static size_t ndefer_purge_calls = 0;
+static size_t npurge_size = 0;
+static void
+defer_test_purge(void *ptr, size_t size) {
+	(void)ptr;
+	npurge_size = size;
+	++ndefer_purge_calls;
+}
+
+static bool defer_vectorized_purge_called = false;
+static bool
+defer_vectorized_purge(void *vec, size_t vlen, size_t nbytes) {
+	(void)vec;
+	(void)nbytes;
+	++ndefer_purge_calls;
+	defer_vectorized_purge_called = true;
+	return false;
+}
+
+static size_t ndefer_hugify_calls = 0;
+static bool
+defer_test_hugify(void *ptr, size_t size, bool sync) {
+	++ndefer_hugify_calls;
+	return false;
+}
+
+static size_t ndefer_dehugify_calls = 0;
+static void
+defer_test_dehugify(void *ptr, size_t size) {
+	++ndefer_dehugify_calls;
+}
+
+static nstime_t defer_curtime;
+static void
+defer_test_curtime(nstime_t *r_time, bool first_reading) {
+	*r_time = defer_curtime;
+}
+
+static uint64_t
+defer_test_ms_since(nstime_t *past_time) {
+	return (nstime_ns(&defer_curtime) - nstime_ns(past_time)) / 1000 / 1000;
+}
+
+// test that freed pages stay in SEC and hpa thinks they are active
+
+TEST_BEGIN(test_hpa_sec) {
+	test_skip_if(!hpa_supported());
+
+	hpa_hooks_t hooks;
+	hooks.map = &defer_test_map;
+	hooks.unmap = &defer_test_unmap;
+	hooks.purge = &defer_test_purge;
+	hooks.hugify = &defer_test_hugify;
+	hooks.dehugify = &defer_test_dehugify;
+	hooks.curtime = &defer_test_curtime;
+	hooks.ms_since = &defer_test_ms_since;
+	hooks.vectorized_purge = &defer_vectorized_purge;
+
+	hpa_shard_opts_t opts = test_hpa_shard_opts;
+
+	enum { NALLOCS = 8 };
+	sec_opts_t sec_opts;
+	sec_opts.nshards = 1;
+	sec_opts.max_alloc = 2 * PAGE;
+	sec_opts.max_bytes = NALLOCS * PAGE;
+	sec_opts.batch_fill_extra = 4;
+
+	hpa_shard_t *shard = create_test_data(&hooks, &opts, &sec_opts);
+	bool         deferred_work_generated = false;
+	tsdn_t      *tsdn = tsd_tsdn(tsd_fetch());
+
+	/* alloc 1 PAGE, confirm sec has fill_extra bytes. */
+	edata_t *edata1 = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, false,
+	    false, &deferred_work_generated);
+	expect_ptr_not_null(edata1, "Unexpected null edata");
+	hpa_shard_stats_t hpa_stats;
+	memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t));
+	hpa_shard_stats_merge(tsdn, shard, &hpa_stats);
+	expect_zu_eq(hpa_stats.psset_stats.merged.nactive,
+	    1 + sec_opts.batch_fill_extra, "");
+	expect_zu_eq(hpa_stats.secstats.bytes, PAGE * sec_opts.batch_fill_extra,
+	    "sec should have fill extra pages");
+
+	/* Alloc/dealloc NALLOCS times and confirm extents are in sec. */
+	edata_t *edatas[NALLOCS];
+	for (int i = 0; i < NALLOCS; i++) {
+		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+		    false, false, &deferred_work_generated);
+		expect_ptr_not_null(edatas[i], "Unexpected null edata");
+	}
+	memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t));
+	hpa_shard_stats_merge(tsdn, shard, &hpa_stats);
+	expect_zu_eq(hpa_stats.psset_stats.merged.nactive, 2 + NALLOCS, "");
+	expect_zu_eq(hpa_stats.secstats.bytes, PAGE, "2 refills (at 0 and 4)");
+
+	for (int i = 0; i < NALLOCS - 1; i++) {
+		pai_dalloc(
+		    tsdn, &shard->pai, edatas[i], &deferred_work_generated);
+	}
+	memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t));
+	hpa_shard_stats_merge(tsdn, shard, &hpa_stats);
+	expect_zu_eq(hpa_stats.psset_stats.merged.nactive, (2 + NALLOCS), "");
+	expect_zu_eq(
+	    hpa_stats.secstats.bytes, sec_opts.max_bytes, "sec should be full");
+
+	/* this one should flush 1 + 0.25 * 8 = 3 extents */
+	pai_dalloc(
+	    tsdn, &shard->pai, edatas[NALLOCS - 1], &deferred_work_generated);
+	memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t));
+	hpa_shard_stats_merge(tsdn, shard, &hpa_stats);
+	expect_zu_eq(hpa_stats.psset_stats.merged.nactive, (NALLOCS - 1), "");
+	expect_zu_eq(hpa_stats.psset_stats.merged.ndirty, 3, "");
+	expect_zu_eq(hpa_stats.secstats.bytes, 0.75 * sec_opts.max_bytes,
+	    "sec should be full");
+
+	/* Next allocation should come from SEC and not increase active */
+	edata_t *edata2 = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, false,
+	    false, &deferred_work_generated);
+	expect_ptr_not_null(edata2, "Unexpected null edata");
+	memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t));
+	hpa_shard_stats_merge(tsdn, shard, &hpa_stats);
+	expect_zu_eq(hpa_stats.psset_stats.merged.nactive, NALLOCS - 1, "");
+	expect_zu_eq(hpa_stats.secstats.bytes, 0.75 * sec_opts.max_bytes - PAGE,
+	    "sec should have max_bytes minus one page that just came from it");
+
+	/* We return this one and it stays in the cache */
+	pai_dalloc(tsdn, &shard->pai, edata2, &deferred_work_generated);
+	memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t));
+	hpa_shard_stats_merge(tsdn, shard, &hpa_stats);
+	expect_zu_eq(hpa_stats.psset_stats.merged.nactive, NALLOCS - 1, "");
+	expect_zu_eq(hpa_stats.psset_stats.merged.ndirty, 3, "");
+	expect_zu_eq(hpa_stats.secstats.bytes, 0.75 * sec_opts.max_bytes, "");
+
+	destroy_test_data(shard);
+}
+TEST_END
+
+int
+main(void) {
+	return test_no_reentrancy(test_hpa_sec);
+}
diff --git a/test/unit/hpa_sec_integration.sh b/test/unit/hpa_sec_integration.sh
new file mode 100644
index 00000000..22451f1d
--- /dev/null
+++ b/test/unit/hpa_sec_integration.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+export MALLOC_CONF="process_madvise_max_batch:0,experimental_hpa_start_huge_if_thp_always:false"
diff --git a/test/unit/hpa_thp_always.c b/test/unit/hpa_thp_always.c
new file mode 100644
index 00000000..6e56e663
--- /dev/null
+++ b/test/unit/hpa_thp_always.c
@@ -0,0 +1,204 @@
+#include "test/jemalloc_test.h"
+
+#include "jemalloc/internal/hpa.h"
+#include "jemalloc/internal/nstime.h"
+
+#define SHARD_IND 111
+
+#define ALLOC_MAX (HUGEPAGE)
+
+typedef struct test_data_s test_data_t;
+struct test_data_s {
+	/*
+	 * Must be the first member -- we convert back and forth between the
+	 * test_data_t and the hpa_shard_t;
+	 */
+	hpa_shard_t   shard;
+	hpa_central_t central;
+	base_t       *base;
+	edata_cache_t shard_edata_cache;
+
+	emap_t emap;
+};
+
+static hpa_shard_opts_t test_hpa_shard_opts_aggressive = {
+    /* slab_max_alloc */
+    HUGEPAGE,
+    /* hugification_threshold */
+    0.9 * HUGEPAGE,
+    /* dirty_mult */
+    FXP_INIT_PERCENT(11),
+    /* deferral_allowed */
+    true,
+    /* hugify_delay_ms */
+    0,
+    /* hugify_sync */
+    false,
+    /* min_purge_interval_ms */
+    5,
+    /* experimental_max_purge_nhp */
+    -1,
+    /* purge_threshold */
+    HUGEPAGE - 5 * PAGE,
+    /* min_purge_delay_ms */
+    10,
+    /* hugify_style */
+    hpa_hugify_style_eager};
+
+static hpa_shard_t *
+create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) {
+	bool    err;
+	base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND,
+	    &ehooks_default_extent_hooks, /* metadata_use_hooks */ true);
+	assert_ptr_not_null(base, "");
+
+	test_data_t *test_data = malloc(sizeof(test_data_t));
+	assert_ptr_not_null(test_data, "");
+
+	test_data->base = base;
+
+	err = edata_cache_init(&test_data->shard_edata_cache, base);
+	assert_false(err, "");
+
+	err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false);
+	assert_false(err, "");
+
+	err = hpa_central_init(&test_data->central, test_data->base, hooks);
+	assert_false(err, "");
+	sec_opts_t sec_opts;
+	sec_opts.nshards = 0;
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	err = hpa_shard_init(tsdn, &test_data->shard, &test_data->central,
+	    &test_data->emap, test_data->base, &test_data->shard_edata_cache,
+	    SHARD_IND, opts, &sec_opts);
+	assert_false(err, "");
+
+	return (hpa_shard_t *)test_data;
+}
+
+static void
+destroy_test_data(hpa_shard_t *shard) {
+	test_data_t *test_data = (test_data_t *)shard;
+	base_delete(TSDN_NULL, test_data->base);
+	free(test_data);
+}
+
+static uintptr_t defer_bump_ptr = HUGEPAGE * 123;
+static void *
+defer_test_map(size_t size) {
+	void *result = (void *)defer_bump_ptr;
+	defer_bump_ptr += size;
+	return result;
+}
+
+static void
+defer_test_unmap(void *ptr, size_t size) {
+	(void)ptr;
+	(void)size;
+}
+
+static size_t ndefer_purge_calls = 0;
+static size_t npurge_size = 0;
+static void
+defer_test_purge(void *ptr, size_t size) {
+	(void)ptr;
+	npurge_size = size;
+	++ndefer_purge_calls;
+}
+
+static bool defer_vectorized_purge_called = false;
+static bool
+defer_vectorized_purge(void *vec, size_t vlen, size_t nbytes) {
+	(void)vec;
+	(void)nbytes;
+	++ndefer_purge_calls;
+	defer_vectorized_purge_called = true;
+	return false;
+}
+
+static size_t ndefer_hugify_calls = 0;
+static bool
+defer_test_hugify(void *ptr, size_t size, bool sync) {
+	++ndefer_hugify_calls;
+	return false;
+}
+
+static size_t ndefer_dehugify_calls = 0;
+static void
+defer_test_dehugify(void *ptr, size_t size) {
+	++ndefer_dehugify_calls;
+}
+
+static nstime_t defer_curtime;
+static void
+defer_test_curtime(nstime_t *r_time, bool first_reading) {
+	*r_time = defer_curtime;
+}
+
+static uint64_t
+defer_test_ms_since(nstime_t *past_time) {
+	return (nstime_ns(&defer_curtime) - nstime_ns(past_time)) / 1000 / 1000;
+}
+
+TEST_BEGIN(test_hpa_hugify_style_none_huge_no_syscall_thp_always) {
+	test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0));
+
+	hpa_hooks_t hooks;
+	hooks.map = &defer_test_map;
+	hooks.unmap = &defer_test_unmap;
+	hooks.purge = &defer_test_purge;
+	hooks.hugify = &defer_test_hugify;
+	hooks.dehugify = &defer_test_dehugify;
+	hooks.curtime = &defer_test_curtime;
+	hooks.ms_since = &defer_test_ms_since;
+	hooks.vectorized_purge = &defer_vectorized_purge;
+
+	hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive;
+	opts.deferral_allowed = true;
+	opts.purge_threshold = PAGE;
+	opts.min_purge_delay_ms = 0;
+	opts.hugification_threshold = HUGEPAGE * 0.25;
+	opts.dirty_mult = FXP_INIT_PERCENT(10);
+	opts.hugify_style = hpa_hugify_style_none;
+	opts.min_purge_interval_ms = 0;
+	opts.hugify_delay_ms = 0;
+
+	hpa_shard_t *shard = create_test_data(&hooks, &opts);
+	bool         deferred_work_generated = false;
+	/* Current time = 10ms */
+	nstime_init(&defer_curtime, 10 * 1000 * 1000);
+
+	/* Fake that system is in thp_always mode */
+	system_thp_mode_t old_mode = init_system_thp_mode;
+	init_system_thp_mode = system_thp_mode_always;
+
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	enum { NALLOCS = HUGEPAGE_PAGES };
+	edata_t *edatas[NALLOCS];
+	ndefer_purge_calls = 0;
+	for (int i = 0; i < NALLOCS / 2; i++) {
+		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+		    false, false, &deferred_work_generated);
+		expect_ptr_not_null(edatas[i], "Unexpected null edata");
+	}
+	hpdata_t *ps = psset_pick_alloc(&shard->psset, PAGE);
+	expect_true(hpdata_huge_get(ps),
+	    "Page should be huge because thp=always and hugify_style is none");
+
+	ndefer_hugify_calls = 0;
+	ndefer_purge_calls = 0;
+	hpa_shard_do_deferred_work(tsdn, shard);
+	expect_zu_eq(ndefer_hugify_calls, 0, "style=none, no syscall");
+	expect_zu_eq(ndefer_dehugify_calls, 0, "style=none, no syscall");
+	expect_zu_eq(ndefer_purge_calls, 1, "purge should happen");
+
+	destroy_test_data(shard);
+	init_system_thp_mode = old_mode;
+}
+TEST_END
+
+int
+main(void) {
+	return test_no_reentrancy(
+	    test_hpa_hugify_style_none_huge_no_syscall_thp_always);
+}
diff --git a/test/unit/hpa_thp_always.sh b/test/unit/hpa_thp_always.sh
new file mode 100644
index 00000000..8b93006d
--- /dev/null
+++ b/test/unit/hpa_thp_always.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+export MALLOC_CONF="process_madvise_max_batch:0,experimental_hpa_start_huge_if_thp_always:true"
diff --git a/test/unit/hpa_vectorized_madvise.c b/test/unit/hpa_vectorized_madvise.c
new file mode 100644
index 00000000..2121de49
--- /dev/null
+++ b/test/unit/hpa_vectorized_madvise.c
@@ -0,0 +1,263 @@
+#include "test/jemalloc_test.h"
+
+#include "jemalloc/internal/hpa.h"
+#include "jemalloc/internal/nstime.h"
+
+#define SHARD_IND 111
+
+#define ALLOC_MAX (HUGEPAGE)
+
+typedef struct test_data_s test_data_t;
+struct test_data_s {
+	/*
+	 * Must be the first member -- we convert back and forth between the
+	 * test_data_t and the hpa_shard_t;
+	 */
+	hpa_shard_t   shard;
+	hpa_central_t central;
+	base_t       *base;
+	edata_cache_t shard_edata_cache;
+
+	emap_t emap;
+};
+
+static hpa_shard_opts_t test_hpa_shard_opts_default = {
+    /* slab_max_alloc */
+    ALLOC_MAX,
+    /* hugification_threshold */
+    HUGEPAGE,
+    /* dirty_mult */
+    FXP_INIT_PERCENT(25),
+    /* deferral_allowed */
+    false,
+    /* hugify_delay_ms */
+    10 * 1000,
+    /* hugify_sync */
+    false,
+    /* min_purge_interval_ms */
+    5 * 1000,
+    /* experimental_max_purge_nhp */
+    -1,
+    /* purge_threshold */
+    1,
+    /* purge_delay_ms */
+    0,
+    /* hugify_style */
+    hpa_hugify_style_lazy};
+
+static hpa_shard_t *
+create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) {
+	bool    err;
+	base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND,
+	    &ehooks_default_extent_hooks, /* metadata_use_hooks */ true);
+	assert_ptr_not_null(base, "");
+
+	test_data_t *test_data = malloc(sizeof(test_data_t));
+	assert_ptr_not_null(test_data, "");
+
+	test_data->base = base;
+
+	err = edata_cache_init(&test_data->shard_edata_cache, base);
+	assert_false(err, "");
+
+	err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false);
+	assert_false(err, "");
+
+	err = hpa_central_init(&test_data->central, test_data->base, hooks);
+	assert_false(err, "");
+
+	sec_opts_t sec_opts;
+	sec_opts.nshards = 0;
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	err = hpa_shard_init(tsdn, &test_data->shard, &test_data->central,
+	    &test_data->emap, test_data->base, &test_data->shard_edata_cache,
+	    SHARD_IND, opts, &sec_opts);
+	assert_false(err, "");
+
+	return (hpa_shard_t *)test_data;
+}
+
+static void
+destroy_test_data(hpa_shard_t *shard) {
+	test_data_t *test_data = (test_data_t *)shard;
+	base_delete(TSDN_NULL, test_data->base);
+	free(test_data);
+}
+
+static uintptr_t defer_bump_ptr = HUGEPAGE * 123;
+static void *
+defer_test_map(size_t size) {
+	void *result = (void *)defer_bump_ptr;
+	defer_bump_ptr += size;
+	return result;
+}
+
+static void
+defer_test_unmap(void *ptr, size_t size) {
+	(void)ptr;
+	(void)size;
+}
+
+static size_t ndefer_purge_calls = 0;
+static void
+defer_test_purge(void *ptr, size_t size) {
+	(void)ptr;
+	(void)size;
+	++ndefer_purge_calls;
+}
+
+static size_t ndefer_vec_purge_calls = 0;
+static bool
+defer_vectorized_purge(void *vec, size_t vlen, size_t nbytes) {
+	(void)vec;
+	(void)nbytes;
+	++ndefer_vec_purge_calls;
+	return false;
+}
+
+static bool defer_vec_purge_didfail = false;
+static bool
+defer_vectorized_purge_fail(void *vec, size_t vlen, size_t nbytes) {
+	(void)vec;
+	(void)vlen;
+	(void)nbytes;
+	defer_vec_purge_didfail = true;
+	return true;
+}
+
+static size_t ndefer_hugify_calls = 0;
+static bool
+defer_test_hugify(void *ptr, size_t size, bool sync) {
+	++ndefer_hugify_calls;
+	return false;
+}
+
+static size_t ndefer_dehugify_calls = 0;
+static void
+defer_test_dehugify(void *ptr, size_t size) {
+	++ndefer_dehugify_calls;
+}
+
+static nstime_t defer_curtime;
+static void
+defer_test_curtime(nstime_t *r_time, bool first_reading) {
+	*r_time = defer_curtime;
+}
+
+static uint64_t
+defer_test_ms_since(nstime_t *past_time) {
+	return (nstime_ns(&defer_curtime) - nstime_ns(past_time)) / 1000 / 1000;
+}
+
+TEST_BEGIN(test_vectorized_failure_fallback) {
+	test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch == 0));
+
+	hpa_hooks_t hooks;
+	hooks.map = &defer_test_map;
+	hooks.unmap = &defer_test_unmap;
+	hooks.purge = &defer_test_purge;
+	hooks.hugify = &defer_test_hugify;
+	hooks.dehugify = &defer_test_dehugify;
+	hooks.curtime = &defer_test_curtime;
+	hooks.ms_since = &defer_test_ms_since;
+	hooks.vectorized_purge = &defer_vectorized_purge_fail;
+	defer_vec_purge_didfail = false;
+
+	hpa_shard_opts_t opts = test_hpa_shard_opts_default;
+	opts.deferral_allowed = true;
+	opts.min_purge_interval_ms = 0;
+
+	hpa_shard_t *shard = create_test_data(&hooks, &opts);
+
+	bool deferred_work_generated = false;
+
+	nstime_init(&defer_curtime, 0);
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+
+	edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, false,
+	    false, &deferred_work_generated);
+	expect_ptr_not_null(edata, "Unexpected null edata");
+	pai_dalloc(tsdn, &shard->pai, edata, &deferred_work_generated);
+	hpa_shard_do_deferred_work(tsdn, shard);
+
+	expect_true(defer_vec_purge_didfail, "Expect vec purge fail");
+	expect_zu_eq(1, ndefer_purge_calls, "Expect non-vec purge");
+	ndefer_purge_calls = 0;
+
+	destroy_test_data(shard);
+}
+TEST_END
+
+TEST_BEGIN(test_more_regions_purged_from_one_page) {
+	test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch == 0)
+	    || HUGEPAGE_PAGES <= 4);
+
+	hpa_hooks_t hooks;
+	hooks.map = &defer_test_map;
+	hooks.unmap = &defer_test_unmap;
+	hooks.purge = &defer_test_purge;
+	hooks.hugify = &defer_test_hugify;
+	hooks.dehugify = &defer_test_dehugify;
+	hooks.curtime = &defer_test_curtime;
+	hooks.ms_since = &defer_test_ms_since;
+	hooks.vectorized_purge = &defer_vectorized_purge;
+
+	hpa_shard_opts_t opts = test_hpa_shard_opts_default;
+	opts.deferral_allowed = true;
+	opts.min_purge_interval_ms = 0;
+	ndefer_vec_purge_calls = 0;
+	ndefer_purge_calls = 0;
+
+	hpa_shard_t *shard = create_test_data(&hooks, &opts);
+
+	bool deferred_work_generated = false;
+
+	nstime_init(&defer_curtime, 0);
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+
+	enum { NALLOCS = 8 * HUGEPAGE_PAGES };
+	edata_t *edatas[NALLOCS];
+	for (int i = 0; i < NALLOCS; i++) {
+		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+		    false, false, &deferred_work_generated);
+		expect_ptr_not_null(edatas[i], "Unexpected null edata");
+	}
+	/* Deallocate almost 3 pages out of 8, and to force batching
+	 * leave the 2nd and 4th PAGE in the first 3 hugepages.
+	 */
+	for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) {
+		int j = i % HUGEPAGE_PAGES;
+		if (j != 1 && j != 3) {
+			pai_dalloc(tsdn, &shard->pai, edatas[i],
+			    &deferred_work_generated);
+		}
+	}
+
+	hpa_shard_do_deferred_work(tsdn, shard);
+
+	/*
+	 * Strict minimum purge interval is not set, we should purge as long as
+	 * we have dirty pages.
+	 */
+	expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early");
+	expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early");
+
+	/* We purge from 2 huge pages, each one 3 dirty continous segments.
+	 * For opt_process_madvise_max_batch = 2, that is
+	 * 2 calls for first page, and 2 calls for second as we don't
+	 * want to hold the lock on the second page while vectorized batch
+	 * of size 2 is already filled with the first one.
+	 */
+	expect_zu_eq(4, ndefer_vec_purge_calls, "Expect purge");
+	expect_zu_eq(0, ndefer_purge_calls, "Expect no non-vec purge");
+	ndefer_vec_purge_calls = 0;
+
+	destroy_test_data(shard);
+}
+TEST_END
+
+int
+main(void) {
+	return test_no_reentrancy(test_vectorized_failure_fallback,
+	    test_more_regions_purged_from_one_page);
+}
diff --git a/test/unit/hpa_vectorized_madvise.sh b/test/unit/hpa_vectorized_madvise.sh
new file mode 100644
index 00000000..35d7e6b6
--- /dev/null
+++ b/test/unit/hpa_vectorized_madvise.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+export MALLOC_CONF="process_madvise_max_batch:2,experimental_hpa_start_huge_if_thp_always:false"
diff --git a/test/unit/hpa_vectorized_madvise_large_batch.c b/test/unit/hpa_vectorized_madvise_large_batch.c
new file mode 100644
index 00000000..e92988de
--- /dev/null
+++ b/test/unit/hpa_vectorized_madvise_large_batch.c
@@ -0,0 +1,272 @@
+#include "test/jemalloc_test.h"
+
+#include "jemalloc/internal/hpa.h"
+#include "jemalloc/internal/hpa_utils.h"
+#include "jemalloc/internal/nstime.h"
+
+#define SHARD_IND 111
+
+#define ALLOC_MAX (HUGEPAGE)
+
+typedef struct test_data_s test_data_t;
+struct test_data_s {
+	/*
+	 * Must be the first member -- we convert back and forth between the
+	 * test_data_t and the hpa_shard_t;
+	 */
+	hpa_shard_t   shard;
+	hpa_central_t central;
+	base_t       *base;
+	edata_cache_t shard_edata_cache;
+
+	emap_t emap;
+};
+
+static hpa_shard_opts_t test_hpa_shard_opts_default = {
+    /* slab_max_alloc */
+    ALLOC_MAX,
+    /* hugification_threshold */
+    HUGEPAGE,
+    /* dirty_mult */
+    FXP_INIT_PERCENT(25),
+    /* deferral_allowed */
+    false,
+    /* hugify_delay_ms */
+    10 * 1000,
+    /* hugify_sync */
+    false,
+    /* min_purge_interval_ms */
+    5 * 1000,
+    /* experimental_max_purge_nhp */
+    -1,
+    /* purge_threshold */
+    1,
+    /* min_purge_delay_ms */
+    0,
+    /* hugify_style */
+    hpa_hugify_style_lazy};
+
+static hpa_shard_t *
+create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) {
+	bool    err;
+	base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND,
+	    &ehooks_default_extent_hooks, /* metadata_use_hooks */ true);
+	assert_ptr_not_null(base, "");
+
+	test_data_t *test_data = malloc(sizeof(test_data_t));
+	assert_ptr_not_null(test_data, "");
+
+	test_data->base = base;
+
+	err = edata_cache_init(&test_data->shard_edata_cache, base);
+	assert_false(err, "");
+
+	err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false);
+	assert_false(err, "");
+
+	err = hpa_central_init(&test_data->central, test_data->base, hooks);
+	assert_false(err, "");
+	sec_opts_t sec_opts;
+	sec_opts.nshards = 0;
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	err = hpa_shard_init(tsdn, &test_data->shard, &test_data->central,
+	    &test_data->emap, test_data->base, &test_data->shard_edata_cache,
+	    SHARD_IND, opts, &sec_opts);
+	assert_false(err, "");
+
+	return (hpa_shard_t *)test_data;
+}
+
+static void
+destroy_test_data(hpa_shard_t *shard) {
+	test_data_t *test_data = (test_data_t *)shard;
+	base_delete(TSDN_NULL, test_data->base);
+	free(test_data);
+}
+
+static uintptr_t defer_bump_ptr = HUGEPAGE * 123;
+static void *
+defer_test_map(size_t size) {
+	void *result = (void *)defer_bump_ptr;
+	defer_bump_ptr += size;
+	return result;
+}
+
+static void
+defer_test_unmap(void *ptr, size_t size) {
+	(void)ptr;
+	(void)size;
+}
+
+static size_t ndefer_purge_calls = 0;
+static void
+defer_test_purge(void *ptr, size_t size) {
+	(void)ptr;
+	(void)size;
+	++ndefer_purge_calls;
+}
+
+static size_t ndefer_vec_purge_calls = 0;
+static bool
+defer_vectorized_purge(void *vec, size_t vlen, size_t nbytes) {
+	(void)vec;
+	(void)nbytes;
+	++ndefer_vec_purge_calls;
+	return false;
+}
+
+static size_t ndefer_hugify_calls = 0;
+static bool
+defer_test_hugify(void *ptr, size_t size, bool sync) {
+	++ndefer_hugify_calls;
+	return false;
+}
+
+static size_t ndefer_dehugify_calls = 0;
+static void
+defer_test_dehugify(void *ptr, size_t size) {
+	++ndefer_dehugify_calls;
+}
+
+static nstime_t defer_curtime;
+static void
+defer_test_curtime(nstime_t *r_time, bool first_reading) {
+	*r_time = defer_curtime;
+}
+
+static uint64_t
+defer_test_ms_since(nstime_t *past_time) {
+	return (nstime_ns(&defer_curtime) - nstime_ns(past_time)) / 1000 / 1000;
+}
+
+TEST_BEGIN(test_vectorized_purge) {
+	test_skip_if(!hpa_supported() || opt_process_madvise_max_batch == 0
+	    || HUGEPAGE_PAGES <= 4);
+	assert(opt_process_madvise_max_batch == 64);
+
+	hpa_hooks_t hooks;
+	hooks.map = &defer_test_map;
+	hooks.unmap = &defer_test_unmap;
+	hooks.purge = &defer_test_purge;
+	hooks.hugify = &defer_test_hugify;
+	hooks.dehugify = &defer_test_dehugify;
+	hooks.curtime = &defer_test_curtime;
+	hooks.ms_since = &defer_test_ms_since;
+	hooks.vectorized_purge = &defer_vectorized_purge;
+
+	hpa_shard_opts_t opts = test_hpa_shard_opts_default;
+	opts.deferral_allowed = true;
+	opts.min_purge_interval_ms = 0;
+	ndefer_vec_purge_calls = 0;
+	ndefer_purge_calls = 0;
+
+	hpa_shard_t *shard = create_test_data(&hooks, &opts);
+
+	bool deferred_work_generated = false;
+
+	nstime_init(&defer_curtime, 0);
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+
+	enum { NALLOCS = 8 * HUGEPAGE_PAGES };
+	edata_t *edatas[NALLOCS];
+	for (int i = 0; i < NALLOCS; i++) {
+		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+		    false, false, &deferred_work_generated);
+		expect_ptr_not_null(edatas[i], "Unexpected null edata");
+	}
+	/* Deallocate almost 3 hugepages out of 8, and to force batching
+	 * leave the 2nd and 4th PAGE in the first 3 hugepages.
+	 */
+	for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) {
+		int j = i % HUGEPAGE_PAGES;
+		if (j != 1 && j != 3) {
+			pai_dalloc(tsdn, &shard->pai, edatas[i],
+			    &deferred_work_generated);
+		}
+	}
+
+	hpa_shard_do_deferred_work(tsdn, shard);
+
+	/*
+	 * We purge from 2 huge pages, each one 3 dirty continous segments.
+	 * For opt_process_madvise_max_batch = 64, that is all just one call
+	 */
+	expect_zu_eq(1, ndefer_vec_purge_calls, "Expect single purge");
+	ndefer_vec_purge_calls = 0;
+
+	destroy_test_data(shard);
+}
+TEST_END
+
+TEST_BEGIN(test_purge_more_than_one_batch_pages) {
+	test_skip_if(!hpa_supported()
+	    || (opt_process_madvise_max_batch < HPA_PURGE_BATCH_MAX)
+	    || HUGEPAGE_PAGES <= 4);
+
+	hpa_hooks_t hooks;
+	hooks.map = &defer_test_map;
+	hooks.unmap = &defer_test_unmap;
+	hooks.purge = &defer_test_purge;
+	hooks.hugify = &defer_test_hugify;
+	hooks.dehugify = &defer_test_dehugify;
+	hooks.curtime = &defer_test_curtime;
+	hooks.ms_since = &defer_test_ms_since;
+	hooks.vectorized_purge = &defer_vectorized_purge;
+
+	hpa_shard_opts_t opts = test_hpa_shard_opts_default;
+	opts.deferral_allowed = true;
+	opts.min_purge_interval_ms = 0;
+	opts.dirty_mult = FXP_INIT_PERCENT(1);
+	ndefer_vec_purge_calls = 0;
+	ndefer_purge_calls = 0;
+	ndefer_hugify_calls = 0;
+	ndefer_dehugify_calls = 0;
+
+	hpa_shard_t *shard = create_test_data(&hooks, &opts);
+
+	bool deferred_work_generated = false;
+
+	nstime_init(&defer_curtime, 0);
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+
+	enum { NALLOCS = HPA_PURGE_BATCH_MAX * 3 * HUGEPAGE_PAGES };
+	edata_t *edatas[NALLOCS];
+	for (int i = 0; i < NALLOCS; i++) {
+		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+		    false, false, &deferred_work_generated);
+		expect_ptr_not_null(edatas[i], "Unexpected null edata");
+	}
+	for (int i = 0; i < HPA_PURGE_BATCH_MAX * 2 * (int)HUGEPAGE_PAGES;
+	    i++) {
+		pai_dalloc(
+		    tsdn, &shard->pai, edatas[i], &deferred_work_generated);
+	}
+
+	hpa_shard_do_deferred_work(tsdn, shard);
+
+	/*
+	 * Strict minimum purge interval is not set, we should purge as long as
+	 * we have dirty pages.
+	 */
+	expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early");
+	expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early");
+
+	/* We have page batch size = HPA_PURGE_BATCH_MAX.  We have
+	 * HPA_PURGE_BATCH_MAX active pages, 2 * HPA_PURGE_BATCH_MAX dirty.
+	 * To achieve the balance of 1% max dirty we need to purge more than one
+	 * batch.
+	 */
+	size_t nexpected = 2;
+	expect_zu_eq(nexpected, ndefer_vec_purge_calls, "Expect purge");
+	expect_zu_eq(0, ndefer_purge_calls, "Expect no non-vec purge");
+	ndefer_vec_purge_calls = 0;
+
+	destroy_test_data(shard);
+}
+TEST_END
+
+int
+main(void) {
+	return test_no_reentrancy(
+	    test_vectorized_purge, test_purge_more_than_one_batch_pages);
+}
diff --git a/test/unit/hpa_vectorized_madvise_large_batch.sh b/test/unit/hpa_vectorized_madvise_large_batch.sh
new file mode 100644
index 00000000..f996047f
--- /dev/null
+++ b/test/unit/hpa_vectorized_madvise_large_batch.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+export MALLOC_CONF="process_madvise_max_batch:64"
diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c
index 288e71d4..ac45d697 100644
--- a/test/unit/hpdata.c
+++ b/test/unit/hpdata.c
@@ -5,7 +5,7 @@
 
 TEST_BEGIN(test_reserve_alloc) {
 	hpdata_t hpdata;
-	hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE);
+	hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE, /* is_huge */ false);
 
 	/* Allocating a page at a time, we should do first fit. */
 	for (size_t i = 0; i < HUGEPAGE_PAGES; i++) {
@@ -57,7 +57,7 @@ TEST_END
 
 TEST_BEGIN(test_purge_simple) {
 	hpdata_t hpdata;
-	hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE);
+	hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE, /* is_huge */ false);
 
 	void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE_PAGES / 2 * PAGE);
 	expect_ptr_eq(alloc, HPDATA_ADDR, "");
@@ -69,21 +69,25 @@ TEST_BEGIN(test_purge_simple) {
 
 	hpdata_alloc_allowed_set(&hpdata, false);
 	hpdata_purge_state_t purge_state;
-	size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state);
+	size_t               nranges;
+	size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state, &nranges);
 	expect_zu_eq(HUGEPAGE_PAGES / 4, to_purge, "");
+	expect_zu_eq(1, nranges, "All dirty pages in a single range");
 
-	void *purge_addr;
+	void  *purge_addr;
 	size_t purge_size;
-	bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
-	    &purge_size);
+	bool   got_result = hpdata_purge_next(
+            &hpdata, &purge_state, &purge_addr, &purge_size);
 	expect_true(got_result, "");
 	expect_ptr_eq(HPDATA_ADDR, purge_addr, "");
 	expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, "");
 
-	got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
-	    &purge_size);
-	expect_false(got_result, "Unexpected additional purge range: "
-	    "extent at %p of size %zu", purge_addr, purge_size);
+	got_result = hpdata_purge_next(
+	    &hpdata, &purge_state, &purge_addr, &purge_size);
+	expect_false(got_result,
+	    "Unexpected additional purge range: "
+	    "extent at %p of size %zu",
+	    purge_addr, purge_size);
 
 	hpdata_purge_end(&hpdata, &purge_state);
 	expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 4, "");
@@ -97,10 +101,11 @@ TEST_END
  */
 TEST_BEGIN(test_purge_intervening_dalloc) {
 	hpdata_t hpdata;
-	hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE);
+	hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE, /* is_huge */ false);
 
 	/* Allocate the first 3/4 of the pages. */
-	void *alloc = hpdata_reserve_alloc(&hpdata, 3 * HUGEPAGE_PAGES / 4  * PAGE);
+	void *alloc = hpdata_reserve_alloc(
+	    &hpdata, 3 * HUGEPAGE_PAGES / 4 * PAGE);
 	expect_ptr_eq(alloc, HPDATA_ADDR, "");
 
 	/* Free the first 1/4 and the third 1/4 of the pages. */
@@ -113,14 +118,16 @@ TEST_BEGIN(test_purge_intervening_dalloc) {
 
 	hpdata_alloc_allowed_set(&hpdata, false);
 	hpdata_purge_state_t purge_state;
-	size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state);
+	size_t               nranges;
+	size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state, &nranges);
 	expect_zu_eq(HUGEPAGE_PAGES / 2, to_purge, "");
+	expect_zu_eq(2, nranges, "First quarter and last half");
 
-	void *purge_addr;
+	void  *purge_addr;
 	size_t purge_size;
 	/* First purge. */
-	bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
-	    &purge_size);
+	bool got_result = hpdata_purge_next(
+	    &hpdata, &purge_state, &purge_addr, &purge_size);
 	expect_true(got_result, "");
 	expect_ptr_eq(HPDATA_ADDR, purge_addr, "");
 	expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, "");
@@ -131,18 +138,20 @@ TEST_BEGIN(test_purge_intervening_dalloc) {
 	    HUGEPAGE_PAGES / 4 * PAGE);
 
 	/* Now continue purging. */
-	got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
-	    &purge_size);
+	got_result = hpdata_purge_next(
+	    &hpdata, &purge_state, &purge_addr, &purge_size);
 	expect_true(got_result, "");
 	expect_ptr_eq(
 	    (void *)((uintptr_t)alloc + 2 * HUGEPAGE_PAGES / 4 * PAGE),
 	    purge_addr, "");
 	expect_zu_ge(HUGEPAGE_PAGES / 4 * PAGE, purge_size, "");
 
-	got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
-	    &purge_size);
-	expect_false(got_result, "Unexpected additional purge range: "
-	    "extent at %p of size %zu", purge_addr, purge_size);
+	got_result = hpdata_purge_next(
+	    &hpdata, &purge_state, &purge_addr, &purge_size);
+	expect_false(got_result,
+	    "Unexpected additional purge range: "
+	    "extent at %p of size %zu",
+	    purge_addr, purge_size);
 
 	hpdata_purge_end(&hpdata, &purge_state);
 
@@ -151,19 +160,20 @@ TEST_BEGIN(test_purge_intervening_dalloc) {
 TEST_END
 
 TEST_BEGIN(test_purge_over_retained) {
-	void *purge_addr;
+	void  *purge_addr;
 	size_t purge_size;
 
 	hpdata_t hpdata;
-	hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE);
+	hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE, /* is_huge */ false);
 
 	/* Allocate the first 3/4 of the pages. */
-	void *alloc = hpdata_reserve_alloc(&hpdata, 3 * HUGEPAGE_PAGES / 4  * PAGE);
+	void *alloc = hpdata_reserve_alloc(
+	    &hpdata, 3 * HUGEPAGE_PAGES / 4 * PAGE);
 	expect_ptr_eq(alloc, HPDATA_ADDR, "");
 
 	/* Free the second quarter. */
-	void *second_quarter =
-	    (void *)((uintptr_t)alloc + HUGEPAGE_PAGES / 4 * PAGE);
+	void *second_quarter = (void *)((uintptr_t)alloc
+	    + HUGEPAGE_PAGES / 4 * PAGE);
 	hpdata_unreserve(&hpdata, second_quarter, HUGEPAGE_PAGES / 4 * PAGE);
 
 	expect_zu_eq(hpdata_ntouched_get(&hpdata), 3 * HUGEPAGE_PAGES / 4, "");
@@ -171,19 +181,24 @@ TEST_BEGIN(test_purge_over_retained) {
 	/* Purge the second quarter. */
 	hpdata_alloc_allowed_set(&hpdata, false);
 	hpdata_purge_state_t purge_state;
-	size_t to_purge_dirty = hpdata_purge_begin(&hpdata, &purge_state);
+	size_t               nranges;
+	size_t               to_purge_dirty = hpdata_purge_begin(
+            &hpdata, &purge_state, &nranges);
 	expect_zu_eq(HUGEPAGE_PAGES / 4, to_purge_dirty, "");
+	expect_zu_eq(1, nranges, "Second quarter only");
 
-	bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
-	    &purge_size);
+	bool got_result = hpdata_purge_next(
+	    &hpdata, &purge_state, &purge_addr, &purge_size);
 	expect_true(got_result, "");
 	expect_ptr_eq(second_quarter, purge_addr, "");
 	expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, "");
 
-	got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
-	    &purge_size);
-	expect_false(got_result, "Unexpected additional purge range: "
-	    "extent at %p of size %zu", purge_addr, purge_size);
+	got_result = hpdata_purge_next(
+	    &hpdata, &purge_state, &purge_addr, &purge_size);
+	expect_false(got_result,
+	    "Unexpected additional purge range: "
+	    "extent at %p of size %zu",
+	    purge_addr, purge_size);
 	hpdata_purge_end(&hpdata, &purge_state);
 
 	expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 2, "");
@@ -199,19 +214,22 @@ TEST_BEGIN(test_purge_over_retained) {
 	 * re-purge it.  We expect a single purge of 3/4 of the hugepage,
 	 * purging half its pages.
 	 */
-	to_purge_dirty = hpdata_purge_begin(&hpdata, &purge_state);
+	to_purge_dirty = hpdata_purge_begin(&hpdata, &purge_state, &nranges);
 	expect_zu_eq(HUGEPAGE_PAGES / 2, to_purge_dirty, "");
+	expect_zu_eq(1, nranges, "Single range expected");
 
-	got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
-	    &purge_size);
+	got_result = hpdata_purge_next(
+	    &hpdata, &purge_state, &purge_addr, &purge_size);
 	expect_true(got_result, "");
 	expect_ptr_eq(HPDATA_ADDR, purge_addr, "");
 	expect_zu_eq(3 * HUGEPAGE_PAGES / 4 * PAGE, purge_size, "");
 
-	got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
-	    &purge_size);
-	expect_false(got_result, "Unexpected additional purge range: "
-	    "extent at %p of size %zu", purge_addr, purge_size);
+	got_result = hpdata_purge_next(
+	    &hpdata, &purge_state, &purge_addr, &purge_size);
+	expect_false(got_result,
+	    "Unexpected additional purge range: "
+	    "extent at %p of size %zu",
+	    purge_addr, purge_size);
 	hpdata_purge_end(&hpdata, &purge_state);
 
 	expect_zu_eq(hpdata_ntouched_get(&hpdata), 0, "");
@@ -220,7 +238,7 @@ TEST_END
 
 TEST_BEGIN(test_hugify) {
 	hpdata_t hpdata;
-	hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE);
+	hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE, /* is_huge */ false);
 
 	void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE / 2);
 	expect_ptr_eq(alloc, HPDATA_ADDR, "");
@@ -234,11 +252,9 @@ TEST_BEGIN(test_hugify) {
 }
 TEST_END
 
-int main(void) {
-	return test_no_reentrancy(
-	    test_reserve_alloc,
-	    test_purge_simple,
-	    test_purge_intervening_dalloc,
-	    test_purge_over_retained,
+int
+main(void) {
+	return test_no_reentrancy(test_reserve_alloc, test_purge_simple,
+	    test_purge_intervening_dalloc, test_purge_over_retained,
 	    test_hugify);
 }
diff --git a/test/unit/huge.c b/test/unit/huge.c
index ec64e500..70abe4ac 100644
--- a/test/unit/huge.c
+++ b/test/unit/huge.c
@@ -8,38 +8,40 @@ const char *malloc_conf = "oversize_threshold:2097152";
 
 TEST_BEGIN(huge_bind_thread) {
 	unsigned arena1, arena2;
-	size_t sz = sizeof(unsigned);
+	size_t   sz = sizeof(unsigned);
 
 	/* Bind to a manual arena. */
 	expect_d_eq(mallctl("arenas.create", &arena1, &sz, NULL, 0), 0,
 	    "Failed to create arena");
-	expect_d_eq(mallctl("thread.arena", NULL, NULL, &arena1,
-	    sizeof(arena1)), 0, "Fail to bind thread");
+	expect_d_eq(
+	    mallctl("thread.arena", NULL, NULL, &arena1, sizeof(arena1)), 0,
+	    "Fail to bind thread");
 
 	void *ptr = mallocx(HUGE_SZ, 0);
 	expect_ptr_not_null(ptr, "Fail to allocate huge size");
-	expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr,
-	    sizeof(ptr)), 0, "Unexpected mallctl() failure");
+	expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, sizeof(ptr)),
+	    0, "Unexpected mallctl() failure");
 	expect_u_eq(arena1, arena2, "Wrong arena used after binding");
 	dallocx(ptr, 0);
 
 	/* Switch back to arena 0. */
-	test_skip_if(have_percpu_arena &&
-	    PERCPU_ARENA_ENABLED(opt_percpu_arena));
+	test_skip_if(
+	    have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena));
 	arena2 = 0;
-	expect_d_eq(mallctl("thread.arena", NULL, NULL, &arena2,
-	    sizeof(arena2)), 0, "Fail to bind thread");
+	expect_d_eq(
+	    mallctl("thread.arena", NULL, NULL, &arena2, sizeof(arena2)), 0,
+	    "Fail to bind thread");
 	ptr = mallocx(SMALL_SZ, MALLOCX_TCACHE_NONE);
-	expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr,
-	    sizeof(ptr)), 0, "Unexpected mallctl() failure");
+	expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, sizeof(ptr)),
+	    0, "Unexpected mallctl() failure");
 	expect_u_eq(arena2, 0, "Wrong arena used after binding");
 	dallocx(ptr, MALLOCX_TCACHE_NONE);
 
 	/* Then huge allocation should use the huge arena. */
 	ptr = mallocx(HUGE_SZ, 0);
 	expect_ptr_not_null(ptr, "Fail to allocate huge size");
-	expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr,
-	    sizeof(ptr)), 0, "Unexpected mallctl() failure");
+	expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, sizeof(ptr)),
+	    0, "Unexpected mallctl() failure");
 	expect_u_ne(arena2, 0, "Wrong arena used after binding");
 	expect_u_ne(arena1, arena2, "Wrong arena used after binding");
 	dallocx(ptr, 0);
@@ -48,25 +50,26 @@ TEST_END
 
 TEST_BEGIN(huge_mallocx) {
 	unsigned arena1, arena2;
-	size_t sz = sizeof(unsigned);
+	size_t   sz = sizeof(unsigned);
 
 	expect_d_eq(mallctl("arenas.create", &arena1, &sz, NULL, 0), 0,
 	    "Failed to create arena");
 	void *huge = mallocx(HUGE_SZ, MALLOCX_ARENA(arena1));
 	expect_ptr_not_null(huge, "Fail to allocate huge size");
-	expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &huge,
-	    sizeof(huge)), 0, "Unexpected mallctl() failure");
+	expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &huge, sizeof(huge)),
+	    0, "Unexpected mallctl() failure");
 	expect_u_eq(arena1, arena2, "Wrong arena used for mallocx");
 	dallocx(huge, MALLOCX_ARENA(arena1));
 
 	void *huge2 = mallocx(HUGE_SZ, 0);
 	expect_ptr_not_null(huge, "Fail to allocate huge size");
-	expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &huge2,
-	    sizeof(huge2)), 0, "Unexpected mallctl() failure");
+	expect_d_eq(
+	    mallctl("arenas.lookup", &arena2, &sz, &huge2, sizeof(huge2)), 0,
+	    "Unexpected mallctl() failure");
 	expect_u_ne(arena1, arena2,
 	    "Huge allocation should not come from the manual arena.");
-	expect_u_ne(arena2, 0,
-	    "Huge allocation should not come from the arena 0.");
+	expect_u_ne(
+	    arena2, 0, "Huge allocation should not come from the arena 0.");
 	dallocx(huge2, 0);
 }
 TEST_END
@@ -82,27 +85,27 @@ TEST_BEGIN(huge_allocation) {
 	expect_u_gt(arena1, 0, "Huge allocation should not come from arena 0");
 	dallocx(ptr, 0);
 
+	test_skip_if(
+	    have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena));
+
 	ptr = mallocx(HUGE_SZ >> 1, 0);
 	expect_ptr_not_null(ptr, "Fail to allocate half huge size");
-	expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr,
-	    sizeof(ptr)), 0, "Unexpected mallctl() failure");
+	expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, sizeof(ptr)),
+	    0, "Unexpected mallctl() failure");
 	expect_u_ne(arena1, arena2, "Wrong arena used for half huge");
 	dallocx(ptr, 0);
 
 	ptr = mallocx(SMALL_SZ, MALLOCX_TCACHE_NONE);
 	expect_ptr_not_null(ptr, "Fail to allocate small size");
-	expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr,
-	    sizeof(ptr)), 0, "Unexpected mallctl() failure");
-	expect_u_ne(arena1, arena2,
-	    "Huge and small should be from different arenas");
+	expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, sizeof(ptr)),
+	    0, "Unexpected mallctl() failure");
+	expect_u_ne(
+	    arena1, arena2, "Huge and small should be from different arenas");
 	dallocx(ptr, 0);
 }
 TEST_END
 
 int
 main(void) {
-	return test(
-	    huge_allocation,
-	    huge_mallocx,
-	    huge_bind_thread);
+	return test(huge_allocation, huge_mallocx, huge_bind_thread);
 }
diff --git a/test/unit/inspect.c b/test/unit/inspect.c
index fe59e597..8111e4a5 100644
--- a/test/unit/inspect.c
+++ b/test/unit/inspect.c
@@ -1,27 +1,30 @@
 #include "test/jemalloc_test.h"
 
-#define TEST_UTIL_EINVAL(node, a, b, c, d, why_inval) do {		\
-	assert_d_eq(mallctl("experimental.utilization." node,		\
-	    a, b, c, d), EINVAL, "Should fail when " why_inval);	\
-	assert_zu_eq(out_sz, out_sz_ref,				\
-	    "Output size touched when given invalid arguments");	\
-	assert_d_eq(memcmp(out, out_ref, out_sz_ref), 0,		\
-	    "Output content touched when given invalid arguments");	\
-} while (0)
+#define TEST_UTIL_EINVAL(node, a, b, c, d, why_inval)                          \
+	do {                                                                   \
+		assert_d_eq(                                                   \
+		    mallctl("experimental.utilization." node, a, b, c, d),     \
+		    EINVAL, "Should fail when " why_inval);                    \
+		assert_zu_eq(out_sz, out_sz_ref,                               \
+		    "Output size touched when given invalid arguments");       \
+		assert_d_eq(memcmp(out, out_ref, out_sz_ref), 0,               \
+		    "Output content touched when given invalid arguments");    \
+	} while (0)
 
-#define TEST_UTIL_QUERY_EINVAL(a, b, c, d, why_inval)			\
+#define TEST_UTIL_QUERY_EINVAL(a, b, c, d, why_inval)                          \
 	TEST_UTIL_EINVAL("query", a, b, c, d, why_inval)
-#define TEST_UTIL_BATCH_EINVAL(a, b, c, d, why_inval)			\
+#define TEST_UTIL_BATCH_EINVAL(a, b, c, d, why_inval)                          \
 	TEST_UTIL_EINVAL("batch_query", a, b, c, d, why_inval)
 
-#define TEST_UTIL_VALID(node) do {					\
-        assert_d_eq(mallctl("experimental.utilization." node,		\
-	    out, &out_sz, in, in_sz), 0,				\
-	    "Should return 0 on correct arguments");			\
-        expect_zu_eq(out_sz, out_sz_ref, "incorrect output size");	\
-	expect_d_ne(memcmp(out, out_ref, out_sz_ref), 0,		\
-	    "Output content should be changed");			\
-} while (0)
+#define TEST_UTIL_VALID(node)                                                  \
+	do {                                                                   \
+		assert_d_eq(mallctl("experimental.utilization." node, out,     \
+		                &out_sz, in, in_sz),                           \
+		    0, "Should return 0 on correct arguments");                \
+		expect_zu_eq(out_sz, out_sz_ref, "incorrect output size");     \
+		expect_d_ne(memcmp(out, out_ref, out_sz_ref), 0,               \
+		    "Output content should be changed");                       \
+	} while (0)
 
 #define TEST_UTIL_BATCH_VALID TEST_UTIL_VALID("batch_query")
 
@@ -34,21 +37,19 @@ TEST_BEGIN(test_query) {
 	 * numerically unrelated to any size boundaries.
 	 */
 	for (sz = 7; sz <= TEST_MAX_SIZE && sz <= SC_LARGE_MAXCLASS;
-	    sz += (sz <= SC_SMALL_MAXCLASS ? 1009 : 99989)) {
-		void *p = mallocx(sz, 0);
+	     sz += (sz <= SC_SMALL_MAXCLASS ? 1009 : 99989)) {
+		void  *p = mallocx(sz, 0);
 		void **in = &p;
 		size_t in_sz = sizeof(const void *);
 		size_t out_sz = sizeof(void *) + sizeof(size_t) * 5;
-		void *out = mallocx(out_sz, 0);
-		void *out_ref = mallocx(out_sz, 0);
+		void  *out = mallocx(out_sz, 0);
+		void  *out_ref = mallocx(out_sz, 0);
 		size_t out_sz_ref = out_sz;
 
-		assert_ptr_not_null(p,
-		    "test pointer allocation failed");
-		assert_ptr_not_null(out,
-		    "test output allocation failed");
-		assert_ptr_not_null(out_ref,
-		    "test reference output allocation failed");
+		assert_ptr_not_null(p, "test pointer allocation failed");
+		assert_ptr_not_null(out, "test output allocation failed");
+		assert_ptr_not_null(
+		    out_ref, "test reference output allocation failed");
 
 #define SLABCUR_READ(out) (*(void **)out)
 #define COUNTS(out) ((size_t *)((void **)out + 1))
@@ -64,21 +65,18 @@ TEST_BEGIN(test_query) {
 		memcpy(out_ref, out, out_sz);
 
 		/* Test invalid argument(s) errors */
-		TEST_UTIL_QUERY_EINVAL(NULL, &out_sz, in, in_sz,
-		    "old is NULL");
-		TEST_UTIL_QUERY_EINVAL(out, NULL, in, in_sz,
-		    "oldlenp is NULL");
-		TEST_UTIL_QUERY_EINVAL(out, &out_sz, NULL, in_sz,
-		    "newp is NULL");
-		TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, 0,
-		    "newlen is zero");
+		TEST_UTIL_QUERY_EINVAL(NULL, &out_sz, in, in_sz, "old is NULL");
+		TEST_UTIL_QUERY_EINVAL(out, NULL, in, in_sz, "oldlenp is NULL");
+		TEST_UTIL_QUERY_EINVAL(
+		    out, &out_sz, NULL, in_sz, "newp is NULL");
+		TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, 0, "newlen is zero");
 		in_sz -= 1;
-		TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, in_sz,
-		    "invalid newlen");
+		TEST_UTIL_QUERY_EINVAL(
+		    out, &out_sz, in, in_sz, "invalid newlen");
 		in_sz += 1;
 		out_sz_ref = out_sz -= 2 * sizeof(size_t);
-		TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, in_sz,
-		    "invalid *oldlenp");
+		TEST_UTIL_QUERY_EINVAL(
+		    out, &out_sz, in, in_sz, "invalid *oldlenp");
 		out_sz_ref = out_sz += 2 * sizeof(size_t);
 
 		/* Examine output for valid call */
@@ -100,8 +98,9 @@ TEST_BEGIN(test_query) {
 			    "Extent region count exceeded size");
 			expect_zu_ne(NREGS_READ(out), 0,
 			    "Extent region count must be positive");
-			expect_true(NFREE_READ(out) == 0 || (SLABCUR_READ(out)
-			    != NULL && SLABCUR_READ(out) <= p),
+			expect_true(NFREE_READ(out) == 0
+			        || (SLABCUR_READ(out) != NULL
+			            && SLABCUR_READ(out) <= p),
 			    "Allocation should follow first fit principle");
 
 			if (config_stats) {
@@ -117,8 +116,8 @@ TEST_BEGIN(test_query) {
 				    BIN_NREGS_READ(out),
 				    "Extent region count exceeded "
 				    "bin region count");
-				expect_zu_eq(BIN_NREGS_READ(out)
-				    % NREGS_READ(out), 0,
+				expect_zu_eq(
+				    BIN_NREGS_READ(out) % NREGS_READ(out), 0,
 				    "Bin region count isn't a multiple of "
 				    "extent region count");
 				expect_zu_le(
@@ -171,10 +170,10 @@ TEST_BEGIN(test_batch) {
 	 * numerically unrelated to any size boundaries.
 	 */
 	for (sz = 17; sz <= TEST_MAX_SIZE && sz <= SC_LARGE_MAXCLASS;
-	    sz += (sz <= SC_SMALL_MAXCLASS ? 1019 : 99991)) {
-		void *p = mallocx(sz, 0);
-		void *q = mallocx(sz, 0);
-		void *in[] = {p, q};
+	     sz += (sz <= SC_SMALL_MAXCLASS ? 1019 : 99991)) {
+		void  *p = mallocx(sz, 0);
+		void  *q = mallocx(sz, 0);
+		void  *in[] = {p, q};
 		size_t in_sz = sizeof(const void *) * 2;
 		size_t out[] = {-1, -1, -1, -1, -1, -1};
 		size_t out_sz = sizeof(size_t) * 6;
@@ -185,17 +184,14 @@ TEST_BEGIN(test_batch) {
 		assert_ptr_not_null(q, "test pointer allocation failed");
 
 		/* Test invalid argument(s) errors */
-		TEST_UTIL_BATCH_EINVAL(NULL, &out_sz, in, in_sz,
-		    "old is NULL");
-		TEST_UTIL_BATCH_EINVAL(out, NULL, in, in_sz,
-		    "oldlenp is NULL");
-		TEST_UTIL_BATCH_EINVAL(out, &out_sz, NULL, in_sz,
-		    "newp is NULL");
-		TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, 0,
-		    "newlen is zero");
+		TEST_UTIL_BATCH_EINVAL(NULL, &out_sz, in, in_sz, "old is NULL");
+		TEST_UTIL_BATCH_EINVAL(out, NULL, in, in_sz, "oldlenp is NULL");
+		TEST_UTIL_BATCH_EINVAL(
+		    out, &out_sz, NULL, in_sz, "newp is NULL");
+		TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, 0, "newlen is zero");
 		in_sz -= 1;
-		TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz,
-		    "newlen is not an exact multiple");
+		TEST_UTIL_BATCH_EINVAL(
+		    out, &out_sz, in, in_sz, "newlen is not an exact multiple");
 		in_sz += 1;
 		out_sz_ref = out_sz -= 2 * sizeof(size_t);
 		TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz,
@@ -206,8 +202,8 @@ TEST_BEGIN(test_batch) {
 		    "*oldlenp and newlen do not match");
 		in_sz += sizeof(const void *);
 
-	/* Examine output for valid calls */
-#define TEST_EQUAL_REF(i, message) \
+		/* Examine output for valid calls */
+#define TEST_EQUAL_REF(i, message)                                             \
 	assert_d_eq(memcmp(out + (i) * 3, out_ref + (i) * 3, 3), 0, message)
 
 #define NFREE_READ(out, i) out[(i) * 3]
@@ -238,8 +234,8 @@ TEST_BEGIN(test_batch) {
 			expect_zu_eq(NREGS_READ(out, 0), 1,
 			    "Extent region count should be one");
 		}
-		TEST_EQUAL_REF(1,
-		    "Should not overwrite content beyond what's needed");
+		TEST_EQUAL_REF(
+		    1, "Should not overwrite content beyond what's needed");
 		in_sz *= 2;
 		out_sz_ref = out_sz *= 2;
 
diff --git a/test/unit/json_stats.c b/test/unit/json_stats.c
new file mode 100644
index 00000000..c206974b
--- /dev/null
+++ b/test/unit/json_stats.c
@@ -0,0 +1,509 @@
+#include "test/jemalloc_test.h"
+
+typedef struct {
+	char  *buf;
+	size_t len;
+	size_t capacity;
+} stats_buf_t;
+
+static void
+stats_buf_init(stats_buf_t *sbuf) {
+	/* 1MB buffer should be enough since per-arena stats are omitted. */
+	sbuf->capacity = 1 << 20;
+	sbuf->buf = mallocx(sbuf->capacity, MALLOCX_TCACHE_NONE);
+	assert_ptr_not_null(sbuf->buf, "Failed to allocate stats buffer");
+	sbuf->len = 0;
+	sbuf->buf[0] = '\0';
+}
+
+static void
+stats_buf_fini(stats_buf_t *sbuf) {
+	dallocx(sbuf->buf, MALLOCX_TCACHE_NONE);
+}
+
+static void
+stats_buf_write_cb(void *opaque, const char *str) {
+	stats_buf_t *sbuf = (stats_buf_t *)opaque;
+	size_t       slen = strlen(str);
+
+	if (sbuf->len + slen + 1 > sbuf->capacity) {
+		return;
+	}
+	memcpy(&sbuf->buf[sbuf->len], str, slen + 1);
+	sbuf->len += slen;
+}
+
+static bool
+json_extract_uint64(const char *json, const char *key, uint64_t *result) {
+	char   search_key[128];
+	size_t key_len;
+
+	key_len = snprintf(search_key, sizeof(search_key), "\"%s\":", key);
+	if (key_len >= sizeof(search_key)) {
+		return true;
+	}
+
+	const char *pos = strstr(json, search_key);
+	if (pos == NULL) {
+		return true;
+	}
+
+	pos += key_len;
+	while (*pos == ' ' || *pos == '\t' || *pos == '\n') {
+		pos++;
+	}
+
+	char    *endptr;
+	uint64_t value = strtoull(pos, &endptr, 10);
+	if (endptr == pos) {
+		return true;
+	}
+
+	*result = value;
+	return false;
+}
+
+static const char *
+json_find_section(const char *json, const char *section_name) {
+	char   search_pattern[128];
+	size_t pattern_len;
+
+	pattern_len = snprintf(
+	    search_pattern, sizeof(search_pattern), "\"%s\":", section_name);
+	if (pattern_len >= sizeof(search_pattern)) {
+		return NULL;
+	}
+
+	return strstr(json, search_pattern);
+}
+
+static void
+verify_mutex_json(const char *mutexes_section, const char *mallctl_prefix,
+    const char *mutex_name) {
+	char   mallctl_path[128];
+	size_t sz;
+
+	const char *mutex_section = json_find_section(
+	    mutexes_section, mutex_name);
+	expect_ptr_not_null(mutex_section,
+	    "Could not find %s mutex section in JSON", mutex_name);
+
+	uint64_t ctl_num_ops, ctl_num_wait, ctl_num_spin_acq;
+	uint64_t ctl_num_owner_switch, ctl_total_wait_time, ctl_max_wait_time;
+	uint32_t ctl_max_num_thds;
+
+	sz = sizeof(uint64_t);
+	snprintf(mallctl_path, sizeof(mallctl_path), "%s.%s.num_ops",
+	    mallctl_prefix, mutex_name);
+	expect_d_eq(mallctl(mallctl_path, &ctl_num_ops, &sz, NULL, 0), 0,
+	    "Unexpected mallctl() failure for %s", mallctl_path);
+
+	snprintf(mallctl_path, sizeof(mallctl_path), "%s.%s.num_wait",
+	    mallctl_prefix, mutex_name);
+	expect_d_eq(mallctl(mallctl_path, &ctl_num_wait, &sz, NULL, 0), 0,
+	    "Unexpected mallctl() failure for %s", mallctl_path);
+
+	snprintf(mallctl_path, sizeof(mallctl_path), "%s.%s.num_spin_acq",
+	    mallctl_prefix, mutex_name);
+	expect_d_eq(mallctl(mallctl_path, &ctl_num_spin_acq, &sz, NULL, 0), 0,
+	    "Unexpected mallctl() failure for %s", mallctl_path);
+
+	snprintf(mallctl_path, sizeof(mallctl_path), "%s.%s.num_owner_switch",
+	    mallctl_prefix, mutex_name);
+	expect_d_eq(mallctl(mallctl_path, &ctl_num_owner_switch, &sz, NULL, 0),
+	    0, "Unexpected mallctl() failure for %s", mallctl_path);
+
+	snprintf(mallctl_path, sizeof(mallctl_path), "%s.%s.total_wait_time",
+	    mallctl_prefix, mutex_name);
+	expect_d_eq(mallctl(mallctl_path, &ctl_total_wait_time, &sz, NULL, 0),
+	    0, "Unexpected mallctl() failure for %s", mallctl_path);
+
+	snprintf(mallctl_path, sizeof(mallctl_path), "%s.%s.max_wait_time",
+	    mallctl_prefix, mutex_name);
+	expect_d_eq(mallctl(mallctl_path, &ctl_max_wait_time, &sz, NULL, 0), 0,
+	    "Unexpected mallctl() failure for %s", mallctl_path);
+
+	sz = sizeof(uint32_t);
+	snprintf(mallctl_path, sizeof(mallctl_path), "%s.%s.max_num_thds",
+	    mallctl_prefix, mutex_name);
+	expect_d_eq(mallctl(mallctl_path, &ctl_max_num_thds, &sz, NULL, 0), 0,
+	    "Unexpected mallctl() failure for %s", mallctl_path);
+
+	uint64_t json_num_ops, json_num_wait, json_num_spin_acq;
+	uint64_t json_num_owner_switch, json_total_wait_time,
+	    json_max_wait_time;
+	uint64_t json_max_num_thds;
+
+	expect_false(
+	    json_extract_uint64(mutex_section, "num_ops", &json_num_ops),
+	    "%s: num_ops not found in JSON", mutex_name);
+	expect_false(
+	    json_extract_uint64(mutex_section, "num_wait", &json_num_wait),
+	    "%s: num_wait not found in JSON", mutex_name);
+	expect_false(json_extract_uint64(
+	                 mutex_section, "num_spin_acq", &json_num_spin_acq),
+	    "%s: num_spin_acq not found in JSON", mutex_name);
+	expect_false(json_extract_uint64(mutex_section, "num_owner_switch",
+	                 &json_num_owner_switch),
+	    "%s: num_owner_switch not found in JSON", mutex_name);
+	expect_false(json_extract_uint64(mutex_section, "total_wait_time",
+	                 &json_total_wait_time),
+	    "%s: total_wait_time not found in JSON", mutex_name);
+	expect_false(json_extract_uint64(
+	                 mutex_section, "max_wait_time", &json_max_wait_time),
+	    "%s: max_wait_time not found in JSON", mutex_name);
+	expect_false(json_extract_uint64(
+	                 mutex_section, "max_num_thds", &json_max_num_thds),
+	    "%s: max_num_thds not found in JSON", mutex_name);
+
+	expect_u64_eq(json_num_ops, ctl_num_ops,
+	    "%s: JSON num_ops doesn't match mallctl", mutex_name);
+	expect_u64_eq(json_num_wait, ctl_num_wait,
+	    "%s: JSON num_wait doesn't match mallctl", mutex_name);
+	expect_u64_eq(json_num_spin_acq, ctl_num_spin_acq,
+	    "%s: JSON num_spin_acq doesn't match mallctl", mutex_name);
+	expect_u64_eq(json_num_owner_switch, ctl_num_owner_switch,
+	    "%s: JSON num_owner_switch doesn't match mallctl", mutex_name);
+	expect_u64_eq(json_total_wait_time, ctl_total_wait_time,
+	    "%s: JSON total_wait_time doesn't match mallctl", mutex_name);
+	expect_u64_eq(json_max_wait_time, ctl_max_wait_time,
+	    "%s: JSON max_wait_time doesn't match mallctl", mutex_name);
+	expect_u32_eq((uint32_t)json_max_num_thds, ctl_max_num_thds,
+	    "%s: JSON max_num_thds doesn't match mallctl", mutex_name);
+}
+
+static const char  *global_mutex_names[] = {"background_thread",
+     "max_per_bg_thd", "ctl", "prof", "prof_thds_data", "prof_dump",
+     "prof_recent_alloc", "prof_recent_dump", "prof_stats"};
+static const size_t num_global_mutexes = sizeof(global_mutex_names)
+    / sizeof(global_mutex_names[0]);
+
+static const char  *arena_mutex_names[] = {"large", "extent_avail",
+     "extents_dirty", "extents_muzzy", "extents_retained", "decay_dirty",
+     "decay_muzzy", "base", "tcache_list", "hpa_shard", "hpa_shard_grow",
+     "hpa_sec"};
+static const size_t num_arena_mutexes = sizeof(arena_mutex_names)
+    / sizeof(arena_mutex_names[0]);
+
+static const char *
+json_find_object_end(const char *object_begin) {
+	int depth = 0;
+	for (const char *cur = object_begin; *cur != '\0'; cur++) {
+		if (*cur == '{') {
+			depth++;
+		} else if (*cur == '}') {
+			depth--;
+			if (depth == 0) {
+				return cur;
+			}
+			if (depth < 0) {
+				return NULL;
+			}
+		}
+	}
+	return NULL;
+}
+
+static const char *
+json_find_array_end(const char *array_begin) {
+	int depth = 0;
+	for (const char *cur = array_begin; *cur != '\0'; cur++) {
+		if (*cur == '[') {
+			depth++;
+		} else if (*cur == ']') {
+			depth--;
+			if (depth == 0) {
+				return cur;
+			}
+			if (depth < 0) {
+				return NULL;
+			}
+		}
+	}
+	return NULL;
+}
+
+static const char *
+json_find_previous_hpa_shard_object(
+    const char *json, const char *pos, const char **object_end) {
+	*object_end = NULL;
+	const char *found = NULL;
+	const char *cur = json;
+	const char *next;
+
+	while ((next = strstr(cur, "\"hpa_shard\":{")) != NULL && next < pos) {
+		found = strchr(next, '{');
+		cur = next + 1;
+	}
+	if (found == NULL) {
+		return NULL;
+	}
+	*object_end = json_find_object_end(found);
+	return found;
+}
+
+static const char *
+json_find_named_object(
+    const char *json, const char *key, const char **object_end) {
+	*object_end = NULL;
+	char   search_key[128];
+	size_t written = malloc_snprintf(
+	    search_key, sizeof(search_key), "\"%s\":{", key);
+	if (written >= sizeof(search_key)) {
+		return NULL;
+	}
+
+	const char *object_begin = strstr(json, search_key);
+	if (object_begin == NULL) {
+		return NULL;
+	}
+	object_begin = strchr(object_begin, '{');
+	if (object_begin == NULL) {
+		return NULL;
+	}
+	*object_end = json_find_object_end(object_begin);
+	return object_begin;
+}
+
+static const char *
+json_find_named_array(
+    const char *json, const char *key, const char **array_end) {
+	*array_end = NULL;
+	char   search_key[128];
+	size_t written = malloc_snprintf(
+	    search_key, sizeof(search_key), "\"%s\":[", key);
+	if (written >= sizeof(search_key)) {
+		return NULL;
+	}
+
+	const char *array_begin = strstr(json, search_key);
+	if (array_begin == NULL) {
+		return NULL;
+	}
+	array_begin = strchr(array_begin, '[');
+	if (array_begin == NULL) {
+		return NULL;
+	}
+	*array_end = json_find_array_end(array_begin);
+	return array_begin;
+}
+
+TEST_BEGIN(test_json_stats_mutexes) {
+	test_skip_if(!config_stats);
+
+	uint64_t epoch;
+	expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)),
+	    0, "Unexpected mallctl() failure");
+
+	stats_buf_t sbuf;
+	stats_buf_init(&sbuf);
+	/* "J" for JSON format, "a" to omit per-arena stats. */
+	malloc_stats_print(stats_buf_write_cb, &sbuf, "Ja");
+
+	/* Verify global mutexes under stats.mutexes. */
+	const char *global_mutexes_section = json_find_section(
+	    sbuf.buf, "mutexes");
+	expect_ptr_not_null(global_mutexes_section,
+	    "Could not find global mutexes section in JSON output");
+
+	for (size_t i = 0; i < num_global_mutexes; i++) {
+		verify_mutex_json(global_mutexes_section, "stats.mutexes",
+		    global_mutex_names[i]);
+	}
+
+	/* Verify arena mutexes under stats.arenas.merged.mutexes. */
+	const char *arenas_section = json_find_section(
+	    sbuf.buf, "stats.arenas");
+	expect_ptr_not_null(arenas_section,
+	    "Could not find stats.arenas section in JSON output");
+
+	const char *merged_section = json_find_section(
+	    arenas_section, "merged");
+	expect_ptr_not_null(
+	    merged_section, "Could not find merged section in JSON output");
+
+	const char *arena_mutexes_section = json_find_section(
+	    merged_section, "mutexes");
+	expect_ptr_not_null(arena_mutexes_section,
+	    "Could not find arena mutexes section in JSON output");
+
+	for (size_t i = 0; i < num_arena_mutexes; i++) {
+		/*
+		 * MALLCTL_ARENAS_ALL is 4096 representing all arenas in
+		 * mallctl queries.
+		 */
+		verify_mutex_json(arena_mutexes_section,
+		    "stats.arenas.4096.mutexes", arena_mutex_names[i]);
+	}
+
+	stats_buf_fini(&sbuf);
+}
+TEST_END
+
+/*
+ * Verify that hpa_shard JSON stats contain "ndirty_huge" key in both
+ * full_slabs and empty_slabs sections.  A previous bug emitted duplicate
+ * "nactive_huge" instead of "ndirty_huge".
+ */
+TEST_BEGIN(test_hpa_shard_json_ndirty_huge) {
+	test_skip_if(!config_stats);
+	test_skip_if(!hpa_supported());
+
+	/* Do some allocation to create HPA state. */
+	void *p = mallocx(PAGE, MALLOCX_TCACHE_NONE);
+	expect_ptr_not_null(p, "Unexpected mallocx failure");
+
+	uint64_t epoch = 1;
+	size_t   sz = sizeof(epoch);
+	expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sz), 0,
+	    "Unexpected mallctl() failure");
+
+	stats_buf_t sbuf;
+	stats_buf_init(&sbuf);
+	/* "J" for JSON, include per-arena HPA stats. */
+	malloc_stats_print(stats_buf_write_cb, &sbuf, "J");
+
+	/*
+	 * Find "full_slabs" and check it contains "ndirty_huge".
+	 */
+	const char *full_slabs = strstr(sbuf.buf, "\"full_slabs\"");
+	if (full_slabs != NULL) {
+		const char *empty_slabs = strstr(full_slabs, "\"empty_slabs\"");
+		const char *search_end = empty_slabs != NULL
+		    ? empty_slabs
+		    : sbuf.buf + sbuf.len;
+		/*
+		 * Search for "ndirty_huge" between full_slabs and
+		 * empty_slabs.
+		 */
+		const char *ndirty = full_slabs;
+		bool        found = false;
+		while (ndirty < search_end) {
+			ndirty = strstr(ndirty, "\"ndirty_huge\"");
+			if (ndirty != NULL && ndirty < search_end) {
+				found = true;
+				break;
+			}
+			break;
+		}
+		expect_true(
+		    found, "full_slabs section should contain ndirty_huge key");
+	}
+
+	/*
+	 * Find "empty_slabs" and check it contains "ndirty_huge".
+	 */
+	const char *empty_slabs = strstr(sbuf.buf, "\"empty_slabs\"");
+	if (empty_slabs != NULL) {
+		/* Find the end of the empty_slabs object. */
+		const char *nonfull = strstr(empty_slabs, "\"nonfull_slabs\"");
+		const char *search_end = nonfull != NULL ? nonfull
+		                                         : sbuf.buf + sbuf.len;
+		const char *ndirty = strstr(empty_slabs, "\"ndirty_huge\"");
+		bool        found = (ndirty != NULL && ndirty < search_end);
+		expect_true(found,
+		    "empty_slabs section should contain ndirty_huge key");
+	}
+
+	stats_buf_fini(&sbuf);
+	dallocx(p, MALLOCX_TCACHE_NONE);
+}
+TEST_END
+
+TEST_BEGIN(test_hpa_shard_json_contains_sec_stats) {
+	test_skip_if(!config_stats);
+	test_skip_if(!hpa_supported());
+
+	void *p = mallocx(PAGE, MALLOCX_TCACHE_NONE);
+	expect_ptr_not_null(p, "Unexpected mallocx failure");
+
+	uint64_t epoch = 1;
+	size_t   sz = sizeof(epoch);
+	expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sz), 0,
+	    "Unexpected mallctl() failure");
+
+	stats_buf_t sbuf;
+	stats_buf_init(&sbuf);
+	malloc_stats_print(stats_buf_write_cb, &sbuf, "J");
+
+	const char *sec_bytes = strstr(sbuf.buf, "\"sec_bytes\"");
+	expect_ptr_not_null(sec_bytes, "JSON output should contain sec_bytes");
+	const char *hpa_shard_end = NULL;
+	const char *hpa_shard = json_find_previous_hpa_shard_object(
+	    sbuf.buf, sec_bytes, &hpa_shard_end);
+	expect_ptr_not_null(hpa_shard,
+	    "sec_bytes should be associated with an hpa_shard JSON object");
+	expect_ptr_not_null(hpa_shard_end,
+	    "Could not find end of enclosing hpa_shard JSON object");
+	expect_true(sec_bytes != NULL && sec_bytes < hpa_shard_end,
+	    "sec_bytes should be nested inside hpa_shard JSON object");
+	const char *sec_hits = strstr(hpa_shard, "\"sec_hits\"");
+	expect_true(sec_hits != NULL && sec_hits < hpa_shard_end,
+	    "sec_hits should be nested inside hpa_shard JSON object");
+	const char *sec_misses = strstr(hpa_shard, "\"sec_misses\"");
+	expect_true(sec_misses != NULL && sec_misses < hpa_shard_end,
+	    "sec_misses should be nested inside hpa_shard JSON object");
+
+	stats_buf_fini(&sbuf);
+	dallocx(p, MALLOCX_TCACHE_NONE);
+}
+TEST_END
+
+TEST_BEGIN(test_hpa_shard_json_contains_retained_stats) {
+	test_skip_if(!config_stats);
+	test_skip_if(!hpa_supported());
+
+	void *p = mallocx(PAGE, MALLOCX_TCACHE_NONE);
+	expect_ptr_not_null(p, "Unexpected mallocx failure");
+
+	uint64_t epoch = 1;
+	size_t   sz = sizeof(epoch);
+	expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sz), 0,
+	    "Unexpected mallctl() failure");
+
+	stats_buf_t sbuf;
+	stats_buf_init(&sbuf);
+	malloc_stats_print(stats_buf_write_cb, &sbuf, "J");
+
+	const char *full_slabs_end = NULL;
+	const char *full_slabs = json_find_named_object(
+	    sbuf.buf, "full_slabs", &full_slabs_end);
+	expect_ptr_not_null(
+	    full_slabs, "JSON output should contain full_slabs");
+	const char *full_retained = strstr(full_slabs, "\"nretained_nonhuge\"");
+	expect_true(full_retained != NULL && full_retained < full_slabs_end,
+	    "full_slabs should contain nretained_nonhuge");
+
+	const char *empty_slabs_end = NULL;
+	const char *empty_slabs = json_find_named_object(
+	    sbuf.buf, "empty_slabs", &empty_slabs_end);
+	expect_ptr_not_null(
+	    empty_slabs, "JSON output should contain empty_slabs");
+	const char *empty_retained = strstr(
+	    empty_slabs, "\"nretained_nonhuge\"");
+	expect_true(empty_retained != NULL && empty_retained < empty_slabs_end,
+	    "empty_slabs should contain nretained_nonhuge");
+
+	const char *nonfull_slabs_end = NULL;
+	const char *nonfull_slabs = json_find_named_array(
+	    sbuf.buf, "nonfull_slabs", &nonfull_slabs_end);
+	expect_ptr_not_null(
+	    nonfull_slabs, "JSON output should contain nonfull_slabs");
+	const char *nonfull_retained = strstr(
+	    nonfull_slabs, "\"nretained_nonhuge\"");
+	expect_true(
+	    nonfull_retained != NULL && nonfull_retained < nonfull_slabs_end,
+	    "nonfull_slabs should contain nretained_nonhuge");
+
+	stats_buf_fini(&sbuf);
+	dallocx(p, MALLOCX_TCACHE_NONE);
+}
+TEST_END
+
+int
+main(void) {
+	return test_no_reentrancy(test_json_stats_mutexes,
+	    test_hpa_shard_json_ndirty_huge,
+	    test_hpa_shard_json_contains_sec_stats,
+	    test_hpa_shard_json_contains_retained_stats);
+}
diff --git a/test/unit/junk.c b/test/unit/junk.c
index 543092f1..80f51e15 100644
--- a/test/unit/junk.c
+++ b/test/unit/junk.c
@@ -1,13 +1,13 @@
 #include "test/jemalloc_test.h"
 
-#define arraylen(arr) (sizeof(arr)/sizeof(arr[0]))
+#define arraylen(arr) (sizeof(arr) / sizeof(arr[0]))
 static size_t ptr_ind;
 static void *volatile ptrs[100];
-static void *last_junked_ptr;
+static void  *last_junked_ptr;
 static size_t last_junked_usize;
 
 static void
-reset() {
+reset(void) {
 	ptr_ind = 0;
 	last_junked_ptr = NULL;
 	last_junked_usize = 0;
@@ -21,17 +21,17 @@ test_junk(void *ptr, size_t usize) {
 
 static void
 do_allocs(size_t size, bool zero, size_t lg_align) {
-#define JUNK_ALLOC(...)							\
-	do {								\
-		assert(ptr_ind + 1 < arraylen(ptrs));			\
-		void *ptr = __VA_ARGS__;				\
-		assert_ptr_not_null(ptr, "");				\
-		ptrs[ptr_ind++] = ptr;					\
-		if (opt_junk_alloc && !zero) {				\
-			expect_ptr_eq(ptr, last_junked_ptr, "");	\
-			expect_zu_eq(last_junked_usize,			\
-			    TEST_MALLOC_SIZE(ptr), "");			\
-		}							\
+#define JUNK_ALLOC(...)                                                        \
+	do {                                                                   \
+		assert(ptr_ind + 1 < arraylen(ptrs));                          \
+		void *ptr = __VA_ARGS__;                                       \
+		assert_ptr_not_null(ptr, "");                                  \
+		ptrs[ptr_ind++] = ptr;                                         \
+		if (opt_junk_alloc && !zero) {                                 \
+			expect_ptr_eq(ptr, last_junked_ptr, "");               \
+			expect_zu_eq(                                          \
+			    last_junked_usize, TEST_MALLOC_SIZE(ptr), "");     \
+		}                                                              \
 	} while (0)
 	if (!zero && lg_align == 0) {
 		JUNK_ALLOC(malloc(size));
@@ -51,21 +51,20 @@ do_allocs(size_t size, bool zero, size_t lg_align) {
 #endif
 	int zero_flag = zero ? MALLOCX_ZERO : 0;
 	JUNK_ALLOC(mallocx(size, zero_flag | MALLOCX_LG_ALIGN(lg_align)));
-	JUNK_ALLOC(mallocx(size, zero_flag | MALLOCX_LG_ALIGN(lg_align)
-	    | MALLOCX_TCACHE_NONE));
+	JUNK_ALLOC(mallocx(size,
+	    zero_flag | MALLOCX_LG_ALIGN(lg_align) | MALLOCX_TCACHE_NONE));
 	if (lg_align >= LG_SIZEOF_PTR) {
 		void *memalign_result;
-		int err = posix_memalign(&memalign_result, (1 << lg_align),
-		    size);
+		int   err = posix_memalign(
+                    &memalign_result, (1 << lg_align), size);
 		assert_d_eq(err, 0, "");
 		JUNK_ALLOC(memalign_result);
 	}
 }
 
 TEST_BEGIN(test_junk_alloc_free) {
-	bool zerovals[] = {false, true};
-	size_t sizevals[] = {
-		1, 8, 100, 1000, 100*1000
+	bool   zerovals[] = {false, true};
+	size_t sizevals[] = {1, 8, 100, 1000, 100 * 1000
 	/*
 	 * Memory allocation failure is a real possibility in 32-bit mode.
 	 * Rather than try to check in the face of resource exhaustion, we just
@@ -75,49 +74,49 @@ TEST_BEGIN(test_junk_alloc_free) {
 	 * mechanisms; but this is in fact the case.
 	 */
 #if LG_SIZEOF_PTR == 3
-		    , 10 * 1000 * 1000
+	    ,
+	    10 * 1000 * 1000
 #endif
 	};
-	size_t lg_alignvals[] = {
-		0, 4, 10, 15, 16, LG_PAGE
+	size_t lg_alignvals[] = {0, 4, 10, 15, 16, LG_PAGE
 #if LG_SIZEOF_PTR == 3
-		    , 20, 24
+	    ,
+	    20, 24
 #endif
 	};
 
-#define JUNK_FREE(...)							\
-	do {								\
-		do_allocs(size, zero, lg_align);			\
-		for (size_t n = 0; n < ptr_ind; n++) {			\
-			void *ptr = ptrs[n];				\
-			__VA_ARGS__;					\
-			if (opt_junk_free) {				\
-				assert_ptr_eq(ptr, last_junked_ptr,	\
-				    "");				\
-				assert_zu_eq(usize, last_junked_usize,	\
-				    "");				\
-			}						\
-			reset();					\
-		}							\
+#define JUNK_FREE(...)                                                         \
+	do {                                                                   \
+		do_allocs(size, zero, lg_align);                               \
+		for (size_t n = 0; n < ptr_ind; n++) {                         \
+			void *ptr = ptrs[n];                                   \
+			__VA_ARGS__;                                           \
+			if (opt_junk_free) {                                   \
+				assert_ptr_eq(ptr, last_junked_ptr, "");       \
+				assert_zu_eq(usize, last_junked_usize, "");    \
+			}                                                      \
+			reset();                                               \
+		}                                                              \
 	} while (0)
 	for (size_t i = 0; i < arraylen(zerovals); i++) {
 		for (size_t j = 0; j < arraylen(sizevals); j++) {
 			for (size_t k = 0; k < arraylen(lg_alignvals); k++) {
-				bool zero = zerovals[i];
+				bool   zero = zerovals[i];
 				size_t size = sizevals[j];
 				size_t lg_align = lg_alignvals[k];
-				size_t usize = nallocx(size,
-				    MALLOCX_LG_ALIGN(lg_align));
+				size_t usize = nallocx(
+				    size, MALLOCX_LG_ALIGN(lg_align));
 
 				JUNK_FREE(free(ptr));
 				JUNK_FREE(dallocx(ptr, 0));
 				JUNK_FREE(dallocx(ptr, MALLOCX_TCACHE_NONE));
-				JUNK_FREE(dallocx(ptr, MALLOCX_LG_ALIGN(
-				    lg_align)));
-				JUNK_FREE(sdallocx(ptr, usize, MALLOCX_LG_ALIGN(
-				    lg_align)));
+				JUNK_FREE(
+				    dallocx(ptr, MALLOCX_LG_ALIGN(lg_align)));
+				JUNK_FREE(sdallocx(
+				    ptr, usize, MALLOCX_LG_ALIGN(lg_align)));
 				JUNK_FREE(sdallocx(ptr, usize,
-				    MALLOCX_TCACHE_NONE | MALLOCX_LG_ALIGN(lg_align)));
+				    MALLOCX_TCACHE_NONE
+				        | MALLOCX_LG_ALIGN(lg_align)));
 				if (opt_zero_realloc_action
 				    == zero_realloc_action_free) {
 					JUNK_FREE(realloc(ptr, 0));
@@ -138,24 +137,24 @@ TEST_BEGIN(test_realloc_expand) {
 	ptr = malloc(SC_SMALL_MAXCLASS);
 	expanded = realloc(ptr, SC_LARGE_MINCLASS);
 	expect_ptr_eq(last_junked_ptr, &expanded[SC_SMALL_MAXCLASS], "");
-	expect_zu_eq(last_junked_usize,
-	    SC_LARGE_MINCLASS - SC_SMALL_MAXCLASS, "");
+	expect_zu_eq(
+	    last_junked_usize, SC_LARGE_MINCLASS - SC_SMALL_MAXCLASS, "");
 	free(expanded);
 
 	/* rallocx(..., 0) */
 	ptr = malloc(SC_SMALL_MAXCLASS);
 	expanded = rallocx(ptr, SC_LARGE_MINCLASS, 0);
 	expect_ptr_eq(last_junked_ptr, &expanded[SC_SMALL_MAXCLASS], "");
-	expect_zu_eq(last_junked_usize,
-	    SC_LARGE_MINCLASS - SC_SMALL_MAXCLASS, "");
+	expect_zu_eq(
+	    last_junked_usize, SC_LARGE_MINCLASS - SC_SMALL_MAXCLASS, "");
 	free(expanded);
 
 	/* rallocx(..., nonzero) */
 	ptr = malloc(SC_SMALL_MAXCLASS);
 	expanded = rallocx(ptr, SC_LARGE_MINCLASS, MALLOCX_TCACHE_NONE);
 	expect_ptr_eq(last_junked_ptr, &expanded[SC_SMALL_MAXCLASS], "");
-	expect_zu_eq(last_junked_usize,
-	    SC_LARGE_MINCLASS - SC_SMALL_MAXCLASS, "");
+	expect_zu_eq(
+	    last_junked_usize, SC_LARGE_MINCLASS - SC_SMALL_MAXCLASS, "");
 	free(expanded);
 
 	/* rallocx(..., MALLOCX_ZERO) */
@@ -189,7 +188,5 @@ main(void) {
 	 * We check the last pointer junked.  If a reentrant call happens, that
 	 * might be an internal allocation.
 	 */
-	return test_no_reentrancy(
-	    test_junk_alloc_free,
-	    test_realloc_expand);
+	return test_no_reentrancy(test_junk_alloc_free, test_realloc_expand);
 }
diff --git a/test/unit/large_ralloc.c b/test/unit/large_ralloc.c
new file mode 100644
index 00000000..1f08d125
--- /dev/null
+++ b/test/unit/large_ralloc.c
@@ -0,0 +1,76 @@
+#include "test/jemalloc_test.h"
+
+/*
+ * Test that large_ralloc_no_move causes a failure (returns true) when
+ * in-place extent expansion cannot succeed for either usize_max or
+ * usize_min.
+ *
+ * A previous bug omitted the ! negation on the second extent expansion
+ * attempt (usize_min fallback), causing false success (return false) when
+ * the expansion actually failed.
+ */
+TEST_BEGIN(test_large_ralloc_no_move_expand_fail) {
+	/*
+	 * Allocate two adjacent large objects in the same arena to block
+	 * in-place expansion of the first one.
+	 */
+	unsigned arena_ind;
+	size_t   sz = sizeof(arena_ind);
+	expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0),
+	    0, "Unexpected mallctl() failure");
+
+	int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE;
+
+	size_t large_sz = SC_LARGE_MINCLASS;
+	/* Allocate several blocks to prevent expansion of the first. */
+	void *blocks[8];
+	for (size_t i = 0; i < ARRAY_SIZE(blocks); i++) {
+		blocks[i] = mallocx(large_sz, flags);
+		expect_ptr_not_null(blocks[i], "Unexpected mallocx() failure");
+	}
+
+	/*
+	 * Try to expand blocks[0] in place. Use usize_min < usize_max to
+	 * exercise the fallback path.
+	 */
+	tsd_t   *tsd = tsd_fetch();
+	edata_t *edata = emap_edata_lookup(
+	    tsd_tsdn(tsd), &arena_emap_global, blocks[0]);
+	expect_ptr_not_null(edata, "Unexpected edata lookup failure");
+
+	size_t oldusize = edata_usize_get(edata);
+	size_t usize_min = sz_s2u(oldusize + 1);
+	size_t usize_max = sz_s2u(oldusize * 2);
+
+	/* Ensure min and max are in different size classes. */
+	if (usize_min == usize_max) {
+		usize_max = sz_s2u(usize_min + 1);
+	}
+
+	bool ret = large_ralloc_no_move(
+	    tsd_tsdn(tsd), edata, usize_min, usize_max, false);
+
+	/*
+	 * With adjacent allocations blocking expansion, this should fail.
+	 * The bug caused ret == false (success) even when expansion failed.
+	 */
+	if (!ret) {
+		/*
+		 * Expansion might actually succeed if adjacent memory
+		 * is free.  Verify the size actually changed.
+		 */
+		size_t newusize = edata_usize_get(edata);
+		expect_zu_ge(newusize, usize_min,
+		    "Expansion reported success but size didn't change");
+	}
+
+	for (size_t i = 0; i < ARRAY_SIZE(blocks); i++) {
+		dallocx(blocks[i], flags);
+	}
+}
+TEST_END
+
+int
+main(void) {
+	return test_no_reentrancy(test_large_ralloc_no_move_expand_fail);
+}
diff --git a/test/unit/log.c b/test/unit/log.c
index c09b5896..bf4ee1ff 100644
--- a/test/unit/log.c
+++ b/test/unit/log.c
@@ -18,16 +18,13 @@ expect_no_logging(const char *names) {
 	int count = 0;
 
 	for (int i = 0; i < 10; i++) {
-		log_do_begin(log_l1)
-			count++;
+		log_do_begin(log_l1) count++;
 		log_do_end(log_l1)
 
-		log_do_begin(log_l2)
-			count++;
+		    log_do_begin(log_l2) count++;
 		log_do_end(log_l2)
 
-		log_do_begin(log_l2_a)
-			count++;
+		    log_do_begin(log_l2_a) count++;
 		log_do_end(log_l2_a)
 	}
 	expect_d_eq(count, 0, "Disabled logging not ignored!");
@@ -57,8 +54,7 @@ TEST_BEGIN(test_log_enabled_direct) {
 	count = 0;
 	update_log_var_names("l1");
 	for (int i = 0; i < 10; i++) {
-		log_do_begin(log_l1)
-			count++;
+		log_do_begin(log_l1) count++;
 		log_do_end(log_l1)
 	}
 	expect_d_eq(count, 10, "Mis-logged!");
@@ -66,8 +62,7 @@ TEST_BEGIN(test_log_enabled_direct) {
 	count = 0;
 	update_log_var_names("l1.a");
 	for (int i = 0; i < 10; i++) {
-		log_do_begin(log_l1_a)
-			count++;
+		log_do_begin(log_l1_a) count++;
 		log_do_end(log_l1_a)
 	}
 	expect_d_eq(count, 10, "Mis-logged!");
@@ -75,12 +70,10 @@ TEST_BEGIN(test_log_enabled_direct) {
 	count = 0;
 	update_log_var_names("l1.a|abc|l2|def");
 	for (int i = 0; i < 10; i++) {
-		log_do_begin(log_l1_a)
-			count++;
+		log_do_begin(log_l1_a) count++;
 		log_do_end(log_l1_a)
 
-		log_do_begin(log_l2)
-			count++;
+		    log_do_begin(log_l2) count++;
 		log_do_end(log_l2)
 	}
 	expect_d_eq(count, 20, "Mis-logged!");
@@ -108,28 +101,22 @@ TEST_BEGIN(test_log_enabled_indirect) {
 	/* 4 are on total, so should sum to 40. */
 	int count = 0;
 	for (int i = 0; i < 10; i++) {
-		log_do_begin(log_l1)
-			count++;
+		log_do_begin(log_l1) count++;
 		log_do_end(log_l1)
 
-		log_do_begin(log_l1a)
-			count++;
+		    log_do_begin(log_l1a) count++;
 		log_do_end(log_l1a)
 
-		log_do_begin(log_l1_a)
-			count++;
+		    log_do_begin(log_l1_a) count++;
 		log_do_end(log_l1_a)
 
-		log_do_begin(log_l2_a)
-			count++;
+		    log_do_begin(log_l2_a) count++;
 		log_do_end(log_l2_a)
 
-		log_do_begin(log_l2_b_a)
-			count++;
+		    log_do_begin(log_l2_b_a) count++;
 		log_do_end(log_l2_b_a)
 
-		log_do_begin(log_l2_b_b)
-			count++;
+		    log_do_begin(log_l2_b_b) count++;
 		log_do_end(log_l2_b_b)
 	}
 
@@ -147,12 +134,10 @@ TEST_BEGIN(test_log_enabled_global) {
 
 	int count = 0;
 	for (int i = 0; i < 10; i++) {
-		log_do_begin(log_l1)
-		    count++;
+		log_do_begin(log_l1) count++;
 		log_do_end(log_l1)
 
-		log_do_begin(log_l2_a_a)
-		    count++;
+		    log_do_begin(log_l2_a_a) count++;
 		log_do_end(log_l2_a_a)
 	}
 	expect_d_eq(count, 20, "Mis-logged!");
@@ -167,8 +152,7 @@ TEST_BEGIN(test_logs_if_no_init) {
 
 	int count = 0;
 	for (int i = 0; i < 10; i++) {
-		log_do_begin(l)
-			count++;
+		log_do_begin(l) count++;
 		log_do_end(l)
 	}
 	expect_d_eq(count, 0, "Logging shouldn't happen if not initialized.");
@@ -188,11 +172,7 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_log_disabled,
-	    test_log_enabled_direct,
-	    test_log_enabled_indirect,
-	    test_log_enabled_global,
-	    test_logs_if_no_init,
-	    test_log_only_format_string);
+	return test(test_log_disabled, test_log_enabled_direct,
+	    test_log_enabled_indirect, test_log_enabled_global,
+	    test_logs_if_no_init, test_log_only_format_string);
 }
diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c
index 6efc8f1b..11710c27 100644
--- a/test/unit/mallctl.c
+++ b/test/unit/mallctl.c
@@ -6,26 +6,27 @@
 
 TEST_BEGIN(test_mallctl_errors) {
 	uint64_t epoch;
-	size_t sz;
+	size_t   sz;
 
 	expect_d_eq(mallctl("no_such_name", NULL, NULL, NULL, 0), ENOENT,
 	    "mallctl() should return ENOENT for non-existent names");
 
 	expect_d_eq(mallctl("version", NULL, NULL, "0.0.0", strlen("0.0.0")),
-	    EPERM, "mallctl() should return EPERM on attempt to write "
+	    EPERM,
+	    "mallctl() should return EPERM on attempt to write "
 	    "read-only value");
 
-	expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch,
-	    sizeof(epoch)-1), EINVAL,
-	    "mallctl() should return EINVAL for input size mismatch");
-	expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch,
-	    sizeof(epoch)+1), EINVAL,
-	    "mallctl() should return EINVAL for input size mismatch");
+	expect_d_eq(
+	    mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch) - 1),
+	    EINVAL, "mallctl() should return EINVAL for input size mismatch");
+	expect_d_eq(
+	    mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch) + 1),
+	    EINVAL, "mallctl() should return EINVAL for input size mismatch");
 
-	sz = sizeof(epoch)-1;
+	sz = sizeof(epoch) - 1;
 	expect_d_eq(mallctl("epoch", (void *)&epoch, &sz, NULL, 0), EINVAL,
 	    "mallctl() should return EINVAL for output size mismatch");
-	sz = sizeof(epoch)+1;
+	sz = sizeof(epoch) + 1;
 	expect_d_eq(mallctl("epoch", (void *)&epoch, &sz, NULL, 0), EINVAL,
 	    "mallctl() should return EINVAL for output size mismatch");
 }
@@ -35,7 +36,7 @@ TEST_BEGIN(test_mallctlnametomib_errors) {
 	size_t mib[1];
 	size_t miblen;
 
-	miblen = sizeof(mib)/sizeof(size_t);
+	miblen = sizeof(mib) / sizeof(size_t);
 	expect_d_eq(mallctlnametomib("no_such_name", mib, &miblen), ENOENT,
 	    "mallctlnametomib() should return ENOENT for non-existent names");
 }
@@ -43,34 +44,38 @@ TEST_END
 
 TEST_BEGIN(test_mallctlbymib_errors) {
 	uint64_t epoch;
-	size_t sz;
-	size_t mib[1];
-	size_t miblen;
+	size_t   sz;
+	size_t   mib[1];
+	size_t   miblen;
 
-	miblen = sizeof(mib)/sizeof(size_t);
+	miblen = sizeof(mib) / sizeof(size_t);
 	expect_d_eq(mallctlnametomib("version", mib, &miblen), 0,
 	    "Unexpected mallctlnametomib() failure");
 
-	expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, "0.0.0",
-	    strlen("0.0.0")), EPERM, "mallctl() should return EPERM on "
+	expect_d_eq(
+	    mallctlbymib(mib, miblen, NULL, NULL, "0.0.0", strlen("0.0.0")),
+	    EPERM,
+	    "mallctl() should return EPERM on "
 	    "attempt to write read-only value");
 
-	miblen = sizeof(mib)/sizeof(size_t);
+	miblen = sizeof(mib) / sizeof(size_t);
 	expect_d_eq(mallctlnametomib("epoch", mib, &miblen), 0,
 	    "Unexpected mallctlnametomib() failure");
 
 	expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&epoch,
-	    sizeof(epoch)-1), EINVAL,
+	                sizeof(epoch) - 1),
+	    EINVAL,
 	    "mallctlbymib() should return EINVAL for input size mismatch");
 	expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&epoch,
-	    sizeof(epoch)+1), EINVAL,
+	                sizeof(epoch) + 1),
+	    EINVAL,
 	    "mallctlbymib() should return EINVAL for input size mismatch");
 
-	sz = sizeof(epoch)-1;
+	sz = sizeof(epoch) - 1;
 	expect_d_eq(mallctlbymib(mib, miblen, (void *)&epoch, &sz, NULL, 0),
 	    EINVAL,
 	    "mallctlbymib() should return EINVAL for output size mismatch");
-	sz = sizeof(epoch)+1;
+	sz = sizeof(epoch) + 1;
 	expect_d_eq(mallctlbymib(mib, miblen, (void *)&epoch, &sz, NULL, 0),
 	    EINVAL,
 	    "mallctlbymib() should return EINVAL for output size mismatch");
@@ -79,7 +84,7 @@ TEST_END
 
 TEST_BEGIN(test_mallctl_read_write) {
 	uint64_t old_epoch, new_epoch;
-	size_t sz = sizeof(old_epoch);
+	size_t   sz = sizeof(old_epoch);
 
 	/* Blind. */
 	expect_d_eq(mallctl("epoch", NULL, NULL, NULL, 0), 0,
@@ -92,14 +97,15 @@ TEST_BEGIN(test_mallctl_read_write) {
 	expect_zu_eq(sz, sizeof(old_epoch), "Unexpected output size");
 
 	/* Write. */
-	expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&new_epoch,
-	    sizeof(new_epoch)), 0, "Unexpected mallctl() failure");
+	expect_d_eq(
+	    mallctl("epoch", NULL, NULL, (void *)&new_epoch, sizeof(new_epoch)),
+	    0, "Unexpected mallctl() failure");
 	expect_zu_eq(sz, sizeof(old_epoch), "Unexpected output size");
 
 	/* Read+write. */
 	expect_d_eq(mallctl("epoch", (void *)&old_epoch, &sz,
-	    (void *)&new_epoch, sizeof(new_epoch)), 0,
-	    "Unexpected mallctl() failure");
+	                (void *)&new_epoch, sizeof(new_epoch)),
+	    0, "Unexpected mallctl() failure");
 	expect_zu_eq(sz, sizeof(old_epoch), "Unexpected output size");
 }
 TEST_END
@@ -133,10 +139,10 @@ TEST_BEGIN(test_mallctlnametomib_short_name) {
 TEST_END
 
 TEST_BEGIN(test_mallctlmibnametomib) {
-	size_t mib[4];
-	size_t miblen = 4;
+	size_t   mib[4];
+	size_t   miblen = 4;
 	uint32_t result, result_ref;
-	size_t len_result = sizeof(uint32_t);
+	size_t   len_result = sizeof(uint32_t);
 
 	tsd_t *tsd = tsd_fetch();
 
@@ -178,20 +184,21 @@ TEST_BEGIN(test_mallctlmibnametomib) {
 	/* Valid case. */
 	assert_d_eq(ctl_mibnametomib(tsd, mib, 3, "nregs", &miblen), 0, "");
 	assert_zu_eq(miblen, 4, "");
-	assert_d_eq(mallctlbymib(mib, miblen, &result, &len_result, NULL, 0),
-	    0, "Unexpected mallctlbymib() failure");
-	assert_d_eq(mallctl("arenas.bin.0.nregs", &result_ref, &len_result,
-	    NULL, 0), 0, "Unexpected mallctl() failure");
+	assert_d_eq(mallctlbymib(mib, miblen, &result, &len_result, NULL, 0), 0,
+	    "Unexpected mallctlbymib() failure");
+	assert_d_eq(
+	    mallctl("arenas.bin.0.nregs", &result_ref, &len_result, NULL, 0), 0,
+	    "Unexpected mallctl() failure");
 	expect_zu_eq(result, result_ref,
 	    "mallctlbymib() and mallctl() returned different result");
 }
 TEST_END
 
 TEST_BEGIN(test_mallctlbymibname) {
-	size_t mib[4];
-	size_t miblen = 4;
+	size_t   mib[4];
+	size_t   miblen = 4;
 	uint32_t result, result_ref;
-	size_t len_result = sizeof(uint32_t);
+	size_t   len_result = sizeof(uint32_t);
 
 	tsd_t *tsd = tsd_fetch();
 
@@ -202,50 +209,60 @@ TEST_BEGIN(test_mallctlbymibname) {
 	assert_zu_eq(miblen, 1, "");
 
 	miblen = 4;
-	assert_d_eq(ctl_bymibname(tsd, mib, 1, "bin.0", &miblen,
-	    &result, &len_result, NULL, 0), ENOENT, "");
+	assert_d_eq(ctl_bymibname(tsd, mib, 1, "bin.0", &miblen, &result,
+	                &len_result, NULL, 0),
+	    ENOENT, "");
 	miblen = 4;
-	assert_d_eq(ctl_bymibname(tsd, mib, 1, "bin.0.bob", &miblen,
-	    &result, &len_result, NULL, 0), ENOENT, "");
+	assert_d_eq(ctl_bymibname(tsd, mib, 1, "bin.0.bob", &miblen, &result,
+	                &len_result, NULL, 0),
+	    ENOENT, "");
 	assert_zu_eq(miblen, 4, "");
 
 	/* Valid cases. */
 
-	assert_d_eq(mallctl("arenas.bin.0.nregs", &result_ref, &len_result,
-	    NULL, 0), 0, "Unexpected mallctl() failure");
+	assert_d_eq(
+	    mallctl("arenas.bin.0.nregs", &result_ref, &len_result, NULL, 0), 0,
+	    "Unexpected mallctl() failure");
 	miblen = 4;
 
 	assert_d_eq(ctl_bymibname(tsd, mib, 0, "arenas.bin.0.nregs", &miblen,
-	    &result, &len_result, NULL, 0), 0, "");
+	                &result, &len_result, NULL, 0),
+	    0, "");
 	assert_zu_eq(miblen, 4, "");
 	expect_zu_eq(result, result_ref, "Unexpected result");
 
 	assert_d_eq(ctl_bymibname(tsd, mib, 1, "bin.0.nregs", &miblen, &result,
-	    &len_result, NULL, 0), 0, "");
+	                &len_result, NULL, 0),
+	    0, "");
 	assert_zu_eq(miblen, 4, "");
 	expect_zu_eq(result, result_ref, "Unexpected result");
 
 	assert_d_eq(ctl_bymibname(tsd, mib, 2, "0.nregs", &miblen, &result,
-	    &len_result, NULL, 0), 0, "");
+	                &len_result, NULL, 0),
+	    0, "");
 	assert_zu_eq(miblen, 4, "");
 	expect_zu_eq(result, result_ref, "Unexpected result");
 
 	assert_d_eq(ctl_bymibname(tsd, mib, 3, "nregs", &miblen, &result,
-	    &len_result, NULL, 0), 0, "");
+	                &len_result, NULL, 0),
+	    0, "");
 	assert_zu_eq(miblen, 4, "");
 	expect_zu_eq(result, result_ref, "Unexpected result");
 }
 TEST_END
 
 TEST_BEGIN(test_mallctl_config) {
-#define TEST_MALLCTL_CONFIG(config, t) do {				\
-	t oldval;							\
-	size_t sz = sizeof(oldval);					\
-	expect_d_eq(mallctl("config."#config, (void *)&oldval, &sz,	\
-	    NULL, 0), 0, "Unexpected mallctl() failure");		\
-	expect_b_eq(oldval, config_##config, "Incorrect config value");	\
-	expect_zu_eq(sz, sizeof(oldval), "Unexpected output size");	\
-} while (0)
+#define TEST_MALLCTL_CONFIG(config, t)                                         \
+	do {                                                                   \
+		t      oldval;                                                 \
+		size_t sz = sizeof(oldval);                                    \
+		expect_d_eq(                                                   \
+		    mallctl("config." #config, (void *)&oldval, &sz, NULL, 0), \
+		    0, "Unexpected mallctl() failure");                        \
+		expect_b_eq(                                                   \
+		    oldval, config_##config, "Incorrect config value");        \
+		expect_zu_eq(sz, sizeof(oldval), "Unexpected output size");    \
+	} while (0)
 
 	TEST_MALLCTL_CONFIG(cache_oblivious, bool);
 	TEST_MALLCTL_CONFIG(debug, bool);
@@ -255,6 +272,7 @@ TEST_BEGIN(test_mallctl_config) {
 	TEST_MALLCTL_CONFIG(prof, bool);
 	TEST_MALLCTL_CONFIG(prof_libgcc, bool);
 	TEST_MALLCTL_CONFIG(prof_libunwind, bool);
+	TEST_MALLCTL_CONFIG(prof_frameptr, bool);
 	TEST_MALLCTL_CONFIG(stats, bool);
 	TEST_MALLCTL_CONFIG(utrace, bool);
 	TEST_MALLCTL_CONFIG(xmalloc, bool);
@@ -266,32 +284,40 @@ TEST_END
 TEST_BEGIN(test_mallctl_opt) {
 	bool config_always = true;
 
-#define TEST_MALLCTL_OPT(t, opt, config) do {				\
-	t oldval;							\
-	size_t sz = sizeof(oldval);					\
-	int expected = config_##config ? 0 : ENOENT;			\
-	int result = mallctl("opt."#opt, (void *)&oldval, &sz, NULL,	\
-	    0);								\
-	expect_d_eq(result, expected,					\
-	    "Unexpected mallctl() result for opt."#opt);		\
-	expect_zu_eq(sz, sizeof(oldval), "Unexpected output size");	\
-} while (0)
+#define TEST_MALLCTL_OPT(t, opt, config)                                       \
+	do {                                                                   \
+		t      oldval;                                                 \
+		size_t sz = sizeof(oldval);                                    \
+		int    expected = config_##config ? 0 : ENOENT;                \
+		int    result = mallctl(                                       \
+                    "opt." #opt, (void *)&oldval, &sz, NULL, 0);            \
+		expect_d_eq(result, expected,                                  \
+		    "Unexpected mallctl() result for opt." #opt);              \
+		expect_zu_eq(sz, sizeof(oldval), "Unexpected output size");    \
+	} while (0)
 
 	TEST_MALLCTL_OPT(bool, abort, always);
 	TEST_MALLCTL_OPT(bool, abort_conf, always);
 	TEST_MALLCTL_OPT(bool, cache_oblivious, always);
 	TEST_MALLCTL_OPT(bool, trust_madvise, always);
+	TEST_MALLCTL_OPT(
+	    bool, experimental_hpa_start_huge_if_thp_always, always);
+	TEST_MALLCTL_OPT(bool, experimental_hpa_enforce_hugify, always);
 	TEST_MALLCTL_OPT(bool, confirm_conf, always);
 	TEST_MALLCTL_OPT(const char *, metadata_thp, always);
 	TEST_MALLCTL_OPT(bool, retain, always);
 	TEST_MALLCTL_OPT(const char *, dss, always);
 	TEST_MALLCTL_OPT(bool, hpa, always);
 	TEST_MALLCTL_OPT(size_t, hpa_slab_max_alloc, always);
+	TEST_MALLCTL_OPT(bool, hpa_hugify_sync, always);
 	TEST_MALLCTL_OPT(size_t, hpa_sec_nshards, always);
 	TEST_MALLCTL_OPT(size_t, hpa_sec_max_alloc, always);
 	TEST_MALLCTL_OPT(size_t, hpa_sec_max_bytes, always);
-	TEST_MALLCTL_OPT(size_t, hpa_sec_bytes_after_flush, always);
 	TEST_MALLCTL_OPT(size_t, hpa_sec_batch_fill_extra, always);
+	TEST_MALLCTL_OPT(ssize_t, experimental_hpa_max_purge_nhp, always);
+	TEST_MALLCTL_OPT(size_t, hpa_purge_threshold, always);
+	TEST_MALLCTL_OPT(uint64_t, hpa_min_purge_delay_ms, always);
+	TEST_MALLCTL_OPT(const char *, hpa_hugify_style, always);
 	TEST_MALLCTL_OPT(unsigned, narenas, always);
 	TEST_MALLCTL_OPT(const char *, percpu_arena, always);
 	TEST_MALLCTL_OPT(size_t, oversize_threshold, always);
@@ -314,8 +340,10 @@ TEST_BEGIN(test_mallctl_opt) {
 	TEST_MALLCTL_OPT(bool, prof, prof);
 	TEST_MALLCTL_OPT(const char *, prof_prefix, prof);
 	TEST_MALLCTL_OPT(bool, prof_active, prof);
+	TEST_MALLCTL_OPT(unsigned, prof_bt_max, prof);
 	TEST_MALLCTL_OPT(ssize_t, lg_prof_sample, prof);
 	TEST_MALLCTL_OPT(bool, prof_accum, prof);
+	TEST_MALLCTL_OPT(bool, prof_pid_namespace, prof);
 	TEST_MALLCTL_OPT(ssize_t, lg_prof_interval, prof);
 	TEST_MALLCTL_OPT(bool, prof_gdump, prof);
 	TEST_MALLCTL_OPT(bool, prof_final, prof);
@@ -325,6 +353,9 @@ TEST_BEGIN(test_mallctl_opt) {
 	TEST_MALLCTL_OPT(bool, prof_stats, prof);
 	TEST_MALLCTL_OPT(bool, prof_sys_thread_name, prof);
 	TEST_MALLCTL_OPT(ssize_t, lg_san_uaf_align, uaf_detection);
+	TEST_MALLCTL_OPT(unsigned, debug_double_free_max_scan, always);
+	TEST_MALLCTL_OPT(bool, disable_large_size_classes, always);
+	TEST_MALLCTL_OPT(size_t, process_madvise_max_batch, always);
 
 #undef TEST_MALLCTL_OPT
 }
@@ -332,8 +363,8 @@ TEST_END
 
 TEST_BEGIN(test_manpage_example) {
 	unsigned nbins, i;
-	size_t mib[4];
-	size_t len, miblen;
+	size_t   mib[4];
+	size_t   len, miblen;
 
 	len = sizeof(nbins);
 	expect_d_eq(mallctl("arenas.nbins", (void *)&nbins, &len, NULL, 0), 0,
@@ -347,8 +378,9 @@ TEST_BEGIN(test_manpage_example) {
 
 		mib[2] = i;
 		len = sizeof(bin_size);
-		expect_d_eq(mallctlbymib(mib, miblen, (void *)&bin_size, &len,
-		    NULL, 0), 0, "Unexpected mallctlbymib() failure");
+		expect_d_eq(
+		    mallctlbymib(mib, miblen, (void *)&bin_size, &len, NULL, 0),
+		    0, "Unexpected mallctlbymib() failure");
 		/* Do something with bin_size... */
 	}
 }
@@ -371,8 +403,8 @@ TEST_BEGIN(test_tcache_none) {
 	void *p1 = mallocx(42, 0);
 	expect_ptr_not_null(p1, "Unexpected mallocx() failure");
 	if (!opt_prof && !san_uaf_detection_enabled()) {
-		expect_ptr_eq(p0, p1,
-		    "Expected tcache to allocate cached region");
+		expect_ptr_eq(
+		    p0, p1, "Expected tcache to allocate cached region");
 	}
 
 	/* Clean up. */
@@ -381,12 +413,12 @@ TEST_BEGIN(test_tcache_none) {
 TEST_END
 
 TEST_BEGIN(test_tcache) {
-#define NTCACHES	10
+#define NTCACHES 10
 	unsigned tis[NTCACHES];
-	void *ps[NTCACHES];
-	void *qs[NTCACHES];
+	void    *ps[NTCACHES];
+	void    *qs[NTCACHES];
 	unsigned i;
-	size_t sz, psz, qsz;
+	size_t   sz, psz, qsz;
 
 	psz = 42;
 	qsz = nallocx(psz, 0) + 1;
@@ -394,39 +426,41 @@ TEST_BEGIN(test_tcache) {
 	/* Create tcaches. */
 	for (i = 0; i < NTCACHES; i++) {
 		sz = sizeof(unsigned);
-		expect_d_eq(mallctl("tcache.create", (void *)&tis[i], &sz, NULL,
-		    0), 0, "Unexpected mallctl() failure, i=%u", i);
+		expect_d_eq(
+		    mallctl("tcache.create", (void *)&tis[i], &sz, NULL, 0), 0,
+		    "Unexpected mallctl() failure, i=%u", i);
 	}
 
 	/* Exercise tcache ID recycling. */
 	for (i = 0; i < NTCACHES; i++) {
 		expect_d_eq(mallctl("tcache.destroy", NULL, NULL,
-		    (void *)&tis[i], sizeof(unsigned)), 0,
-		    "Unexpected mallctl() failure, i=%u", i);
+		                (void *)&tis[i], sizeof(unsigned)),
+		    0, "Unexpected mallctl() failure, i=%u", i);
 	}
 	for (i = 0; i < NTCACHES; i++) {
 		sz = sizeof(unsigned);
-		expect_d_eq(mallctl("tcache.create", (void *)&tis[i], &sz, NULL,
-		    0), 0, "Unexpected mallctl() failure, i=%u", i);
+		expect_d_eq(
+		    mallctl("tcache.create", (void *)&tis[i], &sz, NULL, 0), 0,
+		    "Unexpected mallctl() failure, i=%u", i);
 	}
 
 	/* Flush empty tcaches. */
 	for (i = 0; i < NTCACHES; i++) {
 		expect_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tis[i],
-		    sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u",
-		    i);
+		                sizeof(unsigned)),
+		    0, "Unexpected mallctl() failure, i=%u", i);
 	}
 
 	/* Cache some allocations. */
 	for (i = 0; i < NTCACHES; i++) {
 		ps[i] = mallocx(psz, MALLOCX_TCACHE(tis[i]));
-		expect_ptr_not_null(ps[i], "Unexpected mallocx() failure, i=%u",
-		    i);
+		expect_ptr_not_null(
+		    ps[i], "Unexpected mallocx() failure, i=%u", i);
 		dallocx(ps[i], MALLOCX_TCACHE(tis[i]));
 
 		qs[i] = mallocx(qsz, MALLOCX_TCACHE(tis[i]));
-		expect_ptr_not_null(qs[i], "Unexpected mallocx() failure, i=%u",
-		    i);
+		expect_ptr_not_null(
+		    qs[i], "Unexpected mallocx() failure, i=%u", i);
 		dallocx(qs[i], MALLOCX_TCACHE(tis[i]));
 	}
 
@@ -434,11 +468,13 @@ TEST_BEGIN(test_tcache) {
 	for (i = 0; i < NTCACHES; i++) {
 		void *p0 = ps[i];
 		ps[i] = mallocx(psz, MALLOCX_TCACHE(tis[i]));
-		expect_ptr_not_null(ps[i], "Unexpected mallocx() failure, i=%u",
-		    i);
+		expect_ptr_not_null(
+		    ps[i], "Unexpected mallocx() failure, i=%u", i);
 		if (!san_uaf_detection_enabled()) {
-			expect_ptr_eq(ps[i], p0, "Expected mallocx() to "
-			    "allocate cached region, i=%u", i);
+			expect_ptr_eq(ps[i], p0,
+			    "Expected mallocx() to "
+			    "allocate cached region, i=%u",
+			    i);
 		}
 	}
 
@@ -446,11 +482,13 @@ TEST_BEGIN(test_tcache) {
 	for (i = 0; i < NTCACHES; i++) {
 		void *q0 = qs[i];
 		qs[i] = rallocx(ps[i], qsz, MALLOCX_TCACHE(tis[i]));
-		expect_ptr_not_null(qs[i], "Unexpected rallocx() failure, i=%u",
-		    i);
+		expect_ptr_not_null(
+		    qs[i], "Unexpected rallocx() failure, i=%u", i);
 		if (!san_uaf_detection_enabled()) {
-			expect_ptr_eq(qs[i], q0, "Expected rallocx() to "
-			    "allocate cached region, i=%u", i);
+			expect_ptr_eq(qs[i], q0,
+			    "Expected rallocx() to "
+			    "allocate cached region, i=%u",
+			    i);
 		}
 		/* Avoid undefined behavior in case of test failure. */
 		if (qs[i] == NULL) {
@@ -462,17 +500,17 @@ TEST_BEGIN(test_tcache) {
 	}
 
 	/* Flush some non-empty tcaches. */
-	for (i = 0; i < NTCACHES/2; i++) {
+	for (i = 0; i < NTCACHES / 2; i++) {
 		expect_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tis[i],
-		    sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u",
-		    i);
+		                sizeof(unsigned)),
+		    0, "Unexpected mallctl() failure, i=%u", i);
 	}
 
 	/* Destroy tcaches. */
 	for (i = 0; i < NTCACHES; i++) {
 		expect_d_eq(mallctl("tcache.destroy", NULL, NULL,
-		    (void *)&tis[i], sizeof(unsigned)), 0,
-		    "Unexpected mallctl() failure, i=%u", i);
+		                (void *)&tis[i], sizeof(unsigned)),
+		    0, "Unexpected mallctl() failure, i=%u", i);
 	}
 }
 TEST_END
@@ -481,7 +519,7 @@ TEST_BEGIN(test_thread_arena) {
 	unsigned old_arena_ind, new_arena_ind, narenas;
 
 	const char *opa;
-	size_t sz = sizeof(opa);
+	size_t      sz = sizeof(opa);
 	expect_d_eq(mallctl("opt.percpu_arena", (void *)&opa, &sz, NULL, 0), 0,
 	    "Unexpected mallctl() failure");
 
@@ -496,20 +534,23 @@ TEST_BEGIN(test_thread_arena) {
 	if (strcmp(opa, "disabled") == 0) {
 		new_arena_ind = narenas - 1;
 		expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz,
-		    (void *)&new_arena_ind, sizeof(unsigned)), 0,
-		    "Unexpected mallctl() failure");
+		                (void *)&new_arena_ind, sizeof(unsigned)),
+		    0, "Unexpected mallctl() failure");
 		new_arena_ind = 0;
 		expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz,
-		    (void *)&new_arena_ind, sizeof(unsigned)), 0,
-		    "Unexpected mallctl() failure");
+		                (void *)&new_arena_ind, sizeof(unsigned)),
+		    0, "Unexpected mallctl() failure");
 	} else {
 		expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz,
-		    NULL, 0), 0, "Unexpected mallctl() failure");
+		                NULL, 0),
+		    0, "Unexpected mallctl() failure");
 		new_arena_ind = percpu_arena_ind_limit(opt_percpu_arena) - 1;
 		if (old_arena_ind != new_arena_ind) {
-			expect_d_eq(mallctl("thread.arena",
-			    (void *)&old_arena_ind, &sz, (void *)&new_arena_ind,
-			    sizeof(unsigned)), EPERM, "thread.arena ctl "
+			expect_d_eq(
+			    mallctl("thread.arena", (void *)&old_arena_ind, &sz,
+			        (void *)&new_arena_ind, sizeof(unsigned)),
+			    EPERM,
+			    "thread.arena ctl "
 			    "should not be allowed with percpu arena");
 		}
 	}
@@ -518,10 +559,10 @@ TEST_END
 
 TEST_BEGIN(test_arena_i_initialized) {
 	unsigned narenas, i;
-	size_t sz;
-	size_t mib[3];
-	size_t miblen = sizeof(mib) / sizeof(size_t);
-	bool initialized;
+	size_t   sz;
+	size_t   mib[3];
+	size_t   miblen = sizeof(mib) / sizeof(size_t);
+	bool     initialized;
 
 	sz = sizeof(narenas);
 	expect_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0),
@@ -532,8 +573,9 @@ TEST_BEGIN(test_arena_i_initialized) {
 	for (i = 0; i < narenas; i++) {
 		mib[1] = i;
 		sz = sizeof(initialized);
-		expect_d_eq(mallctlbymib(mib, miblen, &initialized, &sz, NULL,
-		    0), 0, "Unexpected mallctl() failure");
+		expect_d_eq(
+		    mallctlbymib(mib, miblen, &initialized, &sz, NULL, 0), 0,
+		    "Unexpected mallctl() failure");
 	}
 
 	mib[1] = MALLCTL_ARENAS_ALL;
@@ -545,10 +587,10 @@ TEST_BEGIN(test_arena_i_initialized) {
 
 	/* Equivalent to the above but using mallctl() directly. */
 	sz = sizeof(initialized);
-	expect_d_eq(mallctl(
-	    "arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".initialized",
-	    (void *)&initialized, &sz, NULL, 0), 0,
-	    "Unexpected mallctl() failure");
+	expect_d_eq(
+	    mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".initialized",
+	        (void *)&initialized, &sz, NULL, 0),
+	    0, "Unexpected mallctl() failure");
 	expect_true(initialized,
 	    "Merged arena statistics should always be initialized");
 }
@@ -556,30 +598,31 @@ TEST_END
 
 TEST_BEGIN(test_arena_i_dirty_decay_ms) {
 	ssize_t dirty_decay_ms, orig_dirty_decay_ms, prev_dirty_decay_ms;
-	size_t sz = sizeof(ssize_t);
+	size_t  sz = sizeof(ssize_t);
 
 	expect_d_eq(mallctl("arena.0.dirty_decay_ms",
-	    (void *)&orig_dirty_decay_ms, &sz, NULL, 0), 0,
-	    "Unexpected mallctl() failure");
+	                (void *)&orig_dirty_decay_ms, &sz, NULL, 0),
+	    0, "Unexpected mallctl() failure");
 
 	dirty_decay_ms = -2;
 	expect_d_eq(mallctl("arena.0.dirty_decay_ms", NULL, NULL,
-	    (void *)&dirty_decay_ms, sizeof(ssize_t)), EFAULT,
-	    "Unexpected mallctl() success");
+	                (void *)&dirty_decay_ms, sizeof(ssize_t)),
+	    EFAULT, "Unexpected mallctl() success");
 
 	dirty_decay_ms = 0x7fffffff;
 	expect_d_eq(mallctl("arena.0.dirty_decay_ms", NULL, NULL,
-	    (void *)&dirty_decay_ms, sizeof(ssize_t)), 0,
-	    "Unexpected mallctl() failure");
+	                (void *)&dirty_decay_ms, sizeof(ssize_t)),
+	    0, "Unexpected mallctl() failure");
 
 	for (prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms = -1;
-	    dirty_decay_ms < 20; prev_dirty_decay_ms = dirty_decay_ms,
-	    dirty_decay_ms++) {
+	    dirty_decay_ms < 20;
+	    prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms++) {
 		ssize_t old_dirty_decay_ms;
 
 		expect_d_eq(mallctl("arena.0.dirty_decay_ms",
-		    (void *)&old_dirty_decay_ms, &sz, (void *)&dirty_decay_ms,
-		    sizeof(ssize_t)), 0, "Unexpected mallctl() failure");
+		                (void *)&old_dirty_decay_ms, &sz,
+		                (void *)&dirty_decay_ms, sizeof(ssize_t)),
+		    0, "Unexpected mallctl() failure");
 		expect_zd_eq(old_dirty_decay_ms, prev_dirty_decay_ms,
 		    "Unexpected old arena.0.dirty_decay_ms");
 	}
@@ -588,30 +631,31 @@ TEST_END
 
 TEST_BEGIN(test_arena_i_muzzy_decay_ms) {
 	ssize_t muzzy_decay_ms, orig_muzzy_decay_ms, prev_muzzy_decay_ms;
-	size_t sz = sizeof(ssize_t);
+	size_t  sz = sizeof(ssize_t);
 
 	expect_d_eq(mallctl("arena.0.muzzy_decay_ms",
-	    (void *)&orig_muzzy_decay_ms, &sz, NULL, 0), 0,
-	    "Unexpected mallctl() failure");
+	                (void *)&orig_muzzy_decay_ms, &sz, NULL, 0),
+	    0, "Unexpected mallctl() failure");
 
 	muzzy_decay_ms = -2;
 	expect_d_eq(mallctl("arena.0.muzzy_decay_ms", NULL, NULL,
-	    (void *)&muzzy_decay_ms, sizeof(ssize_t)), EFAULT,
-	    "Unexpected mallctl() success");
+	                (void *)&muzzy_decay_ms, sizeof(ssize_t)),
+	    EFAULT, "Unexpected mallctl() success");
 
 	muzzy_decay_ms = 0x7fffffff;
 	expect_d_eq(mallctl("arena.0.muzzy_decay_ms", NULL, NULL,
-	    (void *)&muzzy_decay_ms, sizeof(ssize_t)), 0,
-	    "Unexpected mallctl() failure");
+	                (void *)&muzzy_decay_ms, sizeof(ssize_t)),
+	    0, "Unexpected mallctl() failure");
 
 	for (prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms = -1;
-	    muzzy_decay_ms < 20; prev_muzzy_decay_ms = muzzy_decay_ms,
-	    muzzy_decay_ms++) {
+	    muzzy_decay_ms < 20;
+	    prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms++) {
 		ssize_t old_muzzy_decay_ms;
 
 		expect_d_eq(mallctl("arena.0.muzzy_decay_ms",
-		    (void *)&old_muzzy_decay_ms, &sz, (void *)&muzzy_decay_ms,
-		    sizeof(ssize_t)), 0, "Unexpected mallctl() failure");
+		                (void *)&old_muzzy_decay_ms, &sz,
+		                (void *)&muzzy_decay_ms, sizeof(ssize_t)),
+		    0, "Unexpected mallctl() failure");
 		expect_zd_eq(old_muzzy_decay_ms, prev_muzzy_decay_ms,
 		    "Unexpected old arena.0.muzzy_decay_ms");
 	}
@@ -620,9 +664,9 @@ TEST_END
 
 TEST_BEGIN(test_arena_i_purge) {
 	unsigned narenas;
-	size_t sz = sizeof(unsigned);
-	size_t mib[3];
-	size_t miblen = 3;
+	size_t   sz = sizeof(unsigned);
+	size_t   mib[3];
+	size_t   miblen = 3;
 
 	expect_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0,
 	    "Unexpected mallctl() failure");
@@ -643,9 +687,9 @@ TEST_END
 
 TEST_BEGIN(test_arena_i_decay) {
 	unsigned narenas;
-	size_t sz = sizeof(unsigned);
-	size_t mib[3];
-	size_t miblen = 3;
+	size_t   sz = sizeof(unsigned);
+	size_t   mib[3];
+	size_t   miblen = 3;
 
 	expect_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0,
 	    "Unexpected mallctl() failure");
@@ -666,46 +710,91 @@ TEST_END
 
 TEST_BEGIN(test_arena_i_dss) {
 	const char *dss_prec_old, *dss_prec_new;
-	size_t sz = sizeof(dss_prec_old);
-	size_t mib[3];
-	size_t miblen;
+	size_t      sz = sizeof(dss_prec_old);
+	size_t      mib[3];
+	size_t      miblen;
 
-	miblen = sizeof(mib)/sizeof(size_t);
+	miblen = sizeof(mib) / sizeof(size_t);
 	expect_d_eq(mallctlnametomib("arena.0.dss", mib, &miblen), 0,
 	    "Unexpected mallctlnametomib() error");
 
 	dss_prec_new = "disabled";
 	expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz,
-	    (void *)&dss_prec_new, sizeof(dss_prec_new)), 0,
-	    "Unexpected mallctl() failure");
-	expect_str_ne(dss_prec_old, "primary",
-	    "Unexpected default for dss precedence");
+	                (void *)&dss_prec_new, sizeof(dss_prec_new)),
+	    0, "Unexpected mallctl() failure");
+	expect_str_ne(
+	    dss_prec_old, "primary", "Unexpected default for dss precedence");
 
 	expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_new, &sz,
-	    (void *)&dss_prec_old, sizeof(dss_prec_old)), 0,
-	    "Unexpected mallctl() failure");
+	                (void *)&dss_prec_old, sizeof(dss_prec_old)),
+	    0, "Unexpected mallctl() failure");
 
-	expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL,
-	    0), 0, "Unexpected mallctl() failure");
-	expect_str_ne(dss_prec_old, "primary",
-	    "Unexpected value for dss precedence");
+	expect_d_eq(
+	    mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL, 0), 0,
+	    "Unexpected mallctl() failure");
+	expect_str_ne(
+	    dss_prec_old, "primary", "Unexpected value for dss precedence");
 
 	mib[1] = narenas_total_get();
 	dss_prec_new = "disabled";
 	expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz,
-	    (void *)&dss_prec_new, sizeof(dss_prec_new)), 0,
-	    "Unexpected mallctl() failure");
-	expect_str_ne(dss_prec_old, "primary",
-	    "Unexpected default for dss precedence");
+	                (void *)&dss_prec_new, sizeof(dss_prec_new)),
+	    0, "Unexpected mallctl() failure");
+	expect_str_ne(
+	    dss_prec_old, "primary", "Unexpected default for dss precedence");
 
 	expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_new, &sz,
-	    (void *)&dss_prec_old, sizeof(dss_prec_new)), 0,
-	    "Unexpected mallctl() failure");
+	                (void *)&dss_prec_old, sizeof(dss_prec_new)),
+	    0, "Unexpected mallctl() failure");
 
-	expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL,
-	    0), 0, "Unexpected mallctl() failure");
-	expect_str_ne(dss_prec_old, "primary",
-	    "Unexpected value for dss precedence");
+	expect_d_eq(
+	    mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL, 0), 0,
+	    "Unexpected mallctl() failure");
+	expect_str_ne(
+	    dss_prec_old, "primary", "Unexpected value for dss precedence");
+}
+TEST_END
+
+TEST_BEGIN(test_arena_i_name) {
+	unsigned    arena_ind;
+	size_t      ind_sz = sizeof(arena_ind);
+	size_t      mib[3];
+	size_t      miblen;
+	char        name_old[ARENA_NAME_LEN];
+	char       *name_oldp = name_old;
+	size_t      sz = sizeof(name_oldp);
+	char        default_name[ARENA_NAME_LEN];
+	const char *name_new = "test name";
+	const char *super_long_name = "A name longer than ARENA_NAME_LEN";
+	size_t      super_long_name_len = strlen(super_long_name);
+	assert(super_long_name_len > ARENA_NAME_LEN);
+
+	miblen = sizeof(mib) / sizeof(size_t);
+	expect_d_eq(mallctlnametomib("arena.0.name", mib, &miblen), 0,
+	    "Unexpected mallctlnametomib() error");
+
+	expect_d_eq(
+	    mallctl("arenas.create", (void *)&arena_ind, &ind_sz, NULL, 0), 0,
+	    "Unexpected mallctl() failure");
+	mib[1] = arena_ind;
+
+	malloc_snprintf(
+	    default_name, sizeof(default_name), "manual_%u", arena_ind);
+	expect_d_eq(mallctlbymib(mib, miblen, (void *)&name_oldp, &sz,
+	                (void *)&name_new, sizeof(name_new)),
+	    0, "Unexpected mallctl() failure");
+	expect_str_eq(
+	    name_old, default_name, "Unexpected default value for arena name");
+
+	expect_d_eq(mallctlbymib(mib, miblen, (void *)&name_oldp, &sz,
+	                (void *)&super_long_name, sizeof(super_long_name)),
+	    0, "Unexpected mallctl() failure");
+	expect_str_eq(name_old, name_new, "Unexpected value for arena name");
+
+	expect_d_eq(mallctlbymib(mib, miblen, (void *)&name_oldp, &sz, NULL, 0),
+	    0, "Unexpected mallctl() failure");
+	int cmp = strncmp(name_old, super_long_name, ARENA_NAME_LEN - 1);
+	expect_true(cmp == 0, "Unexpected value for long arena name ");
 }
 TEST_END
 
@@ -714,14 +803,14 @@ TEST_BEGIN(test_arena_i_retain_grow_limit) {
 	size_t mib[3];
 	size_t miblen;
 
-	bool retain_enabled;
+	bool   retain_enabled;
 	size_t sz = sizeof(retain_enabled);
-	expect_d_eq(mallctl("opt.retain", &retain_enabled, &sz, NULL, 0),
-	    0, "Unexpected mallctl() failure");
+	expect_d_eq(mallctl("opt.retain", &retain_enabled, &sz, NULL, 0), 0,
+	    "Unexpected mallctl() failure");
 	test_skip_if(!retain_enabled);
 
 	sz = sizeof(default_limit);
-	miblen = sizeof(mib)/sizeof(size_t);
+	miblen = sizeof(mib) / sizeof(size_t);
 	expect_d_eq(mallctlnametomib("arena.0.retain_grow_limit", mib, &miblen),
 	    0, "Unexpected mallctlnametomib() error");
 
@@ -731,58 +820,62 @@ TEST_BEGIN(test_arena_i_retain_grow_limit) {
 	    "Unexpected default for retain_grow_limit");
 
 	new_limit = PAGE - 1;
-	expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit,
-	    sizeof(new_limit)), EFAULT, "Unexpected mallctl() success");
+	expect_d_eq(mallctlbymib(
+	                mib, miblen, NULL, NULL, &new_limit, sizeof(new_limit)),
+	    EFAULT, "Unexpected mallctl() success");
 
 	new_limit = PAGE + 1;
-	expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit,
-	    sizeof(new_limit)), 0, "Unexpected mallctl() failure");
+	expect_d_eq(mallctlbymib(
+	                mib, miblen, NULL, NULL, &new_limit, sizeof(new_limit)),
+	    0, "Unexpected mallctl() failure");
 	expect_d_eq(mallctlbymib(mib, miblen, &old_limit, &sz, NULL, 0), 0,
 	    "Unexpected mallctl() failure");
-	expect_zu_eq(old_limit, PAGE,
-	    "Unexpected value for retain_grow_limit");
+	expect_zu_eq(old_limit, PAGE, "Unexpected value for retain_grow_limit");
 
 	/* Expect grow less than psize class 10. */
 	new_limit = sz_pind2sz(10) - 1;
-	expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit,
-	    sizeof(new_limit)), 0, "Unexpected mallctl() failure");
+	expect_d_eq(mallctlbymib(
+	                mib, miblen, NULL, NULL, &new_limit, sizeof(new_limit)),
+	    0, "Unexpected mallctl() failure");
 	expect_d_eq(mallctlbymib(mib, miblen, &old_limit, &sz, NULL, 0), 0,
 	    "Unexpected mallctl() failure");
-	expect_zu_eq(old_limit, sz_pind2sz(9),
-	    "Unexpected value for retain_grow_limit");
+	expect_zu_eq(
+	    old_limit, sz_pind2sz(9), "Unexpected value for retain_grow_limit");
 
 	/* Restore to default. */
 	expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &default_limit,
-	    sizeof(default_limit)), 0, "Unexpected mallctl() failure");
+	                sizeof(default_limit)),
+	    0, "Unexpected mallctl() failure");
 }
 TEST_END
 
 TEST_BEGIN(test_arenas_dirty_decay_ms) {
 	ssize_t dirty_decay_ms, orig_dirty_decay_ms, prev_dirty_decay_ms;
-	size_t sz = sizeof(ssize_t);
+	size_t  sz = sizeof(ssize_t);
 
 	expect_d_eq(mallctl("arenas.dirty_decay_ms",
-	    (void *)&orig_dirty_decay_ms, &sz, NULL, 0), 0,
-	    "Unexpected mallctl() failure");
+	                (void *)&orig_dirty_decay_ms, &sz, NULL, 0),
+	    0, "Unexpected mallctl() failure");
 
 	dirty_decay_ms = -2;
 	expect_d_eq(mallctl("arenas.dirty_decay_ms", NULL, NULL,
-	    (void *)&dirty_decay_ms, sizeof(ssize_t)), EFAULT,
-	    "Unexpected mallctl() success");
+	                (void *)&dirty_decay_ms, sizeof(ssize_t)),
+	    EFAULT, "Unexpected mallctl() success");
 
 	dirty_decay_ms = 0x7fffffff;
 	expect_d_eq(mallctl("arenas.dirty_decay_ms", NULL, NULL,
-	    (void *)&dirty_decay_ms, sizeof(ssize_t)), 0,
-	    "Expected mallctl() failure");
+	                (void *)&dirty_decay_ms, sizeof(ssize_t)),
+	    0, "Expected mallctl() failure");
 
 	for (prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms = -1;
-	    dirty_decay_ms < 20; prev_dirty_decay_ms = dirty_decay_ms,
-	    dirty_decay_ms++) {
+	    dirty_decay_ms < 20;
+	    prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms++) {
 		ssize_t old_dirty_decay_ms;
 
 		expect_d_eq(mallctl("arenas.dirty_decay_ms",
-		    (void *)&old_dirty_decay_ms, &sz, (void *)&dirty_decay_ms,
-		    sizeof(ssize_t)), 0, "Unexpected mallctl() failure");
+		                (void *)&old_dirty_decay_ms, &sz,
+		                (void *)&dirty_decay_ms, sizeof(ssize_t)),
+		    0, "Unexpected mallctl() failure");
 		expect_zd_eq(old_dirty_decay_ms, prev_dirty_decay_ms,
 		    "Unexpected old arenas.dirty_decay_ms");
 	}
@@ -791,30 +884,31 @@ TEST_END
 
 TEST_BEGIN(test_arenas_muzzy_decay_ms) {
 	ssize_t muzzy_decay_ms, orig_muzzy_decay_ms, prev_muzzy_decay_ms;
-	size_t sz = sizeof(ssize_t);
+	size_t  sz = sizeof(ssize_t);
 
 	expect_d_eq(mallctl("arenas.muzzy_decay_ms",
-	    (void *)&orig_muzzy_decay_ms, &sz, NULL, 0), 0,
-	    "Unexpected mallctl() failure");
+	                (void *)&orig_muzzy_decay_ms, &sz, NULL, 0),
+	    0, "Unexpected mallctl() failure");
 
 	muzzy_decay_ms = -2;
 	expect_d_eq(mallctl("arenas.muzzy_decay_ms", NULL, NULL,
-	    (void *)&muzzy_decay_ms, sizeof(ssize_t)), EFAULT,
-	    "Unexpected mallctl() success");
+	                (void *)&muzzy_decay_ms, sizeof(ssize_t)),
+	    EFAULT, "Unexpected mallctl() success");
 
 	muzzy_decay_ms = 0x7fffffff;
 	expect_d_eq(mallctl("arenas.muzzy_decay_ms", NULL, NULL,
-	    (void *)&muzzy_decay_ms, sizeof(ssize_t)), 0,
-	    "Expected mallctl() failure");
+	                (void *)&muzzy_decay_ms, sizeof(ssize_t)),
+	    0, "Expected mallctl() failure");
 
 	for (prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms = -1;
-	    muzzy_decay_ms < 20; prev_muzzy_decay_ms = muzzy_decay_ms,
-	    muzzy_decay_ms++) {
+	    muzzy_decay_ms < 20;
+	    prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms++) {
 		ssize_t old_muzzy_decay_ms;
 
 		expect_d_eq(mallctl("arenas.muzzy_decay_ms",
-		    (void *)&old_muzzy_decay_ms, &sz, (void *)&muzzy_decay_ms,
-		    sizeof(ssize_t)), 0, "Unexpected mallctl() failure");
+		                (void *)&old_muzzy_decay_ms, &sz,
+		                (void *)&muzzy_decay_ms, sizeof(ssize_t)),
+		    0, "Unexpected mallctl() failure");
 		expect_zd_eq(old_muzzy_decay_ms, prev_muzzy_decay_ms,
 		    "Unexpected old arenas.muzzy_decay_ms");
 	}
@@ -822,16 +916,19 @@ TEST_BEGIN(test_arenas_muzzy_decay_ms) {
 TEST_END
 
 TEST_BEGIN(test_arenas_constants) {
-#define TEST_ARENAS_CONSTANT(t, name, expected) do {			\
-	t name;								\
-	size_t sz = sizeof(t);						\
-	expect_d_eq(mallctl("arenas."#name, (void *)&name, &sz, NULL,	\
-	    0), 0, "Unexpected mallctl() failure");			\
-	expect_zu_eq(name, expected, "Incorrect "#name" size");		\
-} while (0)
+#define TEST_ARENAS_CONSTANT(t, name, expected)                                \
+	do {                                                                   \
+		t      name;                                                   \
+		size_t sz = sizeof(t);                                         \
+		expect_d_eq(                                                   \
+		    mallctl("arenas." #name, (void *)&name, &sz, NULL, 0), 0,  \
+		    "Unexpected mallctl() failure");                           \
+		expect_zu_eq(name, expected, "Incorrect " #name " size");      \
+	} while (0)
 
 	TEST_ARENAS_CONSTANT(size_t, quantum, QUANTUM);
 	TEST_ARENAS_CONSTANT(size_t, page, PAGE);
+	TEST_ARENAS_CONSTANT(size_t, hugepage, HUGEPAGE);
 	TEST_ARENAS_CONSTANT(unsigned, nbins, SC_NBINS);
 	TEST_ARENAS_CONSTANT(unsigned, nlextents, SC_NSIZES - SC_NBINS);
 
@@ -840,35 +937,136 @@ TEST_BEGIN(test_arenas_constants) {
 TEST_END
 
 TEST_BEGIN(test_arenas_bin_constants) {
-#define TEST_ARENAS_BIN_CONSTANT(t, name, expected) do {		\
-	t name;								\
-	size_t sz = sizeof(t);						\
-	expect_d_eq(mallctl("arenas.bin.0."#name, (void *)&name, &sz,	\
-	    NULL, 0), 0, "Unexpected mallctl() failure");		\
-	expect_zu_eq(name, expected, "Incorrect "#name" size");		\
-} while (0)
+#define TEST_ARENAS_BIN_CONSTANT(t, name, expected)                            \
+	do {                                                                   \
+		t      name;                                                   \
+		size_t sz = sizeof(t);                                         \
+		expect_d_eq(mallctl("arenas.bin.0." #name, (void *)&name, &sz, \
+		                NULL, 0),                                      \
+		    0, "Unexpected mallctl() failure");                        \
+		expect_zu_eq(name, expected, "Incorrect " #name " size");      \
+	} while (0)
 
 	TEST_ARENAS_BIN_CONSTANT(size_t, size, bin_infos[0].reg_size);
 	TEST_ARENAS_BIN_CONSTANT(uint32_t, nregs, bin_infos[0].nregs);
-	TEST_ARENAS_BIN_CONSTANT(size_t, slab_size,
-	    bin_infos[0].slab_size);
+	TEST_ARENAS_BIN_CONSTANT(size_t, slab_size, bin_infos[0].slab_size);
 	TEST_ARENAS_BIN_CONSTANT(uint32_t, nshards, bin_infos[0].n_shards);
 
 #undef TEST_ARENAS_BIN_CONSTANT
 }
 TEST_END
 
-TEST_BEGIN(test_arenas_lextent_constants) {
-#define TEST_ARENAS_LEXTENT_CONSTANT(t, name, expected) do {		\
-	t name;								\
-	size_t sz = sizeof(t);						\
-	expect_d_eq(mallctl("arenas.lextent.0."#name, (void *)&name,	\
-	    &sz, NULL, 0), 0, "Unexpected mallctl() failure");		\
-	expect_zu_eq(name, expected, "Incorrect "#name" size");		\
-} while (0)
+TEST_BEGIN(test_arenas_bin_oob) {
+	size_t sz;
+	size_t result;
+	char   buf[128];
 
-	TEST_ARENAS_LEXTENT_CONSTANT(size_t, size,
-	    SC_LARGE_MINCLASS);
+	/*
+	 * Querying the bin at index SC_NBINS should fail because valid
+	 * indices are [0, SC_NBINS).
+	 */
+	sz = sizeof(result);
+	malloc_snprintf(
+	    buf, sizeof(buf), "arenas.bin.%u.size", (unsigned)SC_NBINS);
+	expect_d_eq(mallctl(buf, (void *)&result, &sz, NULL, 0), ENOENT,
+	    "mallctl() should fail for out-of-bounds bin index SC_NBINS");
+
+	/* One below the boundary should succeed. */
+	malloc_snprintf(
+	    buf, sizeof(buf), "arenas.bin.%u.size", (unsigned)(SC_NBINS - 1));
+	expect_d_eq(mallctl(buf, (void *)&result, &sz, NULL, 0), 0,
+	    "mallctl() should succeed for valid bin index SC_NBINS-1");
+}
+TEST_END
+
+TEST_BEGIN(test_arenas_lextent_oob) {
+	size_t   sz;
+	size_t   result;
+	char     buf[128];
+	unsigned nlextents = SC_NSIZES - SC_NBINS;
+
+	/*
+	 * Querying the lextent at index nlextents should fail because valid
+	 * indices are [0, nlextents).
+	 */
+	sz = sizeof(result);
+	malloc_snprintf(buf, sizeof(buf), "arenas.lextent.%u.size", nlextents);
+	expect_d_eq(mallctl(buf, (void *)&result, &sz, NULL, 0), ENOENT,
+	    "mallctl() should fail for out-of-bounds lextent index");
+
+	/* Querying the last element (nlextents - 1) should succeed. */
+	malloc_snprintf(
+	    buf, sizeof(buf), "arenas.lextent.%u.size", nlextents - 1);
+	expect_d_eq(mallctl(buf, (void *)&result, &sz, NULL, 0), 0,
+	    "mallctl() should succeed for valid lextent index");
+}
+TEST_END
+
+TEST_BEGIN(test_stats_arenas_bins_oob) {
+	test_skip_if(!config_stats);
+	size_t   sz;
+	uint64_t result;
+	char     buf[128];
+
+	uint64_t epoch = 1;
+	sz = sizeof(epoch);
+	expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sz), 0,
+	    "Unexpected mallctl() failure");
+
+	/* SC_NBINS is one past the valid range. */
+	sz = sizeof(result);
+	malloc_snprintf(buf, sizeof(buf), "stats.arenas.0.bins.%u.nmalloc",
+	    (unsigned)SC_NBINS);
+	expect_d_eq(mallctl(buf, (void *)&result, &sz, NULL, 0), ENOENT,
+	    "mallctl() should fail for out-of-bounds stats bin index");
+
+	/* SC_NBINS - 1 is valid. */
+	malloc_snprintf(buf, sizeof(buf), "stats.arenas.0.bins.%u.nmalloc",
+	    (unsigned)(SC_NBINS - 1));
+	expect_d_eq(mallctl(buf, (void *)&result, &sz, NULL, 0), 0,
+	    "mallctl() should succeed for valid stats bin index");
+}
+TEST_END
+
+TEST_BEGIN(test_stats_arenas_lextents_oob) {
+	test_skip_if(!config_stats);
+	size_t   sz;
+	uint64_t result;
+	char     buf[128];
+	unsigned nlextents = SC_NSIZES - SC_NBINS;
+
+	uint64_t epoch = 1;
+	sz = sizeof(epoch);
+	expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sz), 0,
+	    "Unexpected mallctl() failure");
+
+	/* nlextents is one past the valid range. */
+	sz = sizeof(result);
+	malloc_snprintf(
+	    buf, sizeof(buf), "stats.arenas.0.lextents.%u.nmalloc", nlextents);
+	expect_d_eq(mallctl(buf, (void *)&result, &sz, NULL, 0), ENOENT,
+	    "mallctl() should fail for out-of-bounds stats lextent index");
+
+	/* nlextents - 1 is valid. */
+	malloc_snprintf(buf, sizeof(buf), "stats.arenas.0.lextents.%u.nmalloc",
+	    nlextents - 1);
+	expect_d_eq(mallctl(buf, (void *)&result, &sz, NULL, 0), 0,
+	    "mallctl() should succeed for valid stats lextent index");
+}
+TEST_END
+
+TEST_BEGIN(test_arenas_lextent_constants) {
+#define TEST_ARENAS_LEXTENT_CONSTANT(t, name, expected)                        \
+	do {                                                                   \
+		t      name;                                                   \
+		size_t sz = sizeof(t);                                         \
+		expect_d_eq(mallctl("arenas.lextent.0." #name, (void *)&name,  \
+		                &sz, NULL, 0),                                 \
+		    0, "Unexpected mallctl() failure");                        \
+		expect_zu_eq(name, expected, "Incorrect " #name " size");      \
+	} while (0)
+
+	TEST_ARENAS_LEXTENT_CONSTANT(size_t, size, SC_LARGE_MINCLASS);
 
 #undef TEST_ARENAS_LEXTENT_CONSTANT
 }
@@ -876,25 +1074,27 @@ TEST_END
 
 TEST_BEGIN(test_arenas_create) {
 	unsigned narenas_before, arena, narenas_after;
-	size_t sz = sizeof(unsigned);
+	size_t   sz = sizeof(unsigned);
 
-	expect_d_eq(mallctl("arenas.narenas", (void *)&narenas_before, &sz,
-	    NULL, 0), 0, "Unexpected mallctl() failure");
+	expect_d_eq(
+	    mallctl("arenas.narenas", (void *)&narenas_before, &sz, NULL, 0), 0,
+	    "Unexpected mallctl() failure");
 	expect_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0,
 	    "Unexpected mallctl() failure");
-	expect_d_eq(mallctl("arenas.narenas", (void *)&narenas_after, &sz, NULL,
-	    0), 0, "Unexpected mallctl() failure");
+	expect_d_eq(
+	    mallctl("arenas.narenas", (void *)&narenas_after, &sz, NULL, 0), 0,
+	    "Unexpected mallctl() failure");
 
-	expect_u_eq(narenas_before+1, narenas_after,
+	expect_u_eq(narenas_before + 1, narenas_after,
 	    "Unexpected number of arenas before versus after extension");
-	expect_u_eq(arena, narenas_after-1, "Unexpected arena index");
+	expect_u_eq(arena, narenas_after - 1, "Unexpected arena index");
 }
 TEST_END
 
 TEST_BEGIN(test_arenas_lookup) {
 	unsigned arena, arena1;
-	void *ptr;
-	size_t sz = sizeof(unsigned);
+	void    *ptr;
+	size_t   sz = sizeof(unsigned);
 
 	expect_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0,
 	    "Unexpected mallctl() failure");
@@ -915,7 +1115,7 @@ TEST_BEGIN(test_prof_active) {
 	test_skip_if(!config_prof);
 	test_skip_if(opt_prof);
 
-	bool active, old;
+	bool   active, old;
 	size_t len = sizeof(bool);
 
 	active = true;
@@ -935,12 +1135,14 @@ TEST_BEGIN(test_prof_active) {
 TEST_END
 
 TEST_BEGIN(test_stats_arenas) {
-#define TEST_STATS_ARENAS(t, name) do {					\
-	t name;								\
-	size_t sz = sizeof(t);						\
-	expect_d_eq(mallctl("stats.arenas.0."#name, (void *)&name, &sz,	\
-	    NULL, 0), 0, "Unexpected mallctl() failure");		\
-} while (0)
+#define TEST_STATS_ARENAS(t, name)                                             \
+	do {                                                                   \
+		t      name;                                                   \
+		size_t sz = sizeof(t);                                         \
+		expect_d_eq(mallctl("stats.arenas.0." #name, (void *)&name,    \
+		                &sz, NULL, 0),                                 \
+		    0, "Unexpected mallctl() failure");                        \
+	} while (0)
 
 	TEST_STATS_ARENAS(unsigned, nthreads);
 	TEST_STATS_ARENAS(const char *, dss);
@@ -953,6 +1155,67 @@ TEST_BEGIN(test_stats_arenas) {
 }
 TEST_END
 
+TEST_BEGIN(test_stats_arenas_hpa_shard_counters) {
+	test_skip_if(!config_stats);
+
+#define TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(t, name)                          \
+	do {                                                                   \
+		t      name;                                                   \
+		size_t sz = sizeof(t);                                         \
+		expect_d_eq(mallctl("stats.arenas.0.hpa_shard." #name,         \
+		                (void *)&name, &sz, NULL, 0),                  \
+		    0, "Unexpected mallctl() failure");                        \
+	} while (0)
+
+	TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(size_t, npageslabs);
+	TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(size_t, nactive);
+	TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(size_t, ndirty);
+	TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(uint64_t, npurge_passes);
+	TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(uint64_t, npurges);
+	TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(uint64_t, nhugifies);
+	TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(uint64_t, ndehugifies);
+
+#undef TEST_STATS_ARENAS_HPA_SHARD_COUNTERS
+}
+TEST_END
+
+TEST_BEGIN(test_stats_arenas_hpa_shard_slabs) {
+	test_skip_if(!config_stats);
+
+#define TEST_STATS_ARENAS_HPA_SHARD_SLABS_GEN(t, slab, name)                   \
+	do {                                                                   \
+		t      slab##_##name;                                          \
+		size_t sz = sizeof(t);                                         \
+		expect_d_eq(                                                   \
+		    mallctl("stats.arenas.0.hpa_shard." #slab "." #name,       \
+		        (void *)&slab##_##name, &sz, NULL, 0),                 \
+		    0, "Unexpected mallctl() failure");                        \
+	} while (0)
+
+#define TEST_STATS_ARENAS_HPA_SHARD_SLABS(t, slab, name)                       \
+	do {                                                                   \
+		TEST_STATS_ARENAS_HPA_SHARD_SLABS_GEN(                         \
+		    t, slab, name##_##nonhuge);                                \
+		TEST_STATS_ARENAS_HPA_SHARD_SLABS_GEN(t, slab, name##_##huge); \
+	} while (0)
+
+	TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, slabs, npageslabs);
+	TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, slabs, nactive);
+	TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, slabs, ndirty);
+
+	TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, full_slabs, npageslabs);
+	TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, full_slabs, nactive);
+	TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, full_slabs, ndirty);
+
+	TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, empty_slabs, npageslabs);
+	TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, empty_slabs, nactive);
+	TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, empty_slabs, ndirty);
+
+#undef TEST_STATS_ARENAS_HPA_SHARD_SLABS
+#undef TEST_STATS_ARENAS_HPA_SHARD_SLABS_GEN
+}
+TEST_END
+
 static void
 alloc_hook(void *extra, UNUSED hook_alloc_t type, UNUSED void *result,
     UNUSED uintptr_t result_raw, UNUSED uintptr_t args_raw[3]) {
@@ -960,18 +1223,18 @@ alloc_hook(void *extra, UNUSED hook_alloc_t type, UNUSED void *result,
 }
 
 static void
-dalloc_hook(void *extra, UNUSED hook_dalloc_t type,
-    UNUSED void *address, UNUSED uintptr_t args_raw[3]) {
+dalloc_hook(void *extra, UNUSED hook_dalloc_t type, UNUSED void *address,
+    UNUSED uintptr_t args_raw[3]) {
 	*(bool *)extra = true;
 }
 
 TEST_BEGIN(test_hooks) {
-	bool hook_called = false;
+	bool    hook_called = false;
 	hooks_t hooks = {&alloc_hook, &dalloc_hook, NULL, &hook_called};
-	void *handle = NULL;
-	size_t sz = sizeof(handle);
-	int err = mallctl("experimental.hooks.install", &handle, &sz, &hooks,
-	    sizeof(hooks));
+	void   *handle = NULL;
+	size_t  sz = sizeof(handle);
+	int     err = mallctl(
+            "experimental.hooks.install", &handle, &sz, &hooks, sizeof(hooks));
 	expect_d_eq(err, 0, "Hook installation failed");
 	expect_ptr_ne(handle, NULL, "Hook installation gave null handle");
 	void *ptr = mallocx(1, 0);
@@ -980,8 +1243,8 @@ TEST_BEGIN(test_hooks) {
 	free(ptr);
 	expect_true(hook_called, "Free hook not called");
 
-	err = mallctl("experimental.hooks.remove", NULL, NULL, &handle,
-	    sizeof(handle));
+	err = mallctl(
+	    "experimental.hooks.remove", NULL, NULL, &handle, sizeof(handle));
 	expect_d_eq(err, 0, "Hook removal failed");
 	hook_called = false;
 	ptr = mallocx(1, 0);
@@ -991,13 +1254,13 @@ TEST_BEGIN(test_hooks) {
 TEST_END
 
 TEST_BEGIN(test_hooks_exhaustion) {
-	bool hook_called = false;
+	bool    hook_called = false;
 	hooks_t hooks = {&alloc_hook, &dalloc_hook, NULL, &hook_called};
 
-	void *handle;
-	void *handles[HOOK_MAX];
+	void  *handle;
+	void  *handles[HOOK_MAX];
 	size_t sz = sizeof(handle);
-	int err;
+	int    err;
 	for (int i = 0; i < HOOK_MAX; i++) {
 		handle = NULL;
 		err = mallctl("experimental.hooks.install", &handle, &sz,
@@ -1006,8 +1269,8 @@ TEST_BEGIN(test_hooks_exhaustion) {
 		expect_ptr_ne(handle, NULL, "Got NULL handle");
 		handles[i] = handle;
 	}
-	err = mallctl("experimental.hooks.install", &handle, &sz, &hooks,
-	    sizeof(hooks));
+	err = mallctl(
+	    "experimental.hooks.install", &handle, &sz, &hooks, sizeof(hooks));
 	expect_d_eq(err, EAGAIN, "Should have failed hook installation");
 	for (int i = 0; i < HOOK_MAX; i++) {
 		err = mallctl("experimental.hooks.remove", NULL, NULL,
@@ -1016,12 +1279,12 @@ TEST_BEGIN(test_hooks_exhaustion) {
 	}
 	/* Insertion failed, but then we removed some; it should work now. */
 	handle = NULL;
-	err = mallctl("experimental.hooks.install", &handle, &sz, &hooks,
-	    sizeof(hooks));
+	err = mallctl(
+	    "experimental.hooks.install", &handle, &sz, &hooks, sizeof(hooks));
 	expect_d_eq(err, 0, "Hook insertion failed");
 	expect_ptr_ne(handle, NULL, "Got NULL handle");
-	err = mallctl("experimental.hooks.remove", NULL, NULL, &handle,
-	    sizeof(handle));
+	err = mallctl(
+	    "experimental.hooks.remove", NULL, NULL, &handle, sizeof(handle));
 	expect_d_eq(err, 0, "Hook removal failed");
 }
 TEST_END
@@ -1035,7 +1298,7 @@ TEST_BEGIN(test_thread_idle) {
 	 */
 	test_skip_if(!config_stats);
 
-	int err;
+	int    err;
 	size_t sz;
 	size_t miblen;
 
@@ -1053,21 +1316,27 @@ TEST_BEGIN(test_thread_idle) {
 
 	unsigned arena_ind;
 	sz = sizeof(arena_ind);
-	err = mallctl("thread.arena", &arena_ind, &sz, NULL, 0);
-	expect_d_eq(err, 0, "");
+	expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0),
+	    0, "Unexpected mallctl() failure");
+	err = mallctl(
+	    "thread.arena", NULL, NULL, &arena_ind, sizeof(arena_ind));
+	expect_d_eq(err, 0, "Unexpected mallctl() failure");
+	err = mallctl("thread.tcache.flush", NULL, NULL, NULL, 0);
+	expect_d_eq(err, 0, "Unexpected mallctl() failure");
 
 	/* We're going to do an allocation of size 1, which we know is small. */
 	size_t mib[5];
-	miblen = sizeof(mib)/sizeof(mib[0]);
+	miblen = sizeof(mib) / sizeof(mib[0]);
 	err = mallctlnametomib("stats.arenas.0.small.ndalloc", mib, &miblen);
 	expect_d_eq(err, 0, "");
 	mib[2] = arena_ind;
 
 	/*
-	 * This alloc and dalloc should leave something in the tcache, in a
-	 * small size's cache bin.
+	 * This alloc and dalloc should leave something (from the newly created
+	 * arena) in the tcache, in a small size's cache bin.  Later the stats
+	 * of that arena will be checked to verify if tcache flush happened.
 	 */
-	void *ptr = mallocx(1, 0);
+	void *ptr = mallocx(1, MALLOCX_TCACHE_NONE);
 	dallocx(ptr, 0);
 
 	uint64_t epoch;
@@ -1106,9 +1375,9 @@ TEST_BEGIN(test_thread_peak) {
 	size_t big_size = 10 * 1024 * 1024;
 	size_t small_size = 256;
 
-	void *ptr;
-	int err;
-	size_t sz;
+	void    *ptr;
+	int      err;
+	size_t   sz;
 	uint64_t peak;
 	sz = sizeof(uint64_t);
 
@@ -1162,113 +1431,59 @@ TEST_BEGIN(test_thread_peak) {
 }
 TEST_END
 
-typedef struct activity_test_data_s activity_test_data_t;
-struct activity_test_data_s {
-	uint64_t obtained_alloc;
-	uint64_t obtained_dalloc;
+static unsigned nuser_thread_event_cb_calls;
+static void
+user_thread_event_cb(bool is_alloc, uint64_t tallocated, uint64_t tdallocated) {
+	(void)tdallocated;
+	(void)tallocated;
+	++nuser_thread_event_cb_calls;
+}
+static user_hook_object_t user_te_obj = {
+    .callback = user_thread_event_cb,
+    .interval = 100,
+    .is_alloc_only = false,
 };
 
-static void
-activity_test_callback(void *uctx, uint64_t alloc, uint64_t dalloc) {
-	activity_test_data_t *test_data = (activity_test_data_t *)uctx;
-	test_data->obtained_alloc = alloc;
-	test_data->obtained_dalloc = dalloc;
-}
-
-TEST_BEGIN(test_thread_activity_callback) {
-	test_skip_if(!config_stats);
-
+TEST_BEGIN(test_thread_event_hook) {
 	const size_t big_size = 10 * 1024 * 1024;
-	void *ptr;
-	int err;
-	size_t sz;
+	void        *ptr;
+	int          err;
 
-	uint64_t *allocatedp;
-	uint64_t *deallocatedp;
-	sz = sizeof(allocatedp);
-	err = mallctl("thread.allocatedp", &allocatedp, &sz, NULL, 0);
-	assert_d_eq(0, err, "");
-	err = mallctl("thread.deallocatedp", &deallocatedp, &sz, NULL, 0);
+	unsigned current_calls = nuser_thread_event_cb_calls;
+	err = mallctl("experimental.hooks.thread_event", NULL, 0, &user_te_obj,
+	    sizeof(user_te_obj));
 	assert_d_eq(0, err, "");
 
-	activity_callback_thunk_t old_thunk = {(activity_callback_t)111,
-		(void *)222};
-
-	activity_test_data_t test_data = {333, 444};
-	activity_callback_thunk_t new_thunk =
-	    {&activity_test_callback, &test_data};
-
-	sz = sizeof(old_thunk);
-	err = mallctl("experimental.thread.activity_callback", &old_thunk, &sz,
-	    &new_thunk, sizeof(new_thunk));
-	assert_d_eq(0, err, "");
-
-	expect_true(old_thunk.callback == NULL, "Callback already installed");
-	expect_true(old_thunk.uctx == NULL, "Callback data already installed");
+	err = mallctl("experimental.hooks.thread_event", NULL, 0, &user_te_obj,
+	    sizeof(user_te_obj));
+	assert_d_eq(
+	    0, err, "Not an error to provide object with same interval and cb");
 
 	ptr = mallocx(big_size, 0);
-	expect_u64_eq(test_data.obtained_alloc, *allocatedp, "");
-	expect_u64_eq(test_data.obtained_dalloc, *deallocatedp, "");
-
 	free(ptr);
-	expect_u64_eq(test_data.obtained_alloc, *allocatedp, "");
-	expect_u64_eq(test_data.obtained_dalloc, *deallocatedp, "");
-
-	sz = sizeof(old_thunk);
-	new_thunk = (activity_callback_thunk_t){ NULL, NULL };
-	err = mallctl("experimental.thread.activity_callback", &old_thunk, &sz,
-	    &new_thunk, sizeof(new_thunk));
-	assert_d_eq(0, err, "");
-
-	expect_true(old_thunk.callback == &activity_test_callback, "");
-	expect_true(old_thunk.uctx == &test_data, "");
-
-	/* Inserting NULL should have turned off tracking. */
-	test_data.obtained_alloc = 333;
-	test_data.obtained_dalloc = 444;
-	ptr = mallocx(big_size, 0);
-	free(ptr);
-	expect_u64_eq(333, test_data.obtained_alloc, "");
-	expect_u64_eq(444, test_data.obtained_dalloc, "");
+	expect_u64_lt(current_calls, nuser_thread_event_cb_calls, "");
 }
 TEST_END
 
 int
 main(void) {
-	return test(
-	    test_mallctl_errors,
-	    test_mallctlnametomib_errors,
-	    test_mallctlbymib_errors,
-	    test_mallctl_read_write,
-	    test_mallctlnametomib_short_mib,
-	    test_mallctlnametomib_short_name,
-	    test_mallctlmibnametomib,
-	    test_mallctlbymibname,
-	    test_mallctl_config,
-	    test_mallctl_opt,
-	    test_manpage_example,
-	    test_tcache_none,
-	    test_tcache,
-	    test_thread_arena,
-	    test_arena_i_initialized,
-	    test_arena_i_dirty_decay_ms,
-	    test_arena_i_muzzy_decay_ms,
-	    test_arena_i_purge,
-	    test_arena_i_decay,
-	    test_arena_i_dss,
-	    test_arena_i_retain_grow_limit,
-	    test_arenas_dirty_decay_ms,
-	    test_arenas_muzzy_decay_ms,
-	    test_arenas_constants,
-	    test_arenas_bin_constants,
-	    test_arenas_lextent_constants,
-	    test_arenas_create,
-	    test_arenas_lookup,
-	    test_prof_active,
-	    test_stats_arenas,
-	    test_hooks,
-	    test_hooks_exhaustion,
-	    test_thread_idle,
-	    test_thread_peak,
-	    test_thread_activity_callback);
+	return test(test_mallctl_errors, test_mallctlnametomib_errors,
+	    test_mallctlbymib_errors, test_mallctl_read_write,
+	    test_mallctlnametomib_short_mib, test_mallctlnametomib_short_name,
+	    test_mallctlmibnametomib, test_mallctlbymibname,
+	    test_mallctl_config, test_mallctl_opt, test_manpage_example,
+	    test_tcache_none, test_tcache, test_thread_arena,
+	    test_arena_i_initialized, test_arena_i_dirty_decay_ms,
+	    test_arena_i_muzzy_decay_ms, test_arena_i_purge, test_arena_i_decay,
+	    test_arena_i_dss, test_arena_i_name, test_arena_i_retain_grow_limit,
+	    test_arenas_dirty_decay_ms, test_arenas_muzzy_decay_ms,
+	    test_arenas_constants, test_arenas_bin_constants,
+	    test_arenas_bin_oob, test_arenas_lextent_oob,
+	    test_stats_arenas_bins_oob, test_stats_arenas_lextents_oob,
+	    test_arenas_lextent_constants, test_arenas_create,
+	    test_arenas_lookup, test_prof_active, test_stats_arenas,
+	    test_stats_arenas_hpa_shard_counters,
+	    test_stats_arenas_hpa_shard_slabs, test_hooks,
+	    test_hooks_exhaustion, test_thread_idle, test_thread_peak,
+	    test_thread_event_hook);
 }
diff --git a/test/unit/malloc_conf_2.c b/test/unit/malloc_conf_2.c
index ecfa4991..667e7006 100644
--- a/test/unit/malloc_conf_2.c
+++ b/test/unit/malloc_conf_2.c
@@ -1,6 +1,6 @@
 #include "test/jemalloc_test.h"
 
-const char *malloc_conf = "dirty_decay_ms:1000";
+const char *malloc_conf = "dirty_decay_ms:1000,muzzy_decay_ms:2000";
 const char *malloc_conf_2_conf_harder = "dirty_decay_ms:1234";
 
 TEST_BEGIN(test_malloc_conf_2) {
@@ -13,17 +13,63 @@ TEST_BEGIN(test_malloc_conf_2) {
 	test_skip_if(windows);
 
 	ssize_t dirty_decay_ms;
-	size_t sz = sizeof(dirty_decay_ms);
+	size_t  sz = sizeof(dirty_decay_ms);
 
 	int err = mallctl("opt.dirty_decay_ms", &dirty_decay_ms, &sz, NULL, 0);
 	assert_d_eq(err, 0, "Unexpected mallctl failure");
-	expect_zd_eq(dirty_decay_ms, 1234,
-	    "malloc_conf_2 setting didn't take effect");
+	expect_zd_eq(
+	    dirty_decay_ms, 1234, "malloc_conf_2 setting didn't take effect");
+}
+TEST_END
+
+TEST_BEGIN(test_mallctl_global_var) {
+#ifdef _WIN32
+	bool windows = true;
+#else
+	bool windows = false;
+#endif
+	/* Windows doesn't support weak symbol linker trickery. */
+	test_skip_if(windows);
+
+	const char *mc;
+	size_t      sz = sizeof(mc);
+	expect_d_eq(
+	    mallctl("opt.malloc_conf.global_var", (void *)&mc, &sz, NULL, 0), 0,
+	    "Unexpected mallctl() failure");
+	expect_str_eq(mc, malloc_conf,
+	    "Unexpected value for the global variable "
+	    "malloc_conf");
+
+	expect_d_eq(mallctl("opt.malloc_conf.global_var_2_conf_harder",
+	                (void *)&mc, &sz, NULL, 0),
+	    0, "Unexpected mallctl() failure");
+	expect_str_eq(mc, malloc_conf_2_conf_harder,
+	    "Unexpected value for the "
+	    "global variable malloc_conf_2_conf_harder");
+}
+TEST_END
+
+TEST_BEGIN(test_non_conflicting_var) {
+#ifdef _WIN32
+	bool windows = true;
+#else
+	bool windows = false;
+#endif
+	/* Windows doesn't support weak symbol linker trickery. */
+	test_skip_if(windows);
+
+	ssize_t muzzy_decay_ms;
+	size_t  sz = sizeof(muzzy_decay_ms);
+
+	int err = mallctl("opt.muzzy_decay_ms", &muzzy_decay_ms, &sz, NULL, 0);
+	assert_d_eq(err, 0, "Unexpected mallctl failure");
+	expect_zd_eq(muzzy_decay_ms, 2000,
+	    "Non-conflicting option from malloc_conf should pass through");
 }
 TEST_END
 
 int
 main(void) {
-	return test(
-	    test_malloc_conf_2);
+	return test(test_malloc_conf_2, test_mallctl_global_var,
+	    test_non_conflicting_var);
 }
diff --git a/test/unit/malloc_io.c b/test/unit/malloc_io.c
index 385f7450..ee744a78 100644
--- a/test/unit/malloc_io.c
+++ b/test/unit/malloc_io.c
@@ -14,77 +14,68 @@ TEST_BEGIN(test_malloc_strtoumax) {
 	struct test_s {
 		const char *input;
 		const char *expected_remainder;
-		int base;
-		int expected_errno;
+		int         base;
+		int         expected_errno;
 		const char *expected_errno_name;
-		uintmax_t expected_x;
+		uintmax_t   expected_x;
 	};
-#define ERR(e)		e, #e
-#define KUMAX(x)	((uintmax_t)x##ULL)
-#define KSMAX(x)	((uintmax_t)(intmax_t)x##LL)
-	struct test_s tests[] = {
-		{"0",		"0",	-1,	ERR(EINVAL),	UINTMAX_MAX},
-		{"0",		"0",	1,	ERR(EINVAL),	UINTMAX_MAX},
-		{"0",		"0",	37,	ERR(EINVAL),	UINTMAX_MAX},
+#define ERR(e) e, #e
+#define KUMAX(x) ((uintmax_t)x##ULL)
+#define KSMAX(x) ((uintmax_t)(intmax_t)x##LL)
+	struct test_s tests[] = {{"0", "0", -1, ERR(EINVAL), UINTMAX_MAX},
+	    {"0", "0", 1, ERR(EINVAL), UINTMAX_MAX},
+	    {"0", "0", 37, ERR(EINVAL), UINTMAX_MAX},
 
-		{"",		"",	0,	ERR(EINVAL),	UINTMAX_MAX},
-		{"+",		"+",	0,	ERR(EINVAL),	UINTMAX_MAX},
-		{"++3",		"++3",	0,	ERR(EINVAL),	UINTMAX_MAX},
-		{"-",		"-",	0,	ERR(EINVAL),	UINTMAX_MAX},
+	    {"", "", 0, ERR(EINVAL), UINTMAX_MAX},
+	    {"+", "+", 0, ERR(EINVAL), UINTMAX_MAX},
+	    {"++3", "++3", 0, ERR(EINVAL), UINTMAX_MAX},
+	    {"-", "-", 0, ERR(EINVAL), UINTMAX_MAX},
 
-		{"42",		"",	0,	ERR(0),		KUMAX(42)},
-		{"+42",		"",	0,	ERR(0),		KUMAX(42)},
-		{"-42",		"",	0,	ERR(0),		KSMAX(-42)},
-		{"042",		"",	0,	ERR(0),		KUMAX(042)},
-		{"+042",	"",	0,	ERR(0),		KUMAX(042)},
-		{"-042",	"",	0,	ERR(0),		KSMAX(-042)},
-		{"0x42",	"",	0,	ERR(0),		KUMAX(0x42)},
-		{"+0x42",	"",	0,	ERR(0),		KUMAX(0x42)},
-		{"-0x42",	"",	0,	ERR(0),		KSMAX(-0x42)},
+	    {"42", "", 0, ERR(0), KUMAX(42)}, {"+42", "", 0, ERR(0), KUMAX(42)},
+	    {"-42", "", 0, ERR(0), KSMAX(-42)},
+	    {"042", "", 0, ERR(0), KUMAX(042)},
+	    {"+042", "", 0, ERR(0), KUMAX(042)},
+	    {"-042", "", 0, ERR(0), KSMAX(-042)},
+	    {"0x42", "", 0, ERR(0), KUMAX(0x42)},
+	    {"+0x42", "", 0, ERR(0), KUMAX(0x42)},
+	    {"-0x42", "", 0, ERR(0), KSMAX(-0x42)},
 
-		{"0",		"",	0,	ERR(0),		KUMAX(0)},
-		{"1",		"",	0,	ERR(0),		KUMAX(1)},
+	    {"0", "", 0, ERR(0), KUMAX(0)}, {"1", "", 0, ERR(0), KUMAX(1)},
 
-		{"42",		"",	0,	ERR(0),		KUMAX(42)},
-		{" 42",		"",	0,	ERR(0),		KUMAX(42)},
-		{"42 ",		" ",	0,	ERR(0),		KUMAX(42)},
-		{"0x",		"x",	0,	ERR(0),		KUMAX(0)},
-		{"42x",		"x",	0,	ERR(0),		KUMAX(42)},
+	    {"42", "", 0, ERR(0), KUMAX(42)}, {" 42", "", 0, ERR(0), KUMAX(42)},
+	    {"42 ", " ", 0, ERR(0), KUMAX(42)},
+	    {"0x", "x", 0, ERR(0), KUMAX(0)},
+	    {"42x", "x", 0, ERR(0), KUMAX(42)},
 
-		{"07",		"",	0,	ERR(0),		KUMAX(7)},
-		{"010",		"",	0,	ERR(0),		KUMAX(8)},
-		{"08",		"8",	0,	ERR(0),		KUMAX(0)},
-		{"0_",		"_",	0,	ERR(0),		KUMAX(0)},
+	    {"07", "", 0, ERR(0), KUMAX(7)}, {"010", "", 0, ERR(0), KUMAX(8)},
+	    {"08", "8", 0, ERR(0), KUMAX(0)}, {"0_", "_", 0, ERR(0), KUMAX(0)},
 
-		{"0x",		"x",	0,	ERR(0),		KUMAX(0)},
-		{"0X",		"X",	0,	ERR(0),		KUMAX(0)},
-		{"0xg",		"xg",	0,	ERR(0),		KUMAX(0)},
-		{"0XA",		"",	0,	ERR(0),		KUMAX(10)},
+	    {"0x", "x", 0, ERR(0), KUMAX(0)}, {"0X", "X", 0, ERR(0), KUMAX(0)},
+	    {"0xg", "xg", 0, ERR(0), KUMAX(0)},
+	    {"0XA", "", 0, ERR(0), KUMAX(10)},
 
-		{"010",		"",	10,	ERR(0),		KUMAX(10)},
-		{"0x3",		"x3",	10,	ERR(0),		KUMAX(0)},
+	    {"010", "", 10, ERR(0), KUMAX(10)},
+	    {"0x3", "x3", 10, ERR(0), KUMAX(0)},
 
-		{"12",		"2",	2,	ERR(0),		KUMAX(1)},
-		{"78",		"8",	8,	ERR(0),		KUMAX(7)},
-		{"9a",		"a",	10,	ERR(0),		KUMAX(9)},
-		{"9A",		"A",	10,	ERR(0),		KUMAX(9)},
-		{"fg",		"g",	16,	ERR(0),		KUMAX(15)},
-		{"FG",		"G",	16,	ERR(0),		KUMAX(15)},
-		{"0xfg",	"g",	16,	ERR(0),		KUMAX(15)},
-		{"0XFG",	"G",	16,	ERR(0),		KUMAX(15)},
-		{"z_",		"_",	36,	ERR(0),		KUMAX(35)},
-		{"Z_",		"_",	36,	ERR(0),		KUMAX(35)}
-	};
+	    {"12", "2", 2, ERR(0), KUMAX(1)}, {"78", "8", 8, ERR(0), KUMAX(7)},
+	    {"9a", "a", 10, ERR(0), KUMAX(9)},
+	    {"9A", "A", 10, ERR(0), KUMAX(9)},
+	    {"fg", "g", 16, ERR(0), KUMAX(15)},
+	    {"FG", "G", 16, ERR(0), KUMAX(15)},
+	    {"0xfg", "g", 16, ERR(0), KUMAX(15)},
+	    {"0XFG", "G", 16, ERR(0), KUMAX(15)},
+	    {"z_", "_", 36, ERR(0), KUMAX(35)},
+	    {"Z_", "_", 36, ERR(0), KUMAX(35)}};
 #undef ERR
 #undef KUMAX
 #undef KSMAX
 	unsigned i;
 
-	for (i = 0; i < sizeof(tests)/sizeof(struct test_s); i++) {
+	for (i = 0; i < sizeof(tests) / sizeof(struct test_s); i++) {
 		struct test_s *test = &tests[i];
-		int err;
-		uintmax_t result;
-		char *remainder;
+		int            err;
+		uintmax_t      result;
+		char          *remainder;
 
 		set_errno(0);
 		result = malloc_strtoumax(test->input, &remainder, test->base);
@@ -93,8 +84,8 @@ TEST_BEGIN(test_malloc_strtoumax) {
 		    "Expected errno %s for \"%s\", base %d",
 		    test->expected_errno_name, test->input, test->base);
 		expect_str_eq(remainder, test->expected_remainder,
-		    "Unexpected remainder for \"%s\", base %d",
-		    test->input, test->base);
+		    "Unexpected remainder for \"%s\", base %d", test->input,
+		    test->base);
 		if (err == 0) {
 			expect_ju_eq(result, test->expected_x,
 			    "Unexpected result for \"%s\", base %d",
@@ -105,31 +96,32 @@ TEST_BEGIN(test_malloc_strtoumax) {
 TEST_END
 
 TEST_BEGIN(test_malloc_snprintf_truncated) {
-#define BUFLEN	15
-	char buf[BUFLEN];
+#define BUFLEN 15
+	char   buf[BUFLEN];
 	size_t result;
 	size_t len;
-#define TEST(expected_str_untruncated, ...) do {			\
-	result = malloc_snprintf(buf, len, __VA_ARGS__);		\
-	expect_d_eq(strncmp(buf, expected_str_untruncated, len-1), 0,	\
-	    "Unexpected string inequality (\"%s\" vs \"%s\")",		\
-	    buf, expected_str_untruncated);				\
-	expect_zu_eq(result, strlen(expected_str_untruncated),		\
-	    "Unexpected result");					\
-} while (0)
+#define TEST(expected_str_untruncated, ...)                                    \
+	do {                                                                   \
+		result = malloc_snprintf(buf, len, __VA_ARGS__);               \
+		expect_d_eq(strncmp(buf, expected_str_untruncated, len - 1),   \
+		    0, "Unexpected string inequality (\"%s\" vs \"%s\")", buf, \
+		    expected_str_untruncated);                                 \
+		expect_zu_eq(result, strlen(expected_str_untruncated),         \
+		    "Unexpected result");                                      \
+	} while (0)
 
 	for (len = 1; len < BUFLEN; len++) {
-		TEST("012346789",	"012346789");
-		TEST("a0123b",		"a%sb", "0123");
-		TEST("a01234567",	"a%s%s", "0123", "4567");
-		TEST("a0123  ",		"a%-6s", "0123");
-		TEST("a  0123",		"a%6s", "0123");
-		TEST("a   012",		"a%6.3s", "0123");
-		TEST("a   012",		"a%*.*s", 6, 3, "0123");
-		TEST("a 123b",		"a% db", 123);
-		TEST("a123b",		"a%-db", 123);
-		TEST("a-123b",		"a%-db", -123);
-		TEST("a+123b",		"a%+db", 123);
+		TEST("012346789", "012346789");
+		TEST("a0123b", "a%sb", "0123");
+		TEST("a01234567", "a%s%s", "0123", "4567");
+		TEST("a0123  ", "a%-6s", "0123");
+		TEST("a  0123", "a%6s", "0123");
+		TEST("a   012", "a%6.3s", "0123");
+		TEST("a   012", "a%*.*s", 6, 3, "0123");
+		TEST("a 123b", "a% db", 123);
+		TEST("a123b", "a%-db", 123);
+		TEST("a-123b", "a%-db", -123);
+		TEST("a+123b", "a%+db", 123);
 	}
 #undef BUFLEN
 #undef TEST
@@ -137,14 +129,16 @@ TEST_BEGIN(test_malloc_snprintf_truncated) {
 TEST_END
 
 TEST_BEGIN(test_malloc_snprintf) {
-#define BUFLEN	128
-	char buf[BUFLEN];
+#define BUFLEN 128
+	char   buf[BUFLEN];
 	size_t result;
-#define TEST(expected_str, ...) do {					\
-	result = malloc_snprintf(buf, sizeof(buf), __VA_ARGS__);	\
-	expect_str_eq(buf, expected_str, "Unexpected output");		\
-	expect_zu_eq(result, strlen(expected_str), "Unexpected result");\
-} while (0)
+#define TEST(expected_str, ...)                                                \
+	do {                                                                   \
+		result = malloc_snprintf(buf, sizeof(buf), __VA_ARGS__);       \
+		expect_str_eq(buf, expected_str, "Unexpected output");         \
+		expect_zu_eq(                                                  \
+		    result, strlen(expected_str), "Unexpected result");        \
+	} while (0)
 
 	TEST("hello", "hello");
 
@@ -258,11 +252,26 @@ TEST_BEGIN(test_malloc_snprintf) {
 }
 TEST_END
 
+TEST_BEGIN(test_malloc_snprintf_zero_size) {
+	char   buf[8];
+	size_t result;
+
+	/*
+	 * malloc_snprintf with size==0 should not write anything but should
+	 * return the length that would have been written.  A previous bug
+	 * caused an out-of-bounds write via str[size - 1] when size was 0.
+	 */
+	memset(buf, 'X', sizeof(buf));
+	result = malloc_snprintf(buf, 0, "%s", "hello");
+	expect_zu_eq(result, 5, "Expected length 5 for \"hello\"");
+	/* buf should be untouched. */
+	expect_c_eq(buf[0], 'X', "Buffer should not have been modified");
+}
+TEST_END
+
 int
 main(void) {
-	return test(
-	    test_malloc_strtoumax_no_endptr,
-	    test_malloc_strtoumax,
-	    test_malloc_snprintf_truncated,
-	    test_malloc_snprintf);
+	return test(test_malloc_strtoumax_no_endptr, test_malloc_strtoumax,
+	    test_malloc_snprintf_truncated, test_malloc_snprintf,
+	    test_malloc_snprintf_zero_size);
 }
diff --git a/test/unit/math.c b/test/unit/math.c
index a32767c5..b0994768 100644
--- a/test/unit/math.c
+++ b/test/unit/math.c
@@ -6,11 +6,11 @@
 #include <float.h>
 
 #ifdef __PGI
-#undef INFINITY
+#	undef INFINITY
 #endif
 
 #ifndef INFINITY
-#define INFINITY (DBL_MAX + DBL_MAX)
+#	define INFINITY (DBL_MAX + DBL_MAX)
 #endif
 
 static bool
@@ -20,7 +20,7 @@ double_eq_rel(double a, double b, double max_rel_err, double max_abs_err) {
 	if (fabs(a - b) < max_abs_err) {
 		return true;
 	}
-	rel_err = (fabs(b) > fabs(a)) ? fabs((a-b)/b) : fabs((a-b)/a);
+	rel_err = (fabs(b) > fabs(a)) ? fabs((a - b) / b) : fabs((a - b) / a);
 	return (rel_err < max_rel_err);
 }
 
@@ -41,209 +41,206 @@ TEST_BEGIN(test_ln_gamma_factorial) {
 
 	/* exp(ln_gamma(x)) == (x-1)! for integer x. */
 	for (x = 1; x <= 21; x++) {
-		expect_true(double_eq_rel(exp(ln_gamma(x)),
-		    (double)factorial(x-1), MAX_REL_ERR, MAX_ABS_ERR),
+		expect_true(
+		    double_eq_rel(exp(ln_gamma(x)), (double)factorial(x - 1),
+		        MAX_REL_ERR, MAX_ABS_ERR),
 		    "Incorrect factorial result for x=%u", x);
 	}
 }
 TEST_END
 
 /* Expected ln_gamma([0.0..100.0] increment=0.25). */
-static const double ln_gamma_misc_expected[] = {
-	INFINITY,
-	1.28802252469807743, 0.57236494292470008, 0.20328095143129538,
-	0.00000000000000000, -0.09827183642181320, -0.12078223763524518,
-	-0.08440112102048555, 0.00000000000000000, 0.12487171489239651,
-	0.28468287047291918, 0.47521466691493719, 0.69314718055994529,
-	0.93580193110872523, 1.20097360234707429, 1.48681557859341718,
-	1.79175946922805496, 2.11445692745037128, 2.45373657084244234,
-	2.80857141857573644, 3.17805383034794575, 3.56137591038669710,
-	3.95781396761871651, 4.36671603662228680, 4.78749174278204581,
-	5.21960398699022932, 5.66256205985714178, 6.11591589143154568,
-	6.57925121201010121, 7.05218545073853953, 7.53436423675873268,
-	8.02545839631598312, 8.52516136106541467, 9.03318691960512332,
-	9.54926725730099690, 10.07315123968123949, 10.60460290274525086,
-	11.14340011995171231, 11.68933342079726856, 12.24220494005076176,
-	12.80182748008146909, 13.36802367147604720, 13.94062521940376342,
-	14.51947222506051816, 15.10441257307551943, 15.69530137706046524,
-	16.29200047656724237, 16.89437797963419285, 17.50230784587389010,
-	18.11566950571089407, 18.73434751193644843, 19.35823122022435427,
-	19.98721449566188468, 20.62119544270163018, 21.26007615624470048,
-	21.90376249182879320, 22.55216385312342098, 23.20519299513386002,
-	23.86276584168908954, 24.52480131594137802, 25.19122118273868338,
-	25.86194990184851861, 26.53691449111561340, 27.21604439872720604,
-	27.89927138384089389, 28.58652940490193828, 29.27775451504081516,
-	29.97288476399884871, 30.67186010608067548, 31.37462231367769050,
-	32.08111489594735843, 32.79128302226991565, 33.50507345013689076,
-	34.22243445715505317, 34.94331577687681545, 35.66766853819134298,
-	36.39544520803305261, 37.12659953718355865, 37.86108650896109395,
-	38.59886229060776230, 39.33988418719949465, 40.08411059791735198,
-	40.83150097453079752, 41.58201578195490100, 42.33561646075348506,
-	43.09226539146988699, 43.85192586067515208, 44.61456202863158893,
-	45.38013889847690052, 46.14862228684032885, 46.91997879580877395,
-	47.69417578616628361, 48.47118135183522014, 49.25096429545256882,
-	50.03349410501914463, 50.81874093156324790, 51.60667556776436982,
-	52.39726942748592364, 53.19049452616926743, 53.98632346204390586,
-	54.78472939811231157, 55.58568604486942633, 56.38916764371992940,
-	57.19514895105859864, 58.00360522298051080, 58.81451220059079787,
-	59.62784609588432261, 60.44358357816834371, 61.26170176100199427,
-	62.08217818962842927, 62.90499082887649962, 63.73011805151035958,
-	64.55753862700632340, 65.38723171073768015, 66.21917683354901385,
-	67.05335389170279825, 67.88974313718154008, 68.72832516833013017,
-	69.56908092082363737, 70.41199165894616385, 71.25703896716800045,
-	72.10420474200799390, 72.95347118416940191, 73.80482079093779646,
-	74.65823634883015814, 75.51370092648485866, 76.37119786778275454,
-	77.23071078519033961, 78.09222355331530707, 78.95572030266725960,
-	79.82118541361435859, 80.68860351052903468, 81.55795945611502873,
-	82.42923834590904164, 83.30242550295004378, 84.17750647261028973,
-	85.05446701758152983, 85.93329311301090456, 86.81397094178107920,
-	87.69648688992882057, 88.58082754219766741, 89.46697967771913795,
-	90.35493026581838194, 91.24466646193963015, 92.13617560368709292,
-	93.02944520697742803, 93.92446296229978486, 94.82121673107967297,
-	95.71969454214321615, 96.61988458827809723, 97.52177522288820910,
-	98.42535495673848800, 99.33061245478741341, 100.23753653310367895,
-	101.14611615586458981, 102.05634043243354370, 102.96819861451382394,
-	103.88168009337621811, 104.79677439715833032, 105.71347118823287303,
-	106.63176026064346047, 107.55163153760463501, 108.47307506906540198,
-	109.39608102933323153, 110.32063971475740516, 111.24674154146920557,
-	112.17437704317786995, 113.10353686902013237, 114.03421178146170689,
-	114.96639265424990128, 115.90007047041454769, 116.83523632031698014,
-	117.77188139974506953, 118.70999700805310795, 119.64957454634490830,
-	120.59060551569974962, 121.53308151543865279, 122.47699424143097247,
-	123.42233548443955726, 124.36909712850338394, 125.31727114935689826,
-	126.26684961288492559, 127.21782467361175861, 128.17018857322420899,
-	129.12393363912724453, 130.07905228303084755, 131.03553699956862033,
-	131.99338036494577864, 132.95257503561629164, 133.91311374698926784,
-	134.87498931216194364, 135.83819462068046846, 136.80272263732638294,
-	137.76856640092901785, 138.73571902320256299, 139.70417368760718091,
-	140.67392364823425055, 141.64496222871400732, 142.61728282114600574,
-	143.59087888505104047, 144.56574394634486680, 145.54187159633210058,
-	146.51925549072063859, 147.49788934865566148, 148.47776695177302031,
-	149.45888214327129617, 150.44122882700193600, 151.42480096657754984,
-	152.40959258449737490, 153.39559776128982094, 154.38281063467164245,
-	155.37122539872302696, 156.36083630307879844, 157.35163765213474107,
-	158.34362380426921391, 159.33678917107920370, 160.33112821663092973,
-	161.32663545672428995, 162.32330545817117695, 163.32113283808695314,
-	164.32011226319519892, 165.32023844914485267, 166.32150615984036790,
-	167.32391020678358018, 168.32744544842768164, 169.33210678954270634,
-	170.33788918059275375, 171.34478761712384198, 172.35279713916281707,
-	173.36191283062726143, 174.37212981874515094, 175.38344327348534080,
-	176.39584840699734514, 177.40934047306160437, 178.42391476654847793,
-	179.43956662288721304, 180.45629141754378111, 181.47408456550741107,
-	182.49294152078630304, 183.51285777591152737, 184.53382886144947861,
-	185.55585034552262869, 186.57891783333786861, 187.60302696672312095,
-	188.62817342367162610, 189.65435291789341932, 190.68156119837468054,
-	191.70979404894376330, 192.73904728784492590, 193.76931676731820176,
-	194.80059837318714244, 195.83288802445184729, 196.86618167288995096,
-	197.90047530266301123, 198.93576492992946214, 199.97204660246373464,
-	201.00931639928148797, 202.04757043027063901, 203.08680483582807597,
-	204.12701578650228385, 205.16819948264117102, 206.21035215404597807,
-	207.25347005962987623, 208.29754948708190909, 209.34258675253678916,
-	210.38857820024875878, 211.43552020227099320, 212.48340915813977858,
-	213.53224149456323744, 214.58201366511514152, 215.63272214993284592,
-	216.68436345542014010, 217.73693411395422004, 218.79043068359703739,
-	219.84484974781133815, 220.90018791517996988, 221.95644181913033322,
-	223.01360811766215875, 224.07168349307951871, 225.13066465172661879,
-	226.19054832372759734, 227.25133126272962159, 228.31301024565024704,
-	229.37558207242807384, 230.43904356577689896, 231.50339157094342113,
-	232.56862295546847008, 233.63473460895144740, 234.70172344281823484,
-	235.76958639009222907, 236.83832040516844586, 237.90792246359117712,
-	238.97838956183431947, 240.04971871708477238, 241.12190696702904802,
-	242.19495136964280846, 243.26884900298270509, 244.34359696498191283,
-	245.41919237324782443, 246.49563236486270057, 247.57291409618682110,
-	248.65103474266476269, 249.72999149863338175, 250.80978157713354904,
-	251.89040220972316320, 252.97185064629374551, 254.05412415488834199,
-	255.13722002152300661, 256.22113555000953511, 257.30586806178126835,
-	258.39141489572085675, 259.47777340799029844, 260.56494097186322279,
-	261.65291497755913497, 262.74169283208021852, 263.83127195904967266,
-	264.92164979855277807, 266.01282380697938379, 267.10479145686849733,
-	268.19755023675537586, 269.29109765101975427, 270.38543121973674488,
-	271.48054847852881721, 272.57644697842033565, 273.67312428569374561,
-	274.77057798174683967, 275.86880566295326389, 276.96780494052313770,
-	278.06757344036617496, 279.16810880295668085, 280.26940868320008349,
-	281.37147075030043197, 282.47429268763045229, 283.57787219260217171,
-	284.68220697654078322, 285.78729476455760050, 286.89313329542699194,
-	287.99972032146268930, 289.10705360839756395, 290.21513093526289140,
-	291.32395009427028754, 292.43350889069523646, 293.54380514276073200,
-	294.65483668152336350, 295.76660135076059532, 296.87909700685889902,
-	297.99232151870342022, 299.10627276756946458, 300.22094864701409733,
-	301.33634706277030091, 302.45246593264130297, 303.56930318639643929,
-	304.68685676566872189, 305.80512462385280514, 306.92410472600477078,
-	308.04379504874236773, 309.16419358014690033, 310.28529831966631036,
-	311.40710727801865687, 312.52961847709792664, 313.65282994987899201,
-	314.77673974032603610, 315.90134590329950015, 317.02664650446632777,
-	318.15263962020929966, 319.27932333753892635, 320.40669575400545455,
-	321.53475497761127144, 322.66349912672620803, 323.79292633000159185,
-	324.92303472628691452, 326.05382246454587403, 327.18528770377525916,
-	328.31742861292224234, 329.45024337080525356, 330.58373016603343331,
-	331.71788719692847280, 332.85271267144611329, 333.98820480709991898,
-	335.12436183088397001, 336.26118197919845443, 337.39866349777429377,
-	338.53680464159958774, 339.67560367484657036, 340.81505887079896411,
-	341.95516851178109619, 343.09593088908627578, 344.23734430290727460,
-	345.37940706226686416, 346.52211748494903532, 347.66547389743118401,
-	348.80947463481720661, 349.95411804077025408, 351.09940246744753267,
-	352.24532627543504759, 353.39188783368263103, 354.53908551944078908,
-	355.68691771819692349, 356.83538282361303118, 357.98447923746385868,
-	359.13420536957539753
-};
+static const double ln_gamma_misc_expected[] = {INFINITY, 1.28802252469807743,
+    0.57236494292470008, 0.20328095143129538, 0.00000000000000000,
+    -0.09827183642181320, -0.12078223763524518, -0.08440112102048555,
+    0.00000000000000000, 0.12487171489239651, 0.28468287047291918,
+    0.47521466691493719, 0.69314718055994529, 0.93580193110872523,
+    1.20097360234707429, 1.48681557859341718, 1.79175946922805496,
+    2.11445692745037128, 2.45373657084244234, 2.80857141857573644,
+    3.17805383034794575, 3.56137591038669710, 3.95781396761871651,
+    4.36671603662228680, 4.78749174278204581, 5.21960398699022932,
+    5.66256205985714178, 6.11591589143154568, 6.57925121201010121,
+    7.05218545073853953, 7.53436423675873268, 8.02545839631598312,
+    8.52516136106541467, 9.03318691960512332, 9.54926725730099690,
+    10.07315123968123949, 10.60460290274525086, 11.14340011995171231,
+    11.68933342079726856, 12.24220494005076176, 12.80182748008146909,
+    13.36802367147604720, 13.94062521940376342, 14.51947222506051816,
+    15.10441257307551943, 15.69530137706046524, 16.29200047656724237,
+    16.89437797963419285, 17.50230784587389010, 18.11566950571089407,
+    18.73434751193644843, 19.35823122022435427, 19.98721449566188468,
+    20.62119544270163018, 21.26007615624470048, 21.90376249182879320,
+    22.55216385312342098, 23.20519299513386002, 23.86276584168908954,
+    24.52480131594137802, 25.19122118273868338, 25.86194990184851861,
+    26.53691449111561340, 27.21604439872720604, 27.89927138384089389,
+    28.58652940490193828, 29.27775451504081516, 29.97288476399884871,
+    30.67186010608067548, 31.37462231367769050, 32.08111489594735843,
+    32.79128302226991565, 33.50507345013689076, 34.22243445715505317,
+    34.94331577687681545, 35.66766853819134298, 36.39544520803305261,
+    37.12659953718355865, 37.86108650896109395, 38.59886229060776230,
+    39.33988418719949465, 40.08411059791735198, 40.83150097453079752,
+    41.58201578195490100, 42.33561646075348506, 43.09226539146988699,
+    43.85192586067515208, 44.61456202863158893, 45.38013889847690052,
+    46.14862228684032885, 46.91997879580877395, 47.69417578616628361,
+    48.47118135183522014, 49.25096429545256882, 50.03349410501914463,
+    50.81874093156324790, 51.60667556776436982, 52.39726942748592364,
+    53.19049452616926743, 53.98632346204390586, 54.78472939811231157,
+    55.58568604486942633, 56.38916764371992940, 57.19514895105859864,
+    58.00360522298051080, 58.81451220059079787, 59.62784609588432261,
+    60.44358357816834371, 61.26170176100199427, 62.08217818962842927,
+    62.90499082887649962, 63.73011805151035958, 64.55753862700632340,
+    65.38723171073768015, 66.21917683354901385, 67.05335389170279825,
+    67.88974313718154008, 68.72832516833013017, 69.56908092082363737,
+    70.41199165894616385, 71.25703896716800045, 72.10420474200799390,
+    72.95347118416940191, 73.80482079093779646, 74.65823634883015814,
+    75.51370092648485866, 76.37119786778275454, 77.23071078519033961,
+    78.09222355331530707, 78.95572030266725960, 79.82118541361435859,
+    80.68860351052903468, 81.55795945611502873, 82.42923834590904164,
+    83.30242550295004378, 84.17750647261028973, 85.05446701758152983,
+    85.93329311301090456, 86.81397094178107920, 87.69648688992882057,
+    88.58082754219766741, 89.46697967771913795, 90.35493026581838194,
+    91.24466646193963015, 92.13617560368709292, 93.02944520697742803,
+    93.92446296229978486, 94.82121673107967297, 95.71969454214321615,
+    96.61988458827809723, 97.52177522288820910, 98.42535495673848800,
+    99.33061245478741341, 100.23753653310367895, 101.14611615586458981,
+    102.05634043243354370, 102.96819861451382394, 103.88168009337621811,
+    104.79677439715833032, 105.71347118823287303, 106.63176026064346047,
+    107.55163153760463501, 108.47307506906540198, 109.39608102933323153,
+    110.32063971475740516, 111.24674154146920557, 112.17437704317786995,
+    113.10353686902013237, 114.03421178146170689, 114.96639265424990128,
+    115.90007047041454769, 116.83523632031698014, 117.77188139974506953,
+    118.70999700805310795, 119.64957454634490830, 120.59060551569974962,
+    121.53308151543865279, 122.47699424143097247, 123.42233548443955726,
+    124.36909712850338394, 125.31727114935689826, 126.26684961288492559,
+    127.21782467361175861, 128.17018857322420899, 129.12393363912724453,
+    130.07905228303084755, 131.03553699956862033, 131.99338036494577864,
+    132.95257503561629164, 133.91311374698926784, 134.87498931216194364,
+    135.83819462068046846, 136.80272263732638294, 137.76856640092901785,
+    138.73571902320256299, 139.70417368760718091, 140.67392364823425055,
+    141.64496222871400732, 142.61728282114600574, 143.59087888505104047,
+    144.56574394634486680, 145.54187159633210058, 146.51925549072063859,
+    147.49788934865566148, 148.47776695177302031, 149.45888214327129617,
+    150.44122882700193600, 151.42480096657754984, 152.40959258449737490,
+    153.39559776128982094, 154.38281063467164245, 155.37122539872302696,
+    156.36083630307879844, 157.35163765213474107, 158.34362380426921391,
+    159.33678917107920370, 160.33112821663092973, 161.32663545672428995,
+    162.32330545817117695, 163.32113283808695314, 164.32011226319519892,
+    165.32023844914485267, 166.32150615984036790, 167.32391020678358018,
+    168.32744544842768164, 169.33210678954270634, 170.33788918059275375,
+    171.34478761712384198, 172.35279713916281707, 173.36191283062726143,
+    174.37212981874515094, 175.38344327348534080, 176.39584840699734514,
+    177.40934047306160437, 178.42391476654847793, 179.43956662288721304,
+    180.45629141754378111, 181.47408456550741107, 182.49294152078630304,
+    183.51285777591152737, 184.53382886144947861, 185.55585034552262869,
+    186.57891783333786861, 187.60302696672312095, 188.62817342367162610,
+    189.65435291789341932, 190.68156119837468054, 191.70979404894376330,
+    192.73904728784492590, 193.76931676731820176, 194.80059837318714244,
+    195.83288802445184729, 196.86618167288995096, 197.90047530266301123,
+    198.93576492992946214, 199.97204660246373464, 201.00931639928148797,
+    202.04757043027063901, 203.08680483582807597, 204.12701578650228385,
+    205.16819948264117102, 206.21035215404597807, 207.25347005962987623,
+    208.29754948708190909, 209.34258675253678916, 210.38857820024875878,
+    211.43552020227099320, 212.48340915813977858, 213.53224149456323744,
+    214.58201366511514152, 215.63272214993284592, 216.68436345542014010,
+    217.73693411395422004, 218.79043068359703739, 219.84484974781133815,
+    220.90018791517996988, 221.95644181913033322, 223.01360811766215875,
+    224.07168349307951871, 225.13066465172661879, 226.19054832372759734,
+    227.25133126272962159, 228.31301024565024704, 229.37558207242807384,
+    230.43904356577689896, 231.50339157094342113, 232.56862295546847008,
+    233.63473460895144740, 234.70172344281823484, 235.76958639009222907,
+    236.83832040516844586, 237.90792246359117712, 238.97838956183431947,
+    240.04971871708477238, 241.12190696702904802, 242.19495136964280846,
+    243.26884900298270509, 244.34359696498191283, 245.41919237324782443,
+    246.49563236486270057, 247.57291409618682110, 248.65103474266476269,
+    249.72999149863338175, 250.80978157713354904, 251.89040220972316320,
+    252.97185064629374551, 254.05412415488834199, 255.13722002152300661,
+    256.22113555000953511, 257.30586806178126835, 258.39141489572085675,
+    259.47777340799029844, 260.56494097186322279, 261.65291497755913497,
+    262.74169283208021852, 263.83127195904967266, 264.92164979855277807,
+    266.01282380697938379, 267.10479145686849733, 268.19755023675537586,
+    269.29109765101975427, 270.38543121973674488, 271.48054847852881721,
+    272.57644697842033565, 273.67312428569374561, 274.77057798174683967,
+    275.86880566295326389, 276.96780494052313770, 278.06757344036617496,
+    279.16810880295668085, 280.26940868320008349, 281.37147075030043197,
+    282.47429268763045229, 283.57787219260217171, 284.68220697654078322,
+    285.78729476455760050, 286.89313329542699194, 287.99972032146268930,
+    289.10705360839756395, 290.21513093526289140, 291.32395009427028754,
+    292.43350889069523646, 293.54380514276073200, 294.65483668152336350,
+    295.76660135076059532, 296.87909700685889902, 297.99232151870342022,
+    299.10627276756946458, 300.22094864701409733, 301.33634706277030091,
+    302.45246593264130297, 303.56930318639643929, 304.68685676566872189,
+    305.80512462385280514, 306.92410472600477078, 308.04379504874236773,
+    309.16419358014690033, 310.28529831966631036, 311.40710727801865687,
+    312.52961847709792664, 313.65282994987899201, 314.77673974032603610,
+    315.90134590329950015, 317.02664650446632777, 318.15263962020929966,
+    319.27932333753892635, 320.40669575400545455, 321.53475497761127144,
+    322.66349912672620803, 323.79292633000159185, 324.92303472628691452,
+    326.05382246454587403, 327.18528770377525916, 328.31742861292224234,
+    329.45024337080525356, 330.58373016603343331, 331.71788719692847280,
+    332.85271267144611329, 333.98820480709991898, 335.12436183088397001,
+    336.26118197919845443, 337.39866349777429377, 338.53680464159958774,
+    339.67560367484657036, 340.81505887079896411, 341.95516851178109619,
+    343.09593088908627578, 344.23734430290727460, 345.37940706226686416,
+    346.52211748494903532, 347.66547389743118401, 348.80947463481720661,
+    349.95411804077025408, 351.09940246744753267, 352.24532627543504759,
+    353.39188783368263103, 354.53908551944078908, 355.68691771819692349,
+    356.83538282361303118, 357.98447923746385868, 359.13420536957539753};
 
 TEST_BEGIN(test_ln_gamma_misc) {
 	unsigned i;
 
-	for (i = 1; i < sizeof(ln_gamma_misc_expected)/sizeof(double); i++) {
+	for (i = 1; i < sizeof(ln_gamma_misc_expected) / sizeof(double); i++) {
 		double x = (double)i * 0.25;
-		expect_true(double_eq_rel(ln_gamma(x),
-		    ln_gamma_misc_expected[i], MAX_REL_ERR, MAX_ABS_ERR),
+		expect_true(
+		    double_eq_rel(ln_gamma(x), ln_gamma_misc_expected[i],
+		        MAX_REL_ERR, MAX_ABS_ERR),
 		    "Incorrect ln_gamma result for i=%u", i);
 	}
 }
 TEST_END
 
 /* Expected pt_norm([0.01..0.99] increment=0.01). */
-static const double pt_norm_expected[] = {
-	-INFINITY,
-	-2.32634787404084076, -2.05374891063182252, -1.88079360815125085,
-	-1.75068607125216946, -1.64485362695147264, -1.55477359459685305,
-	-1.47579102817917063, -1.40507156030963221, -1.34075503369021654,
-	-1.28155156554460081, -1.22652812003661049, -1.17498679206608991,
-	-1.12639112903880045, -1.08031934081495606, -1.03643338949378938,
-	-0.99445788320975281, -0.95416525314619416, -0.91536508784281390,
-	-0.87789629505122846, -0.84162123357291418, -0.80642124701824025,
-	-0.77219321418868492, -0.73884684918521371, -0.70630256284008752,
-	-0.67448975019608171, -0.64334540539291685, -0.61281299101662701,
-	-0.58284150727121620, -0.55338471955567281, -0.52440051270804067,
-	-0.49585034734745320, -0.46769879911450812, -0.43991316567323380,
-	-0.41246312944140462, -0.38532046640756751, -0.35845879325119373,
-	-0.33185334643681652, -0.30548078809939738, -0.27931903444745404,
-	-0.25334710313579978, -0.22754497664114931, -0.20189347914185077,
-	-0.17637416478086135, -0.15096921549677725, -0.12566134685507399,
-	-0.10043372051146975, -0.07526986209982976, -0.05015358346473352,
-	-0.02506890825871106, 0.00000000000000000, 0.02506890825871106,
-	0.05015358346473366, 0.07526986209982990, 0.10043372051146990,
-	0.12566134685507413, 0.15096921549677739, 0.17637416478086146,
-	0.20189347914185105, 0.22754497664114931, 0.25334710313579978,
-	0.27931903444745404, 0.30548078809939738, 0.33185334643681652,
-	0.35845879325119373, 0.38532046640756762, 0.41246312944140484,
-	0.43991316567323391, 0.46769879911450835, 0.49585034734745348,
-	0.52440051270804111, 0.55338471955567303, 0.58284150727121620,
-	0.61281299101662701, 0.64334540539291685, 0.67448975019608171,
-	0.70630256284008752, 0.73884684918521371, 0.77219321418868492,
-	0.80642124701824036, 0.84162123357291441, 0.87789629505122879,
-	0.91536508784281423, 0.95416525314619460, 0.99445788320975348,
-	1.03643338949378938, 1.08031934081495606, 1.12639112903880045,
-	1.17498679206608991, 1.22652812003661049, 1.28155156554460081,
-	1.34075503369021654, 1.40507156030963265, 1.47579102817917085,
-	1.55477359459685394, 1.64485362695147308, 1.75068607125217102,
-	1.88079360815125041, 2.05374891063182208, 2.32634787404084076
-};
+static const double pt_norm_expected[] = {-INFINITY, -2.32634787404084076,
+    -2.05374891063182252, -1.88079360815125085, -1.75068607125216946,
+    -1.64485362695147264, -1.55477359459685305, -1.47579102817917063,
+    -1.40507156030963221, -1.34075503369021654, -1.28155156554460081,
+    -1.22652812003661049, -1.17498679206608991, -1.12639112903880045,
+    -1.08031934081495606, -1.03643338949378938, -0.99445788320975281,
+    -0.95416525314619416, -0.91536508784281390, -0.87789629505122846,
+    -0.84162123357291418, -0.80642124701824025, -0.77219321418868492,
+    -0.73884684918521371, -0.70630256284008752, -0.67448975019608171,
+    -0.64334540539291685, -0.61281299101662701, -0.58284150727121620,
+    -0.55338471955567281, -0.52440051270804067, -0.49585034734745320,
+    -0.46769879911450812, -0.43991316567323380, -0.41246312944140462,
+    -0.38532046640756751, -0.35845879325119373, -0.33185334643681652,
+    -0.30548078809939738, -0.27931903444745404, -0.25334710313579978,
+    -0.22754497664114931, -0.20189347914185077, -0.17637416478086135,
+    -0.15096921549677725, -0.12566134685507399, -0.10043372051146975,
+    -0.07526986209982976, -0.05015358346473352, -0.02506890825871106,
+    0.00000000000000000, 0.02506890825871106, 0.05015358346473366,
+    0.07526986209982990, 0.10043372051146990, 0.12566134685507413,
+    0.15096921549677739, 0.17637416478086146, 0.20189347914185105,
+    0.22754497664114931, 0.25334710313579978, 0.27931903444745404,
+    0.30548078809939738, 0.33185334643681652, 0.35845879325119373,
+    0.38532046640756762, 0.41246312944140484, 0.43991316567323391,
+    0.46769879911450835, 0.49585034734745348, 0.52440051270804111,
+    0.55338471955567303, 0.58284150727121620, 0.61281299101662701,
+    0.64334540539291685, 0.67448975019608171, 0.70630256284008752,
+    0.73884684918521371, 0.77219321418868492, 0.80642124701824036,
+    0.84162123357291441, 0.87789629505122879, 0.91536508784281423,
+    0.95416525314619460, 0.99445788320975348, 1.03643338949378938,
+    1.08031934081495606, 1.12639112903880045, 1.17498679206608991,
+    1.22652812003661049, 1.28155156554460081, 1.34075503369021654,
+    1.40507156030963265, 1.47579102817917085, 1.55477359459685394,
+    1.64485362695147308, 1.75068607125217102, 1.88079360815125041,
+    2.05374891063182208, 2.32634787404084076};
 
 TEST_BEGIN(test_pt_norm) {
 	unsigned i;
 
-	for (i = 1; i < sizeof(pt_norm_expected)/sizeof(double); i++) {
+	for (i = 1; i < sizeof(pt_norm_expected) / sizeof(double); i++) {
 		double p = (double)i * 0.01;
 		expect_true(double_eq_rel(pt_norm(p), pt_norm_expected[i],
-		    MAX_REL_ERR, MAX_ABS_ERR),
+		                MAX_REL_ERR, MAX_ABS_ERR),
 		    "Incorrect pt_norm result for i=%u", i);
 	}
 }
@@ -254,49 +251,49 @@ TEST_END
  *                  df={0.1, 1.1, 10.1, 100.1, 1000.1}).
  */
 static const double pt_chi2_df[] = {0.1, 1.1, 10.1, 100.1, 1000.1};
-static const double pt_chi2_expected[] = {
-	1.168926411457320e-40, 1.347680397072034e-22, 3.886980416666260e-17,
-	8.245951724356564e-14, 2.068936347497604e-11, 1.562561743309233e-09,
-	5.459543043426564e-08, 1.114775688149252e-06, 1.532101202364371e-05,
-	1.553884683726585e-04, 1.239396954915939e-03, 8.153872320255721e-03,
-	4.631183739647523e-02, 2.473187311701327e-01, 2.175254800183617e+00,
+static const double pt_chi2_expected[] = {1.168926411457320e-40,
+    1.347680397072034e-22, 3.886980416666260e-17, 8.245951724356564e-14,
+    2.068936347497604e-11, 1.562561743309233e-09, 5.459543043426564e-08,
+    1.114775688149252e-06, 1.532101202364371e-05, 1.553884683726585e-04,
+    1.239396954915939e-03, 8.153872320255721e-03, 4.631183739647523e-02,
+    2.473187311701327e-01, 2.175254800183617e+00,
 
-	0.0003729887888876379, 0.0164409238228929513, 0.0521523015190650113,
-	0.1064701372271216612, 0.1800913735793082115, 0.2748704281195626931,
-	0.3939246282787986497, 0.5420727552260817816, 0.7267265822221973259,
-	0.9596554296000253670, 1.2607440376386165326, 1.6671185084541604304,
-	2.2604828984738705167, 3.2868613342148607082, 6.9298574921692139839,
+    0.0003729887888876379, 0.0164409238228929513, 0.0521523015190650113,
+    0.1064701372271216612, 0.1800913735793082115, 0.2748704281195626931,
+    0.3939246282787986497, 0.5420727552260817816, 0.7267265822221973259,
+    0.9596554296000253670, 1.2607440376386165326, 1.6671185084541604304,
+    2.2604828984738705167, 3.2868613342148607082, 6.9298574921692139839,
 
-	2.606673548632508, 4.602913725294877, 5.646152813924212,
-	6.488971315540869, 7.249823275816285, 7.977314231410841,
-	8.700354939944047, 9.441728024225892, 10.224338321374127,
-	11.076435368801061, 12.039320937038386, 13.183878752697167,
-	14.657791935084575, 16.885728216339373, 23.361991680031817,
+    2.606673548632508, 4.602913725294877, 5.646152813924212, 6.488971315540869,
+    7.249823275816285, 7.977314231410841, 8.700354939944047, 9.441728024225892,
+    10.224338321374127, 11.076435368801061, 12.039320937038386,
+    13.183878752697167, 14.657791935084575, 16.885728216339373,
+    23.361991680031817,
 
-	70.14844087392152, 80.92379498849355, 85.53325420085891,
-	88.94433120715347, 91.83732712857017, 94.46719943606301,
-	96.96896479994635, 99.43412843510363, 101.94074719829733,
-	104.57228644307247, 107.43900093448734, 110.71844673417287,
-	114.76616819871325, 120.57422505959563, 135.92318818757556,
+    70.14844087392152, 80.92379498849355, 85.53325420085891, 88.94433120715347,
+    91.83732712857017, 94.46719943606301, 96.96896479994635, 99.43412843510363,
+    101.94074719829733, 104.57228644307247, 107.43900093448734,
+    110.71844673417287, 114.76616819871325, 120.57422505959563,
+    135.92318818757556,
 
-	899.0072447849649, 937.9271278858220, 953.8117189560207,
-	965.3079371501154, 974.8974061207954, 983.4936235182347,
-	991.5691170518946, 999.4334123954690, 1007.3391826856553,
-	1015.5445154999951, 1024.3777075619569, 1034.3538789836223,
-	1046.4872561869577, 1063.5717461999654, 1107.0741966053859
-};
+    899.0072447849649, 937.9271278858220, 953.8117189560207, 965.3079371501154,
+    974.8974061207954, 983.4936235182347, 991.5691170518946, 999.4334123954690,
+    1007.3391826856553, 1015.5445154999951, 1024.3777075619569,
+    1034.3538789836223, 1046.4872561869577, 1063.5717461999654,
+    1107.0741966053859};
 
 TEST_BEGIN(test_pt_chi2) {
 	unsigned i, j;
 	unsigned e = 0;
 
-	for (i = 0; i < sizeof(pt_chi2_df)/sizeof(double); i++) {
+	for (i = 0; i < sizeof(pt_chi2_df) / sizeof(double); i++) {
 		double df = pt_chi2_df[i];
 		double ln_gamma_df = ln_gamma(df * 0.5);
 		for (j = 1; j < 100; j += 7) {
 			double p = (double)j * 0.01;
-			expect_true(double_eq_rel(pt_chi2(p, df, ln_gamma_df),
-			    pt_chi2_expected[e], MAX_REL_ERR, MAX_ABS_ERR),
+			expect_true(
+			    double_eq_rel(pt_chi2(p, df, ln_gamma_df),
+			        pt_chi2_expected[e], MAX_REL_ERR, MAX_ABS_ERR),
 			    "Incorrect pt_chi2 result for i=%u, j=%u", i, j);
 			e++;
 		}
@@ -309,56 +306,56 @@ TEST_END
  *                   shape=[0.5..3.0] increment=0.5).
  */
 static const double pt_gamma_shape[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0};
-static const double pt_gamma_expected[] = {
-	7.854392895485103e-05, 5.043466107888016e-03, 1.788288957794883e-02,
-	3.900956150232906e-02, 6.913847560638034e-02, 1.093710833465766e-01,
-	1.613412523825817e-01, 2.274682115597864e-01, 3.114117323127083e-01,
-	4.189466220207417e-01, 5.598106789059246e-01, 7.521856146202706e-01,
-	1.036125427911119e+00, 1.532450860038180e+00, 3.317448300510606e+00,
+static const double pt_gamma_expected[] = {7.854392895485103e-05,
+    5.043466107888016e-03, 1.788288957794883e-02, 3.900956150232906e-02,
+    6.913847560638034e-02, 1.093710833465766e-01, 1.613412523825817e-01,
+    2.274682115597864e-01, 3.114117323127083e-01, 4.189466220207417e-01,
+    5.598106789059246e-01, 7.521856146202706e-01, 1.036125427911119e+00,
+    1.532450860038180e+00, 3.317448300510606e+00,
 
-	0.01005033585350144, 0.08338160893905107, 0.16251892949777497,
-	0.24846135929849966, 0.34249030894677596, 0.44628710262841947,
-	0.56211891815354142, 0.69314718055994529, 0.84397007029452920,
-	1.02165124753198167, 1.23787435600161766, 1.51412773262977574,
-	1.89711998488588196, 2.52572864430825783, 4.60517018598809091,
+    0.01005033585350144, 0.08338160893905107, 0.16251892949777497,
+    0.24846135929849966, 0.34249030894677596, 0.44628710262841947,
+    0.56211891815354142, 0.69314718055994529, 0.84397007029452920,
+    1.02165124753198167, 1.23787435600161766, 1.51412773262977574,
+    1.89711998488588196, 2.52572864430825783, 4.60517018598809091,
 
-	0.05741590094955853, 0.24747378084860744, 0.39888572212236084,
-	0.54394139997444901, 0.69048812513915159, 0.84311389861296104,
-	1.00580622221479898, 1.18298694218766931, 1.38038096305861213,
-	1.60627736383027453, 1.87396970522337947, 2.20749220408081070,
-	2.65852391865854942, 3.37934630984842244, 5.67243336507218476,
+    0.05741590094955853, 0.24747378084860744, 0.39888572212236084,
+    0.54394139997444901, 0.69048812513915159, 0.84311389861296104,
+    1.00580622221479898, 1.18298694218766931, 1.38038096305861213,
+    1.60627736383027453, 1.87396970522337947, 2.20749220408081070,
+    2.65852391865854942, 3.37934630984842244, 5.67243336507218476,
 
-	0.1485547402532659, 0.4657458011640391, 0.6832386130709406,
-	0.8794297834672100, 1.0700752852474524, 1.2629614217350744,
-	1.4638400448580779, 1.6783469900166610, 1.9132338090606940,
-	2.1778589228618777, 2.4868823970010991, 2.8664695666264195,
-	3.3724415436062114, 4.1682658512758071, 6.6383520679938108,
+    0.1485547402532659, 0.4657458011640391, 0.6832386130709406,
+    0.8794297834672100, 1.0700752852474524, 1.2629614217350744,
+    1.4638400448580779, 1.6783469900166610, 1.9132338090606940,
+    2.1778589228618777, 2.4868823970010991, 2.8664695666264195,
+    3.3724415436062114, 4.1682658512758071, 6.6383520679938108,
 
-	0.2771490383641385, 0.7195001279643727, 0.9969081732265243,
-	1.2383497880608061, 1.4675206597269927, 1.6953064251816552,
-	1.9291243435606809, 2.1757300955477641, 2.4428032131216391,
-	2.7406534569230616, 3.0851445039665513, 3.5043101122033367,
-	4.0575997065264637, 4.9182956424675286, 7.5431362346944937,
+    0.2771490383641385, 0.7195001279643727, 0.9969081732265243,
+    1.2383497880608061, 1.4675206597269927, 1.6953064251816552,
+    1.9291243435606809, 2.1757300955477641, 2.4428032131216391,
+    2.7406534569230616, 3.0851445039665513, 3.5043101122033367,
+    4.0575997065264637, 4.9182956424675286, 7.5431362346944937,
 
-	0.4360451650782932, 0.9983600902486267, 1.3306365880734528,
-	1.6129750834753802, 1.8767241606994294, 2.1357032436097660,
-	2.3988853336865565, 2.6740603137235603, 2.9697561737517959,
-	3.2971457713883265, 3.6731795898504660, 4.1275751617770631,
-	4.7230515633946677, 5.6417477865306020, 8.4059469148854635
-};
+    0.4360451650782932, 0.9983600902486267, 1.3306365880734528,
+    1.6129750834753802, 1.8767241606994294, 2.1357032436097660,
+    2.3988853336865565, 2.6740603137235603, 2.9697561737517959,
+    3.2971457713883265, 3.6731795898504660, 4.1275751617770631,
+    4.7230515633946677, 5.6417477865306020, 8.4059469148854635};
 
 TEST_BEGIN(test_pt_gamma_shape) {
 	unsigned i, j;
 	unsigned e = 0;
 
-	for (i = 0; i < sizeof(pt_gamma_shape)/sizeof(double); i++) {
+	for (i = 0; i < sizeof(pt_gamma_shape) / sizeof(double); i++) {
 		double shape = pt_gamma_shape[i];
 		double ln_gamma_shape = ln_gamma(shape);
 		for (j = 1; j < 100; j += 7) {
 			double p = (double)j * 0.01;
-			expect_true(double_eq_rel(pt_gamma(p, shape, 1.0,
-			    ln_gamma_shape), pt_gamma_expected[e], MAX_REL_ERR,
-			    MAX_ABS_ERR),
+			expect_true(
+			    double_eq_rel(
+			        pt_gamma(p, shape, 1.0, ln_gamma_shape),
+			        pt_gamma_expected[e], MAX_REL_ERR, MAX_ABS_ERR),
 			    "Incorrect pt_gamma result for i=%u, j=%u", i, j);
 			e++;
 		}
@@ -370,21 +367,16 @@ TEST_BEGIN(test_pt_gamma_scale) {
 	double shape = 1.0;
 	double ln_gamma_shape = ln_gamma(shape);
 
-	expect_true(double_eq_rel(
-	    pt_gamma(0.5, shape, 1.0, ln_gamma_shape) * 10.0,
-	    pt_gamma(0.5, shape, 10.0, ln_gamma_shape), MAX_REL_ERR,
-	    MAX_ABS_ERR),
+	expect_true(
+	    double_eq_rel(pt_gamma(0.5, shape, 1.0, ln_gamma_shape) * 10.0,
+	        pt_gamma(0.5, shape, 10.0, ln_gamma_shape), MAX_REL_ERR,
+	        MAX_ABS_ERR),
 	    "Scale should be trivially equivalent to external multiplication");
 }
 TEST_END
 
 int
 main(void) {
-	return test(
-	    test_ln_gamma_factorial,
-	    test_ln_gamma_misc,
-	    test_pt_norm,
-	    test_pt_chi2,
-	    test_pt_gamma_shape,
-	    test_pt_gamma_scale);
+	return test(test_ln_gamma_factorial, test_ln_gamma_misc, test_pt_norm,
+	    test_pt_chi2, test_pt_gamma_shape, test_pt_gamma_scale);
 }
diff --git a/test/unit/mpsc_queue.c b/test/unit/mpsc_queue.c
index 895edf84..d22d5488 100644
--- a/test/unit/mpsc_queue.c
+++ b/test/unit/mpsc_queue.c
@@ -12,10 +12,10 @@ struct elem_s {
 };
 
 /* Include both proto and gen to make sure they match up. */
-mpsc_queue_proto(static, elem_mpsc_queue_, elem_mpsc_queue_t, elem_t,
-    elem_list_t);
-mpsc_queue_gen(static, elem_mpsc_queue_, elem_mpsc_queue_t, elem_t,
-    elem_list_t, link);
+mpsc_queue_proto(
+    static, elem_mpsc_queue_, elem_mpsc_queue_t, elem_t, elem_list_t);
+mpsc_queue_gen(
+    static, elem_mpsc_queue_, elem_mpsc_queue_t, elem_t, elem_list_t, link);
 
 static void
 init_elems_simple(elem_t *elems, int nelems, int thread) {
@@ -29,8 +29,8 @@ init_elems_simple(elem_t *elems, int nelems, int thread) {
 static void
 check_elems_simple(elem_list_t *list, int nelems, int thread) {
 	elem_t *elem;
-	int next_idx = 0;
-	ql_foreach(elem, list, link) {
+	int     next_idx = 0;
+	ql_foreach (elem, list, link) {
 		expect_d_lt(next_idx, nelems, "Too many list items");
 		expect_d_eq(thread, elem->thread, "");
 		expect_d_eq(next_idx, elem->idx, "List out of order");
@@ -39,9 +39,9 @@ check_elems_simple(elem_list_t *list, int nelems, int thread) {
 }
 
 TEST_BEGIN(test_simple) {
-	enum {NELEMS = 10};
-	elem_t elems[NELEMS];
-	elem_list_t list;
+	enum { NELEMS = 10 };
+	elem_t            elems[NELEMS];
+	elem_list_t       list;
 	elem_mpsc_queue_t queue;
 
 	/* Pop empty queue onto empty list -> empty list */
@@ -82,7 +82,6 @@ TEST_BEGIN(test_simple) {
 	}
 	elem_mpsc_queue_pop_batch(&queue, &list);
 	check_elems_simple(&list, NELEMS, 0);
-
 }
 TEST_END
 
@@ -137,7 +136,7 @@ TEST_BEGIN(test_push_single_or_batch) {
 TEST_END
 
 TEST_BEGIN(test_multi_op) {
-	enum {NELEMS = 20};
+	enum { NELEMS = 20 };
 	elem_t elems[NELEMS];
 	init_elems_simple(elems, NELEMS, 0);
 	elem_list_t push_list;
@@ -176,30 +175,29 @@ TEST_BEGIN(test_multi_op) {
 	elem_mpsc_queue_pop_batch(&queue, &result_list);
 
 	check_elems_simple(&result_list, NELEMS, 0);
-
 }
 TEST_END
 
 typedef struct pusher_arg_s pusher_arg_t;
 struct pusher_arg_s {
 	elem_mpsc_queue_t *queue;
-	int thread;
-	elem_t *elems;
-	int nelems;
+	int                thread;
+	elem_t            *elems;
+	int                nelems;
 };
 
 typedef struct popper_arg_s popper_arg_t;
 struct popper_arg_s {
 	elem_mpsc_queue_t *queue;
-	int npushers;
-	int nelems_per_pusher;
-	int *pusher_counts;
+	int                npushers;
+	int                nelems_per_pusher;
+	int               *pusher_counts;
 };
 
 static void *
 thd_pusher(void *void_arg) {
 	pusher_arg_t *arg = (pusher_arg_t *)void_arg;
-	int next_idx = 0;
+	int           next_idx = 0;
 	while (next_idx < arg->nelems) {
 		/* Push 10 items in batch. */
 		elem_list_t list;
@@ -216,7 +214,6 @@ thd_pusher(void *void_arg) {
 			elem_mpsc_queue_push(arg->queue, &arg->elems[next_idx]);
 			next_idx++;
 		}
-
 	}
 	return NULL;
 }
@@ -224,13 +221,13 @@ thd_pusher(void *void_arg) {
 static void *
 thd_popper(void *void_arg) {
 	popper_arg_t *arg = (popper_arg_t *)void_arg;
-	int done_pushers = 0;
+	int           done_pushers = 0;
 	while (done_pushers < arg->npushers) {
 		elem_list_t list;
 		ql_new(&list);
 		elem_mpsc_queue_pop_batch(arg->queue, &list);
 		elem_t *elem;
-		ql_foreach(elem, &list, link) {
+		ql_foreach (elem, &list, link) {
 			int thread = elem->thread;
 			int idx = elem->idx;
 			expect_d_eq(arg->pusher_counts[thread], idx,
@@ -248,12 +245,12 @@ thd_popper(void *void_arg) {
 TEST_BEGIN(test_multiple_threads) {
 	enum {
 		NPUSHERS = 4,
-		NELEMS_PER_PUSHER = 1000*1000,
+		NELEMS_PER_PUSHER = 1000 * 1000,
 	};
-	thd_t pushers[NPUSHERS];
+	thd_t        pushers[NPUSHERS];
 	pusher_arg_t pusher_arg[NPUSHERS];
 
-	thd_t popper;
+	thd_t        popper;
 	popper_arg_t popper_arg;
 
 	elem_mpsc_queue_t queue;
@@ -296,9 +293,6 @@ TEST_END
 
 int
 main(void) {
-	return test_no_reentrancy(
-	    test_simple,
-	    test_push_single_or_batch,
-	    test_multi_op,
-	    test_multiple_threads);
+	return test_no_reentrancy(test_simple, test_push_single_or_batch,
+	    test_multi_op, test_multiple_threads);
 }
diff --git a/test/unit/mq.c b/test/unit/mq.c
index f833f77c..9b3b547a 100644
--- a/test/unit/mq.c
+++ b/test/unit/mq.c
@@ -1,22 +1,22 @@
 #include "test/jemalloc_test.h"
 
-#define NSENDERS	3
-#define NMSGS		100000
+#define NSENDERS 3
+#define NMSGS 100000
 
 typedef struct mq_msg_s mq_msg_t;
 struct mq_msg_s {
-	mq_msg(mq_msg_t)	link;
+	mq_msg(mq_msg_t) link;
 };
 mq_gen(static, mq_, mq_t, mq_msg_t, link)
 
-TEST_BEGIN(test_mq_basic) {
-	mq_t mq;
+    TEST_BEGIN(test_mq_basic) {
+	mq_t     mq;
 	mq_msg_t msg;
 
 	expect_false(mq_init(&mq), "Unexpected mq_init() failure");
 	expect_u_eq(mq_count(&mq), 0, "mq should be empty");
-	expect_ptr_null(mq_tryget(&mq),
-	    "mq_tryget() should fail when the queue is empty");
+	expect_ptr_null(
+	    mq_tryget(&mq), "mq_tryget() should fail when the queue is empty");
 
 	mq_put(&mq, &msg);
 	expect_u_eq(mq_count(&mq), 1, "mq should contain one message");
@@ -31,7 +31,7 @@ TEST_END
 
 static void *
 thd_receiver_start(void *arg) {
-	mq_t *mq = (mq_t *)arg;
+	mq_t    *mq = (mq_t *)arg;
 	unsigned i;
 
 	for (i = 0; i < (NSENDERS * NMSGS); i++) {
@@ -44,12 +44,12 @@ thd_receiver_start(void *arg) {
 
 static void *
 thd_sender_start(void *arg) {
-	mq_t *mq = (mq_t *)arg;
+	mq_t    *mq = (mq_t *)arg;
 	unsigned i;
 
 	for (i = 0; i < NMSGS; i++) {
 		mq_msg_t *msg;
-		void *p;
+		void     *p;
 		p = mallocx(sizeof(mq_msg_t), 0);
 		expect_ptr_not_null(p, "Unexpected mallocx() failure");
 		msg = (mq_msg_t *)p;
@@ -59,9 +59,9 @@ thd_sender_start(void *arg) {
 }
 
 TEST_BEGIN(test_mq_threaded) {
-	mq_t mq;
-	thd_t receiver;
-	thd_t senders[NSENDERS];
+	mq_t     mq;
+	thd_t    receiver;
+	thd_t    senders[NSENDERS];
 	unsigned i;
 
 	expect_false(mq_init(&mq), "Unexpected mq_init() failure");
@@ -82,8 +82,5 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_mq_basic,
-	    test_mq_threaded);
+	return test(test_mq_basic, test_mq_threaded);
 }
-
diff --git a/test/unit/mtx.c b/test/unit/mtx.c
index 4aeebc13..0fe15a90 100644
--- a/test/unit/mtx.c
+++ b/test/unit/mtx.c
@@ -1,7 +1,7 @@
 #include "test/jemalloc_test.h"
 
-#define NTHREADS	2
-#define NINCRS		2000000
+#define NTHREADS 2
+#define NINCRS 2000000
 
 TEST_BEGIN(test_mtx_basic) {
 	mtx_t mtx;
@@ -14,14 +14,14 @@ TEST_BEGIN(test_mtx_basic) {
 TEST_END
 
 typedef struct {
-	mtx_t		mtx;
-	unsigned	x;
+	mtx_t    mtx;
+	unsigned x;
 } thd_start_arg_t;
 
 static void *
 thd_start(void *varg) {
 	thd_start_arg_t *arg = (thd_start_arg_t *)varg;
-	unsigned i;
+	unsigned         i;
 
 	for (i = 0; i < NINCRS; i++) {
 		mtx_lock(&arg->mtx);
@@ -33,8 +33,8 @@ thd_start(void *varg) {
 
 TEST_BEGIN(test_mtx_race) {
 	thd_start_arg_t arg;
-	thd_t thds[NTHREADS];
-	unsigned i;
+	thd_t           thds[NTHREADS];
+	unsigned        i;
 
 	expect_false(mtx_init(&arg.mtx), "Unexpected mtx_init() failure");
 	arg.x = 0;
@@ -44,14 +44,12 @@ TEST_BEGIN(test_mtx_race) {
 	for (i = 0; i < NTHREADS; i++) {
 		thd_join(thds[i], NULL);
 	}
-	expect_u_eq(arg.x, NTHREADS * NINCRS,
-	    "Race-related counter corruption");
+	expect_u_eq(
+	    arg.x, NTHREADS * NINCRS, "Race-related counter corruption");
 }
 TEST_END
 
 int
 main(void) {
-	return test(
-	    test_mtx_basic,
-	    test_mtx_race);
+	return test(test_mtx_basic, test_mtx_race);
 }
diff --git a/test/unit/ncached_max.c b/test/unit/ncached_max.c
new file mode 100644
index 00000000..4724f55b
--- /dev/null
+++ b/test/unit/ncached_max.c
@@ -0,0 +1,268 @@
+#include "test/jemalloc_test.h"
+#include "test/san.h"
+
+const char *malloc_conf =
+    "tcache_ncached_max:256-1024:1001|2048-2048:0|8192-8192:1,tcache_max:4096";
+extern void tcache_bin_info_compute(
+    cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]);
+extern bool                    tcache_get_default_ncached_max_set(szind_t ind);
+extern const cache_bin_info_t *tcache_get_default_ncached_max(void);
+
+static void
+check_bins_info(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) {
+	size_t mib_get[4], mib_get_len;
+	mib_get_len = sizeof(mib_get) / sizeof(size_t);
+	const char *get_name = "thread.tcache.ncached_max.read_sizeclass";
+	size_t      ncached_max;
+	size_t      sz = sizeof(size_t);
+	expect_d_eq(mallctlnametomib(get_name, mib_get, &mib_get_len), 0,
+	    "Unexpected mallctlnametomib() failure");
+
+	for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) {
+		size_t bin_size = sz_index2size(i);
+		expect_d_eq(
+		    mallctlbymib(mib_get, mib_get_len, (void *)&ncached_max,
+		        &sz, (void *)&bin_size, sizeof(size_t)),
+		    0, "Unexpected mallctlbymib() failure");
+		expect_zu_eq(ncached_max, tcache_bin_info[i].ncached_max,
+		    "Unexpected ncached_max for bin %d", i);
+		/* Check ncached_max returned under a non-bin size. */
+		bin_size--;
+		size_t temp_ncached_max = 0;
+		expect_d_eq(mallctlbymib(mib_get, mib_get_len,
+		                (void *)&temp_ncached_max, &sz,
+		                (void *)&bin_size, sizeof(size_t)),
+		    0, "Unexpected mallctlbymib() failure");
+		expect_zu_eq(temp_ncached_max, ncached_max,
+		    "Unexpected ncached_max for inaccurate bin size.");
+	}
+}
+
+static void *
+ncached_max_check(void *args) {
+	cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX];
+	cache_bin_info_t tcache_bin_info_backup[TCACHE_NBINS_MAX];
+	tsd_t           *tsd = tsd_fetch();
+	tcache_t        *tcache = tsd_tcachep_get(tsd);
+	assert(tcache != NULL);
+	tcache_slow_t *tcache_slow = tcache->tcache_slow;
+
+	tcache_bin_info_compute(tcache_bin_info);
+	memcpy(
+	    tcache_bin_info_backup, tcache_bin_info, sizeof(tcache_bin_info));
+	/* Check ncached_max set by malloc_conf. */
+	for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) {
+		bool           first_range = (i >= sz_size2index(256)
+                    && i <= sz_size2index(1024));
+		bool           second_range = (i == sz_size2index(2048));
+		bool           third_range = (i == sz_size2index(8192));
+		cache_bin_sz_t target_ncached_max = 0;
+		if (first_range || second_range || third_range) {
+			target_ncached_max = first_range
+			    ? 1001
+			    : (second_range ? 0 : 1);
+			expect_true(tcache_get_default_ncached_max_set(i),
+			    "Unexpected state for bin %u", i);
+			expect_zu_eq(target_ncached_max,
+			    tcache_bin_info[i].ncached_max,
+			    "Unexpected generated ncached_max for bin %u", i);
+			expect_zu_eq(target_ncached_max,
+			    tcache_get_default_ncached_max()[i].ncached_max,
+			    "Unexpected pre-set ncached_max for bin %u", i);
+		} else {
+			expect_false(tcache_get_default_ncached_max_set(i),
+			    "Unexpected state for bin %u", i);
+		}
+	}
+	unsigned nbins = tcache_nbins_get(tcache_slow);
+	for (szind_t i = nbins; i < TCACHE_NBINS_MAX; i++) {
+		cache_bin_info_init(&tcache_bin_info[i], 0);
+	}
+	/* Check the initial bin settings. */
+	check_bins_info(tcache_bin_info);
+
+	size_t mib_set[4], mib_set_len;
+	mib_set_len = sizeof(mib_set) / sizeof(size_t);
+	const char *set_name = "thread.tcache.ncached_max.write";
+	expect_d_eq(mallctlnametomib(set_name, mib_set, &mib_set_len), 0,
+	    "Unexpected mallctlnametomib() failure");
+
+	/* Test the ncached_max set with tcache on. */
+	char  inputs[100] = "8-128:1|160-160:11|170-320:22|224-8388609:0";
+	char *inputp = inputs;
+	expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL,
+	                (void *)&inputp, sizeof(char *)),
+	    0, "Unexpected mallctlbymib() failure");
+	for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) {
+		if (i >= sz_size2index(8) && i <= sz_size2index(128)) {
+			cache_bin_info_init(&tcache_bin_info[i], 1);
+		}
+		if (i == sz_size2index(160)) {
+			cache_bin_info_init(&tcache_bin_info[i], 11);
+		}
+		if (i >= sz_size2index(170) && i <= sz_size2index(320)) {
+			cache_bin_info_init(&tcache_bin_info[i], 22);
+		}
+		if (i >= sz_size2index(224)) {
+			cache_bin_info_init(&tcache_bin_info[i], 0);
+		}
+		if (i >= nbins) {
+			cache_bin_info_init(&tcache_bin_info[i], 0);
+		}
+	}
+	check_bins_info(tcache_bin_info);
+
+	/*
+	 * Close the tcache and set ncached_max of some bins.  It will be
+	 * set properly but thread.tcache.ncached_max.read still returns 0
+	 * since the bin is not available yet.  After enabling the tcache,
+	 * the new setting will not be carried on.  Instead, the default
+	 * settings will be applied.
+	 */
+	bool   e0 = false, e1;
+	size_t bool_sz = sizeof(bool);
+	expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz,
+	                (void *)&e0, bool_sz),
+	    0, "Unexpected mallctl() error");
+	expect_true(e1, "Unexpected previous tcache state");
+	strcpy(inputs, "0-112:8");
+	/* Setting returns ENOENT when the tcache is disabled. */
+	expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL,
+	                (void *)&inputp, sizeof(char *)),
+	    ENOENT, "Unexpected mallctlbymib() failure");
+	/* All ncached_max should return 0 once tcache is disabled. */
+	for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) {
+		cache_bin_info_init(&tcache_bin_info[i], 0);
+	}
+	check_bins_info(tcache_bin_info);
+
+	e0 = true;
+	expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz,
+	                (void *)&e0, bool_sz),
+	    0, "Unexpected mallctl() error");
+	expect_false(e1, "Unexpected previous tcache state");
+	memcpy(tcache_bin_info, tcache_bin_info_backup,
+	    sizeof(tcache_bin_info_backup));
+	for (szind_t i = tcache_nbins_get(tcache_slow); i < TCACHE_NBINS_MAX;
+	     i++) {
+		cache_bin_info_init(&tcache_bin_info[i], 0);
+	}
+	check_bins_info(tcache_bin_info);
+
+	/*
+	 * Set ncached_max of bins not enabled yet.  Then, enable them by
+	 * resetting tcache_max.  The ncached_max changes should stay.
+	 */
+	size_t tcache_max = 1024;
+	assert_d_eq(mallctl("thread.tcache.max", NULL, NULL,
+	                (void *)&tcache_max, sizeof(size_t)),
+	    .0, "Unexpected.mallctl().failure");
+	for (szind_t i = sz_size2index(1024) + 1; i < TCACHE_NBINS_MAX; i++) {
+		cache_bin_info_init(&tcache_bin_info[i], 0);
+	}
+	strcpy(inputs, "2048-6144:123");
+	expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL,
+	                (void *)&inputp, sizeof(char *)),
+	    0, "Unexpected mallctlbymib() failure");
+	check_bins_info(tcache_bin_info);
+
+	tcache_max = 6144;
+	assert_d_eq(mallctl("thread.tcache.max", NULL, NULL,
+	                (void *)&tcache_max, sizeof(size_t)),
+	    .0, "Unexpected.mallctl().failure");
+	memcpy(tcache_bin_info, tcache_bin_info_backup,
+	    sizeof(tcache_bin_info_backup));
+	for (szind_t i = sz_size2index(2048); i < TCACHE_NBINS_MAX; i++) {
+		if (i <= sz_size2index(6144)) {
+			cache_bin_info_init(&tcache_bin_info[i], 123);
+		} else if (i > sz_size2index(6144)) {
+			cache_bin_info_init(&tcache_bin_info[i], 0);
+		}
+	}
+	check_bins_info(tcache_bin_info);
+
+	/* Test an empty input, it should do nothing. */
+	strcpy(inputs, "");
+	expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL,
+	                (void *)&inputp, sizeof(char *)),
+	    0, "Unexpected mallctlbymib() failure");
+	check_bins_info(tcache_bin_info);
+
+	/* Test a half-done string, it should return EINVAL and do nothing. */
+	strcpy(inputs, "4-1024:7|256-1024");
+	expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL,
+	                (void *)&inputp, sizeof(char *)),
+	    EINVAL, "Unexpected mallctlbymib() failure");
+	check_bins_info(tcache_bin_info);
+
+	/*
+	 * Test an invalid string with start size larger than end size.  It
+	 * should return success but do nothing.
+	 */
+	strcpy(inputs, "1024-256:7");
+	expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL,
+	                (void *)&inputp, sizeof(char *)),
+	    0, "Unexpected mallctlbymib() failure");
+	check_bins_info(tcache_bin_info);
+
+	/*
+	 * Test a string exceeding the length limit, it should return EINVAL
+	 * and do nothing.
+	 */
+	char *long_inputs = (char *)malloc(10000 * sizeof(char));
+	expect_true(long_inputs != NULL, "Unexpected allocation failure.");
+	for (int i = 0; i < 200; i++) {
+		memcpy(long_inputs + i * 9, "4-1024:3|", 9);
+	}
+	memcpy(long_inputs + 200 * 9, "4-1024:3", 8);
+	long_inputs[200 * 9 + 8] = '\0';
+	inputp = long_inputs;
+	expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL,
+	                (void *)&inputp, sizeof(char *)),
+	    EINVAL, "Unexpected mallctlbymib() failure");
+	check_bins_info(tcache_bin_info);
+	free(long_inputs);
+
+	/*
+	 * Test a string with invalid characters, it should return EINVAL
+	 * and do nothing.
+	 */
+	strcpy(inputs, "k8-1024:77p");
+	inputp = inputs;
+	expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL,
+	                (void *)&inputp, sizeof(char *)),
+	    EINVAL, "Unexpected mallctlbymib() failure");
+	check_bins_info(tcache_bin_info);
+
+	/* Test large ncached_max, it should return success but capped. */
+	strcpy(inputs, "1024-1024:65540");
+	expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL,
+	                (void *)&inputp, sizeof(char *)),
+	    0, "Unexpected mallctlbymib() failure");
+	cache_bin_info_init(
+	    &tcache_bin_info[sz_size2index(1024)], CACHE_BIN_NCACHED_MAX);
+	check_bins_info(tcache_bin_info);
+
+	return NULL;
+}
+
+TEST_BEGIN(test_ncached_max) {
+	test_skip_if(!config_stats);
+	test_skip_if(!opt_tcache);
+	test_skip_if(san_uaf_detection_enabled());
+	/* TODO: change nthreads to 8 to reduce CI loads. */
+	unsigned nthreads = 108;
+	VARIABLE_ARRAY(thd_t, threads, nthreads);
+	for (unsigned i = 0; i < nthreads; i++) {
+		thd_create(&threads[i], ncached_max_check, NULL);
+	}
+	for (unsigned i = 0; i < nthreads; i++) {
+		thd_join(threads[i], NULL);
+	}
+}
+TEST_END
+
+int
+main(void) {
+	return test(test_ncached_max);
+}
diff --git a/test/unit/nstime.c b/test/unit/nstime.c
index 56238ab3..8c095d09 100644
--- a/test/unit/nstime.c
+++ b/test/unit/nstime.c
@@ -1,6 +1,6 @@
 #include "test/jemalloc_test.h"
 
-#define BILLION	UINT64_C(1000000000)
+#define BILLION UINT64_C(1000000000)
 
 TEST_BEGIN(test_nstime_init) {
 	nstime_t nst;
@@ -43,24 +43,24 @@ TEST_BEGIN(test_nstime_compare) {
 	nstime_init2(&nstb, 42, 42);
 	expect_d_eq(nstime_compare(&nsta, &nstb), 1,
 	    "nsta should be greater than nstb");
-	expect_d_eq(nstime_compare(&nstb, &nsta), -1,
-	    "nstb should be less than nsta");
+	expect_d_eq(
+	    nstime_compare(&nstb, &nsta), -1, "nstb should be less than nsta");
 
 	nstime_init2(&nstb, 42, 44);
-	expect_d_eq(nstime_compare(&nsta, &nstb), -1,
-	    "nsta should be less than nstb");
+	expect_d_eq(
+	    nstime_compare(&nsta, &nstb), -1, "nsta should be less than nstb");
 	expect_d_eq(nstime_compare(&nstb, &nsta), 1,
 	    "nstb should be greater than nsta");
 
 	nstime_init2(&nstb, 41, BILLION - 1);
 	expect_d_eq(nstime_compare(&nsta, &nstb), 1,
 	    "nsta should be greater than nstb");
-	expect_d_eq(nstime_compare(&nstb, &nsta), -1,
-	    "nstb should be less than nsta");
+	expect_d_eq(
+	    nstime_compare(&nstb, &nsta), -1, "nstb should be less than nsta");
 
 	nstime_init2(&nstb, 43, 0);
-	expect_d_eq(nstime_compare(&nsta, &nstb), -1,
-	    "nsta should be less than nstb");
+	expect_d_eq(
+	    nstime_compare(&nsta, &nstb), -1, "nsta should be less than nstb");
 	expect_d_eq(nstime_compare(&nstb, &nsta), 1,
 	    "nstb should be greater than nsta");
 }
@@ -73,15 +73,15 @@ TEST_BEGIN(test_nstime_add) {
 	nstime_copy(&nstb, &nsta);
 	nstime_add(&nsta, &nstb);
 	nstime_init2(&nstb, 84, 86);
-	expect_d_eq(nstime_compare(&nsta, &nstb), 0,
-	    "Incorrect addition result");
+	expect_d_eq(
+	    nstime_compare(&nsta, &nstb), 0, "Incorrect addition result");
 
 	nstime_init2(&nsta, 42, BILLION - 1);
 	nstime_copy(&nstb, &nsta);
 	nstime_add(&nsta, &nstb);
 	nstime_init2(&nstb, 85, BILLION - 2);
-	expect_d_eq(nstime_compare(&nsta, &nstb), 0,
-	    "Incorrect addition result");
+	expect_d_eq(
+	    nstime_compare(&nsta, &nstb), 0, "Incorrect addition result");
 }
 TEST_END
 
@@ -91,14 +91,14 @@ TEST_BEGIN(test_nstime_iadd) {
 	nstime_init2(&nsta, 42, BILLION - 1);
 	nstime_iadd(&nsta, 1);
 	nstime_init2(&nstb, 43, 0);
-	expect_d_eq(nstime_compare(&nsta, &nstb), 0,
-	    "Incorrect addition result");
+	expect_d_eq(
+	    nstime_compare(&nsta, &nstb), 0, "Incorrect addition result");
 
 	nstime_init2(&nsta, 42, 1);
 	nstime_iadd(&nsta, BILLION + 1);
 	nstime_init2(&nstb, 43, 2);
-	expect_d_eq(nstime_compare(&nsta, &nstb), 0,
-	    "Incorrect addition result");
+	expect_d_eq(
+	    nstime_compare(&nsta, &nstb), 0, "Incorrect addition result");
 }
 TEST_END
 
@@ -109,15 +109,15 @@ TEST_BEGIN(test_nstime_subtract) {
 	nstime_copy(&nstb, &nsta);
 	nstime_subtract(&nsta, &nstb);
 	nstime_init_zero(&nstb);
-	expect_d_eq(nstime_compare(&nsta, &nstb), 0,
-	    "Incorrect subtraction result");
+	expect_d_eq(
+	    nstime_compare(&nsta, &nstb), 0, "Incorrect subtraction result");
 
 	nstime_init2(&nsta, 42, 43);
 	nstime_init2(&nstb, 41, 44);
 	nstime_subtract(&nsta, &nstb);
 	nstime_init2(&nstb, 0, BILLION - 1);
-	expect_d_eq(nstime_compare(&nsta, &nstb), 0,
-	    "Incorrect subtraction result");
+	expect_d_eq(
+	    nstime_compare(&nsta, &nstb), 0, "Incorrect subtraction result");
 }
 TEST_END
 
@@ -125,16 +125,16 @@ TEST_BEGIN(test_nstime_isubtract) {
 	nstime_t nsta, nstb;
 
 	nstime_init2(&nsta, 42, 43);
-	nstime_isubtract(&nsta, 42*BILLION + 43);
+	nstime_isubtract(&nsta, 42 * BILLION + 43);
 	nstime_init_zero(&nstb);
-	expect_d_eq(nstime_compare(&nsta, &nstb), 0,
-	    "Incorrect subtraction result");
+	expect_d_eq(
+	    nstime_compare(&nsta, &nstb), 0, "Incorrect subtraction result");
 
 	nstime_init2(&nsta, 42, 43);
-	nstime_isubtract(&nsta, 41*BILLION + 44);
+	nstime_isubtract(&nsta, 41 * BILLION + 44);
 	nstime_init2(&nstb, 0, BILLION - 1);
-	expect_d_eq(nstime_compare(&nsta, &nstb), 0,
-	    "Incorrect subtraction result");
+	expect_d_eq(
+	    nstime_compare(&nsta, &nstb), 0, "Incorrect subtraction result");
 }
 TEST_END
 
@@ -144,14 +144,14 @@ TEST_BEGIN(test_nstime_imultiply) {
 	nstime_init2(&nsta, 42, 43);
 	nstime_imultiply(&nsta, 10);
 	nstime_init2(&nstb, 420, 430);
-	expect_d_eq(nstime_compare(&nsta, &nstb), 0,
-	    "Incorrect multiplication result");
+	expect_d_eq(
+	    nstime_compare(&nsta, &nstb), 0, "Incorrect multiplication result");
 
 	nstime_init2(&nsta, 42, 666666666);
 	nstime_imultiply(&nsta, 3);
 	nstime_init2(&nstb, 127, 999999998);
-	expect_d_eq(nstime_compare(&nsta, &nstb), 0,
-	    "Incorrect multiplication result");
+	expect_d_eq(
+	    nstime_compare(&nsta, &nstb), 0, "Incorrect multiplication result");
 }
 TEST_END
 
@@ -162,15 +162,15 @@ TEST_BEGIN(test_nstime_idivide) {
 	nstime_copy(&nstb, &nsta);
 	nstime_imultiply(&nsta, 10);
 	nstime_idivide(&nsta, 10);
-	expect_d_eq(nstime_compare(&nsta, &nstb), 0,
-	    "Incorrect division result");
+	expect_d_eq(
+	    nstime_compare(&nsta, &nstb), 0, "Incorrect division result");
 
 	nstime_init2(&nsta, 42, 666666666);
 	nstime_copy(&nstb, &nsta);
 	nstime_imultiply(&nsta, 3);
 	nstime_idivide(&nsta, 3);
-	expect_d_eq(nstime_compare(&nsta, &nstb), 0,
-	    "Incorrect division result");
+	expect_d_eq(
+	    nstime_compare(&nsta, &nstb), 0, "Incorrect division result");
 }
 TEST_END
 
@@ -180,28 +180,28 @@ TEST_BEGIN(test_nstime_divide) {
 	nstime_init2(&nsta, 42, 43);
 	nstime_copy(&nstb, &nsta);
 	nstime_imultiply(&nsta, 10);
-	expect_u64_eq(nstime_divide(&nsta, &nstb), 10,
-	    "Incorrect division result");
+	expect_u64_eq(
+	    nstime_divide(&nsta, &nstb), 10, "Incorrect division result");
 
 	nstime_init2(&nsta, 42, 43);
 	nstime_copy(&nstb, &nsta);
 	nstime_imultiply(&nsta, 10);
 	nstime_init(&nstc, 1);
 	nstime_add(&nsta, &nstc);
-	expect_u64_eq(nstime_divide(&nsta, &nstb), 10,
-	    "Incorrect division result");
+	expect_u64_eq(
+	    nstime_divide(&nsta, &nstb), 10, "Incorrect division result");
 
 	nstime_init2(&nsta, 42, 43);
 	nstime_copy(&nstb, &nsta);
 	nstime_imultiply(&nsta, 10);
 	nstime_init(&nstc, 1);
 	nstime_subtract(&nsta, &nstc);
-	expect_u64_eq(nstime_divide(&nsta, &nstb), 9,
-	    "Incorrect division result");
+	expect_u64_eq(
+	    nstime_divide(&nsta, &nstb), 9, "Incorrect division result");
 }
 TEST_END
 
-void
+static void
 test_nstime_since_once(nstime_t *t) {
 	nstime_t old_t;
 	nstime_copy(&old_t, t);
@@ -213,8 +213,8 @@ test_nstime_since_once(nstime_t *t) {
 	nstime_copy(&new_t, t);
 	nstime_subtract(&new_t, &old_t);
 
-	expect_u64_ge(nstime_ns(&new_t), ns_since,
-	    "Incorrect time since result");
+	expect_u64_ge(
+	    nstime_ns(&new_t), ns_since, "Incorrect time since result");
 }
 
 TEST_BEGIN(test_nstime_ns_since) {
@@ -228,6 +228,24 @@ TEST_BEGIN(test_nstime_ns_since) {
 }
 TEST_END
 
+TEST_BEGIN(test_nstime_ms_since) {
+	nstime_t delta;
+
+	nstime_init2(&delta, /* sec */ 1, /* nsec */ 0);
+	for (uint64_t i = 0; i < 10000; i++) {
+		nstime_t now;
+		nstime_init_update(&now);
+
+		nstime_t past;
+		nstime_copy(&past, &now);
+		nstime_subtract(&past, &delta);
+
+		expect_u64_ge(nstime_ms_since(&past), nstime_ms(&delta),
+		    "Incorrect time since result");
+	}
+}
+TEST_END
+
 TEST_BEGIN(test_nstime_monotonic) {
 	nstime_monotonic();
 }
@@ -235,18 +253,9 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_nstime_init,
-	    test_nstime_init2,
-	    test_nstime_copy,
-	    test_nstime_compare,
-	    test_nstime_add,
-	    test_nstime_iadd,
-	    test_nstime_subtract,
-	    test_nstime_isubtract,
-	    test_nstime_imultiply,
-	    test_nstime_idivide,
-	    test_nstime_divide,
-	    test_nstime_ns_since,
-	    test_nstime_monotonic);
+	return test(test_nstime_init, test_nstime_init2, test_nstime_copy,
+	    test_nstime_compare, test_nstime_add, test_nstime_iadd,
+	    test_nstime_subtract, test_nstime_isubtract, test_nstime_imultiply,
+	    test_nstime_idivide, test_nstime_divide, test_nstime_ns_since,
+	    test_nstime_ms_since, test_nstime_monotonic);
 }
diff --git a/test/unit/oversize_threshold.c b/test/unit/oversize_threshold.c
index 44a8f76a..5d9aae10 100644
--- a/test/unit/oversize_threshold.c
+++ b/test/unit/oversize_threshold.c
@@ -5,7 +5,7 @@
 static void
 arena_mallctl(const char *mallctl_str, unsigned arena, void *oldp,
     size_t *oldlen, void *newp, size_t newlen) {
-	int err;
+	int  err;
 	char buf[100];
 	malloc_snprintf(buf, sizeof(buf), mallctl_str, arena);
 
@@ -14,13 +14,13 @@ arena_mallctl(const char *mallctl_str, unsigned arena, void *oldp,
 }
 
 TEST_BEGIN(test_oversize_threshold_get_set) {
-	int err;
+	int    err;
 	size_t old_threshold;
 	size_t new_threshold;
 	size_t threshold_sz = sizeof(old_threshold);
 
 	unsigned arena;
-	size_t arena_sz = sizeof(arena);
+	size_t   arena_sz = sizeof(arena);
 	err = mallctl("arenas.create", (void *)&arena, &arena_sz, NULL, 0);
 	expect_d_eq(0, err, "Arena creation failed");
 
@@ -38,13 +38,14 @@ TEST_BEGIN(test_oversize_threshold_get_set) {
 	/* Just a read */
 	arena_mallctl("arena.%u.oversize_threshold", arena, &old_threshold,
 	    &threshold_sz, NULL, 0);
-	expect_zu_eq(2 * 1024 * 1024, old_threshold, "Should have read old value");
+	expect_zu_eq(
+	    2 * 1024 * 1024, old_threshold, "Should have read old value");
 }
 TEST_END
 
 static size_t max_purged = 0;
 static bool
-purge_forced_record_max(extent_hooks_t* hooks, void *addr, size_t sz,
+purge_forced_record_max(extent_hooks_t *hooks, void *addr, size_t sz,
     size_t offset, size_t length, unsigned arena_ind) {
 	if (length > max_purged) {
 		max_purged = length;
@@ -73,7 +74,7 @@ TEST_BEGIN(test_oversize_threshold) {
 	int err;
 
 	unsigned arena;
-	size_t arena_sz = sizeof(arena);
+	size_t   arena_sz = sizeof(arena);
 	err = mallctl("arenas.create", (void *)&arena, &arena_sz, NULL, 0);
 	expect_d_eq(0, err, "Arena creation failed");
 	arena_mallctl("arena.%u.extent_hooks", arena, NULL, NULL, &extent_hooks,
@@ -120,14 +121,15 @@ TEST_BEGIN(test_oversize_threshold) {
 	 */
 	ptr = mallocx(2 * 1024 * 1024, MALLOCX_ARENA(arena));
 	dallocx(ptr, MALLOCX_TCACHE_NONE);
-	expect_zu_ge(max_purged, 2 * 1024 * 1024, "Expected a 2MB purge");
+	if (!is_background_thread_enabled()) {
+		expect_zu_ge(
+		    max_purged, 2 * 1024 * 1024, "Expected a 2MB purge");
+	}
 }
 TEST_END
 
 int
 main(void) {
 	return test_no_reentrancy(
-	    test_oversize_threshold_get_set,
-	    test_oversize_threshold);
+	    test_oversize_threshold_get_set, test_oversize_threshold);
 }
-
diff --git a/test/unit/pa.c b/test/unit/pa.c
index b1e2f6e9..c1562d7b 100644
--- a/test/unit/pa.c
+++ b/test/unit/pa.c
@@ -16,8 +16,8 @@ merge_hook(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a,
 }
 
 static bool
-split_hook(extent_hooks_t *extent_hooks, void *addr, size_t size,
-    size_t size_a, size_t size_b, bool committed, unsigned arena_ind) {
+split_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a,
+    size_t size_b, bool committed, unsigned arena_ind) {
 	return !maps_coalesce;
 }
 
@@ -39,16 +39,17 @@ init_test_extent_hooks(extent_hooks_t *hooks) {
 
 typedef struct test_data_s test_data_t;
 struct test_data_s {
-	pa_shard_t shard;
-	pa_central_t central;
-	base_t *base;
-	emap_t emap;
+	pa_shard_t       shard;
+	pa_central_t     central;
+	base_t          *base;
+	emap_t           emap;
 	pa_shard_stats_t stats;
-	malloc_mutex_t stats_mtx;
-	extent_hooks_t hooks;
+	malloc_mutex_t   stats_mtx;
+	extent_hooks_t   hooks;
 };
 
-test_data_t *init_test_data(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) {
+static test_data_t *
+init_test_data(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) {
 	test_data_t *test_data = calloc(1, sizeof(test_data_t));
 	assert_ptr_not_null(test_data, "");
 	init_test_extent_hooks(&test_data->hooks);
@@ -65,8 +66,8 @@ test_data_t *init_test_data(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) {
 	nstime_t time;
 	nstime_init(&time, 0);
 
-	err = pa_central_init(&test_data->central, base, opt_hpa,
-	    &hpa_hooks_default);
+	err = pa_central_init(
+	    &test_data->central, base, opt_hpa, &hpa_hooks_default);
 	assert_false(err, "");
 
 	const size_t pa_oversize_threshold = 8 * 1024 * 1024;
@@ -79,7 +80,8 @@ test_data_t *init_test_data(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) {
 	return test_data;
 }
 
-void destroy_test_data(test_data_t *data) {
+void
+destroy_test_data(test_data_t *data) {
 	base_delete(TSDN_NULL, data->base);
 	free(data);
 }
@@ -88,28 +90,28 @@ static void *
 do_alloc_free_purge(void *arg) {
 	test_data_t *test_data = (test_data_t *)arg;
 	for (int i = 0; i < 10 * 1000; i++) {
-		bool deferred_work_generated = false;
+		bool     deferred_work_generated = false;
 		edata_t *edata = pa_alloc(TSDN_NULL, &test_data->shard, PAGE,
 		    PAGE, /* slab */ false, /* szind */ 0, /* zero */ false,
 		    /* guarded */ false, &deferred_work_generated);
 		assert_ptr_not_null(edata, "");
 		pa_dalloc(TSDN_NULL, &test_data->shard, edata,
 		    &deferred_work_generated);
-		malloc_mutex_lock(TSDN_NULL,
-		    &test_data->shard.pac.decay_dirty.mtx);
+		malloc_mutex_lock(
+		    TSDN_NULL, &test_data->shard.pac.decay_dirty.mtx);
 		pac_decay_all(TSDN_NULL, &test_data->shard.pac,
 		    &test_data->shard.pac.decay_dirty,
 		    &test_data->shard.pac.stats->decay_dirty,
 		    &test_data->shard.pac.ecache_dirty, true);
-		malloc_mutex_unlock(TSDN_NULL,
-		    &test_data->shard.pac.decay_dirty.mtx);
+		malloc_mutex_unlock(
+		    TSDN_NULL, &test_data->shard.pac.decay_dirty.mtx);
 	}
 	return NULL;
 }
 
 TEST_BEGIN(test_alloc_free_purge_thds) {
 	test_data_t *test_data = init_test_data(0, 0);
-	thd_t thds[4];
+	thd_t        thds[4];
 	for (int i = 0; i < 4; i++) {
 		thd_create(&thds[i], do_alloc_free_purge, test_data);
 	}
@@ -119,8 +121,52 @@ TEST_BEGIN(test_alloc_free_purge_thds) {
 }
 TEST_END
 
+TEST_BEGIN(test_failed_coalesce_releases_neighbor) {
+	test_skip_if(!maps_coalesce);
+
+	test_data_t *test_data = init_test_data(-1, -1);
+	size_t old_lg_extent_max_active_fit = opt_lg_extent_max_active_fit;
+	opt_lg_extent_max_active_fit = 0;
+
+	bool     deferred_work_generated = false;
+	size_t   unit = SC_LARGE_MINCLASS;
+	size_t   alloc_size = 4 * unit;
+	edata_t *edata = pa_alloc(TSDN_NULL, &test_data->shard, alloc_size,
+	    PAGE,
+	    /* slab */ false, sz_size2index(alloc_size), /* zero */ false,
+	    /* guarded */ false, &deferred_work_generated);
+	expect_ptr_not_null(edata, "Unexpected pa_alloc() failure");
+
+	void *tail_addr = (void *)((uintptr_t)edata_base_get(edata) + unit);
+	expect_false(pa_shrink(TSDN_NULL, &test_data->shard, edata, alloc_size,
+	                 unit, sz_size2index(unit), &deferred_work_generated),
+	    "Unexpected pa_shrink() failure");
+
+	edata_t *tail = emap_edata_lookup(
+	    TSDN_NULL, &test_data->emap, tail_addr);
+	expect_ptr_not_null(tail, "Expected dirty tail extent after shrink");
+	expect_ptr_eq(
+	    edata_base_get(tail), tail_addr, "Unexpected tail extent address");
+	expect_zu_eq(
+	    edata_size_get(tail), 3 * unit, "Unexpected tail extent size");
+	expect_d_eq(edata_state_get(tail), extent_state_dirty,
+	    "Expected tail extent to start dirty");
+
+	pa_dalloc(
+	    TSDN_NULL, &test_data->shard, edata, &deferred_work_generated);
+
+	tail = emap_edata_lookup(TSDN_NULL, &test_data->emap, tail_addr);
+	expect_ptr_not_null(
+	    tail, "Expected oversized dirty neighbor to remain discoverable");
+	expect_d_eq(edata_state_get(tail), extent_state_dirty,
+	    "Failed coalesce must release oversized dirty neighbor");
+
+	opt_lg_extent_max_active_fit = old_lg_extent_max_active_fit;
+}
+TEST_END
+
 int
 main(void) {
 	return test(
-	    test_alloc_free_purge_thds);
+	    test_alloc_free_purge_thds, test_failed_coalesce_releases_neighbor);
 }
diff --git a/test/unit/pack.c b/test/unit/pack.c
index e6392825..e3024512 100644
--- a/test/unit/pack.c
+++ b/test/unit/pack.c
@@ -4,9 +4,9 @@
  * Size class that is a divisor of the page size, ideally 4+ regions per run.
  */
 #if LG_PAGE <= 14
-#define SZ	(ZU(1) << (LG_PAGE - 2))
+#	define SZ (ZU(1) << (LG_PAGE - 2))
 #else
-#define SZ	ZU(4096)
+#	define SZ ZU(4096)
 #endif
 
 /*
@@ -14,11 +14,11 @@
  * if mmap()ed memory grows downward, downward growth of mmap()ed memory is
  * tested.
  */
-#define NSLABS	8
+#define NSLABS 8
 
 static unsigned
 binind_compute(void) {
-	size_t sz;
+	size_t   sz;
 	unsigned nbins, i;
 
 	sz = sizeof(nbins);
@@ -27,16 +27,17 @@ binind_compute(void) {
 
 	for (i = 0; i < nbins; i++) {
 		size_t mib[4];
-		size_t miblen = sizeof(mib)/sizeof(size_t);
+		size_t miblen = sizeof(mib) / sizeof(size_t);
 		size_t size;
 
-		expect_d_eq(mallctlnametomib("arenas.bin.0.size", mib,
-		    &miblen), 0, "Unexpected mallctlnametomb failure");
+		expect_d_eq(mallctlnametomib("arenas.bin.0.size", mib, &miblen),
+		    0, "Unexpected mallctlnametomb failure");
 		mib[2] = (size_t)i;
 
 		sz = sizeof(size);
-		expect_d_eq(mallctlbymib(mib, miblen, (void *)&size, &sz, NULL,
-		    0), 0, "Unexpected mallctlbymib failure");
+		expect_d_eq(
+		    mallctlbymib(mib, miblen, (void *)&size, &sz, NULL, 0), 0,
+		    "Unexpected mallctlbymib failure");
 		if (size == SZ) {
 			return i;
 		}
@@ -49,24 +50,24 @@ binind_compute(void) {
 static size_t
 nregs_per_run_compute(void) {
 	uint32_t nregs;
-	size_t sz;
+	size_t   sz;
 	unsigned binind = binind_compute();
-	size_t mib[4];
-	size_t miblen = sizeof(mib)/sizeof(size_t);
+	size_t   mib[4];
+	size_t   miblen = sizeof(mib) / sizeof(size_t);
 
 	expect_d_eq(mallctlnametomib("arenas.bin.0.nregs", mib, &miblen), 0,
 	    "Unexpected mallctlnametomb failure");
 	mib[2] = (size_t)binind;
 	sz = sizeof(nregs);
-	expect_d_eq(mallctlbymib(mib, miblen, (void *)&nregs, &sz, NULL,
-	    0), 0, "Unexpected mallctlbymib failure");
+	expect_d_eq(mallctlbymib(mib, miblen, (void *)&nregs, &sz, NULL, 0), 0,
+	    "Unexpected mallctlbymib failure");
 	return nregs;
 }
 
 static unsigned
 arenas_create_mallctl(void) {
 	unsigned arena_ind;
-	size_t sz;
+	size_t   sz;
 
 	sz = sizeof(arena_ind);
 	expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0),
@@ -78,7 +79,7 @@ arenas_create_mallctl(void) {
 static void
 arena_reset_mallctl(unsigned arena_ind) {
 	size_t mib[3];
-	size_t miblen = sizeof(mib)/sizeof(size_t);
+	size_t miblen = sizeof(mib) / sizeof(size_t);
 
 	expect_d_eq(mallctlnametomib("arena.0.reset", mib, &miblen), 0,
 	    "Unexpected mallctlnametomib() failure");
@@ -88,23 +89,23 @@ arena_reset_mallctl(unsigned arena_ind) {
 }
 
 TEST_BEGIN(test_pack) {
-	bool prof_enabled;
+	bool   prof_enabled;
 	size_t sz = sizeof(prof_enabled);
 	if (mallctl("opt.prof", (void *)&prof_enabled, &sz, NULL, 0) == 0) {
 		test_skip_if(prof_enabled);
 	}
 
 	unsigned arena_ind = arenas_create_mallctl();
-	size_t nregs_per_run = nregs_per_run_compute();
-	size_t nregs = nregs_per_run * NSLABS;
+	size_t   nregs_per_run = nregs_per_run_compute();
+	size_t   nregs = nregs_per_run * NSLABS;
 	VARIABLE_ARRAY(void *, ptrs, nregs);
 	size_t i, j, offset;
 
 	/* Fill matrix. */
 	for (i = offset = 0; i < NSLABS; i++) {
 		for (j = 0; j < nregs_per_run; j++) {
-			void *p = mallocx(SZ, MALLOCX_ARENA(arena_ind) |
-			    MALLOCX_TCACHE_NONE);
+			void *p = mallocx(
+			    SZ, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE);
 			expect_ptr_not_null(p,
 			    "Unexpected mallocx(%zu, MALLOCX_ARENA(%u) |"
 			    " MALLOCX_TCACHE_NONE) failure, run=%zu, reg=%zu",
@@ -119,16 +120,15 @@ TEST_BEGIN(test_pack) {
 	 * layout policy.
 	 */
 	offset = 0;
-	for (i = offset = 0;
-	    i < NSLABS;
-	    i++, offset = (offset + 1) % nregs_per_run) {
+	for (i = offset = 0; i < NSLABS;
+	     i++, offset = (offset + 1) % nregs_per_run) {
 		for (j = 0; j < nregs_per_run; j++) {
 			void *p = ptrs[(i * nregs_per_run) + j];
 			if (offset == j) {
 				continue;
 			}
-			dallocx(p, MALLOCX_ARENA(arena_ind) |
-			    MALLOCX_TCACHE_NONE);
+			dallocx(
+			    p, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE);
 		}
 	}
 
@@ -137,17 +137,16 @@ TEST_BEGIN(test_pack) {
 	 * that the matrix is unmodified.
 	 */
 	offset = 0;
-	for (i = offset = 0;
-	    i < NSLABS;
-	    i++, offset = (offset + 1) % nregs_per_run) {
+	for (i = offset = 0; i < NSLABS;
+	     i++, offset = (offset + 1) % nregs_per_run) {
 		for (j = 0; j < nregs_per_run; j++) {
 			void *p;
 
 			if (offset == j) {
 				continue;
 			}
-			p = mallocx(SZ, MALLOCX_ARENA(arena_ind) |
-			    MALLOCX_TCACHE_NONE);
+			p = mallocx(
+			    SZ, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE);
 			expect_ptr_eq(p, ptrs[(i * nregs_per_run) + j],
 			    "Unexpected refill discrepancy, run=%zu, reg=%zu\n",
 			    i, j);
@@ -161,6 +160,5 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_pack);
+	return test(test_pack);
 }
diff --git a/test/unit/pages.c b/test/unit/pages.c
index 8dfd1a72..66afb84b 100644
--- a/test/unit/pages.c
+++ b/test/unit/pages.c
@@ -2,20 +2,21 @@
 
 TEST_BEGIN(test_pages_huge) {
 	size_t alloc_size;
-	bool commit;
-	void *pages, *hugepage;
+	bool   commit;
+	void  *pages, *hugepage;
 
 	alloc_size = HUGEPAGE * 2 - PAGE;
 	commit = true;
 	pages = pages_map(NULL, alloc_size, PAGE, &commit);
 	expect_ptr_not_null(pages, "Unexpected pages_map() error");
 
-	if (init_system_thp_mode == thp_mode_default) {
-	    hugepage = (void *)(ALIGNMENT_CEILING((uintptr_t)pages, HUGEPAGE));
-	    expect_b_ne(pages_huge(hugepage, HUGEPAGE), have_madvise_huge,
-	        "Unexpected pages_huge() result");
-	    expect_false(pages_nohuge(hugepage, HUGEPAGE),
-	        "Unexpected pages_nohuge() result");
+	if (init_system_thp_mode == system_thp_mode_madvise) {
+		hugepage = (void *)(ALIGNMENT_CEILING(
+		    (uintptr_t)pages, HUGEPAGE));
+		expect_b_ne(pages_huge(hugepage, HUGEPAGE), have_madvise_huge,
+		    "Unexpected pages_huge() result");
+		expect_false(pages_nohuge(hugepage, HUGEPAGE),
+		    "Unexpected pages_nohuge() result");
 	}
 
 	pages_unmap(pages, alloc_size);
@@ -24,6 +25,5 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_pages_huge);
+	return test(test_pages_huge);
 }
diff --git a/test/unit/peak.c b/test/unit/peak.c
index 11129785..80eda30d 100644
--- a/test/unit/peak.c
+++ b/test/unit/peak.c
@@ -4,11 +4,10 @@
 
 TEST_BEGIN(test_peak) {
 	peak_t peak = PEAK_INITIALIZER;
-	expect_u64_eq(0, peak_max(&peak),
-	    "Peak should be zero at initialization");
+	expect_u64_eq(
+	    0, peak_max(&peak), "Peak should be zero at initialization");
 	peak_update(&peak, 100, 50);
-	expect_u64_eq(50, peak_max(&peak),
-	    "Missed update");
+	expect_u64_eq(50, peak_max(&peak), "Missed update");
 	peak_update(&peak, 100, 100);
 	expect_u64_eq(50, peak_max(&peak), "Dallocs shouldn't change peak");
 	peak_update(&peak, 100, 200);
@@ -42,6 +41,5 @@ TEST_END
 
 int
 main(void) {
-	return test_no_reentrancy(
-	    test_peak);
+	return test_no_reentrancy(test_peak);
 }
diff --git a/test/unit/ph.c b/test/unit/ph.c
index 28f5e488..c9e4da9c 100644
--- a/test/unit/ph.c
+++ b/test/unit/ph.c
@@ -2,14 +2,15 @@
 
 #include "jemalloc/internal/ph.h"
 
+#define BFS_ENUMERATE_MAX 30
 typedef struct node_s node_t;
-ph_structs(heap, node_t);
+ph_structs(heap, node_t, BFS_ENUMERATE_MAX);
 
 struct node_s {
 #define NODE_MAGIC 0x9823af7e
-	uint32_t magic;
+	uint32_t    magic;
 	heap_link_t link;
-	uint64_t key;
+	uint64_t    key;
 };
 
 static int
@@ -30,7 +31,6 @@ node_cmp(const node_t *a, const node_t *b) {
 
 static int
 node_cmp_magic(const node_t *a, const node_t *b) {
-
 	expect_u32_eq(a->magic, NODE_MAGIC, "Bad magic");
 	expect_u32_eq(b->magic, NODE_MAGIC, "Bad magic");
 
@@ -57,12 +57,12 @@ node_lchild_get(const node_t *node) {
 static void
 node_print(const node_t *node, unsigned depth) {
 	unsigned i;
-	node_t *leftmost_child, *sibling;
+	node_t  *leftmost_child, *sibling;
 
 	for (i = 0; i < depth; i++) {
 		malloc_printf("\t");
 	}
-	malloc_printf("%2"FMTu64"\n", node->key);
+	malloc_printf("%2" FMTu64 "\n", node->key);
 
 	leftmost_child = node_lchild_get(node);
 	if (leftmost_child == NULL) {
@@ -70,8 +70,8 @@ node_print(const node_t *node, unsigned depth) {
 	}
 	node_print(leftmost_child, depth + 1);
 
-	for (sibling = node_next_get(leftmost_child); sibling !=
-	    NULL; sibling = node_next_get(sibling)) {
+	for (sibling = node_next_get(leftmost_child); sibling != NULL;
+	     sibling = node_next_get(sibling)) {
 		node_print(sibling, depth + 1);
 	}
 }
@@ -88,7 +88,7 @@ heap_print(const heap_t *heap) {
 	node_print(heap->ph.root, 0);
 
 	for (auxelm = node_next_get(heap->ph.root); auxelm != NULL;
-	    auxelm = node_next_get(auxelm)) {
+	     auxelm = node_next_get(auxelm)) {
 		expect_ptr_eq(node_next_get(node_prev_get(auxelm)), auxelm,
 		    "auxelm's prev doesn't link to auxelm");
 		node_print(auxelm, 0);
@@ -101,7 +101,7 @@ label_return:
 static unsigned
 node_validate(const node_t *node, const node_t *parent) {
 	unsigned nnodes = 1;
-	node_t *leftmost_child, *sibling;
+	node_t  *leftmost_child, *sibling;
 
 	if (parent != NULL) {
 		expect_d_ge(node_cmp_magic(node, parent), 0,
@@ -112,12 +112,12 @@ node_validate(const node_t *node, const node_t *parent) {
 	if (leftmost_child == NULL) {
 		return nnodes;
 	}
-	expect_ptr_eq(node_prev_get(leftmost_child),
-	    (void *)node, "Leftmost child does not link to node");
+	expect_ptr_eq(node_prev_get(leftmost_child), (void *)node,
+	    "Leftmost child does not link to node");
 	nnodes += node_validate(leftmost_child, node);
 
-	for (sibling = node_next_get(leftmost_child); sibling !=
-	    NULL; sibling = node_next_get(sibling)) {
+	for (sibling = node_next_get(leftmost_child); sibling != NULL;
+	     sibling = node_next_get(sibling)) {
 		expect_ptr_eq(node_next_get(node_prev_get(sibling)), sibling,
 		    "sibling's prev doesn't link to sibling");
 		nnodes += node_validate(sibling, node);
@@ -128,7 +128,7 @@ node_validate(const node_t *node, const node_t *parent) {
 static unsigned
 heap_validate(const heap_t *heap) {
 	unsigned nnodes = 0;
-	node_t *auxelm;
+	node_t  *auxelm;
 
 	if (heap->ph.root == NULL) {
 		goto label_return;
@@ -137,7 +137,7 @@ heap_validate(const heap_t *heap) {
 	nnodes += node_validate(heap->ph.root, NULL);
 
 	for (auxelm = node_next_get(heap->ph.root); auxelm != NULL;
-	    auxelm = node_next_get(auxelm)) {
+	     auxelm = node_next_get(auxelm)) {
 		expect_ptr_eq(node_next_get(node_prev_get(auxelm)), auxelm,
 		    "auxelm's prev doesn't link to auxelm");
 		nnodes += node_validate(auxelm, NULL);
@@ -185,10 +185,10 @@ TEST_BEGIN(test_ph_random) {
 #define NNODES 25
 #define NBAGS 250
 #define SEED 42
-	sfmt_t *sfmt;
+	sfmt_t  *sfmt;
 	uint64_t bag[NNODES];
-	heap_t heap;
-	node_t nodes[NNODES];
+	heap_t   heap;
+	node_t   nodes[NNODES];
 	unsigned i, j, k;
 
 	sfmt = init_gen_rand(SEED);
@@ -215,8 +215,8 @@ TEST_BEGIN(test_ph_random) {
 		for (j = 1; j <= NNODES; j++) {
 			/* Initialize heap and nodes. */
 			heap_new(&heap);
-			expect_u_eq(heap_validate(&heap), 0,
-			    "Incorrect node count");
+			expect_u_eq(
+			    heap_validate(&heap), 0, "Incorrect node count");
 			for (k = 0; k < j; k++) {
 				nodes[k].magic = NODE_MAGIC;
 				nodes[k].key = bag[k];
@@ -236,8 +236,24 @@ TEST_BEGIN(test_ph_random) {
 				    "Incorrect node count");
 			}
 
-			expect_false(heap_empty(&heap),
-			    "Heap should not be empty");
+			expect_false(
+			    heap_empty(&heap), "Heap should not be empty");
+
+			/* Enumerate nodes. */
+			heap_enumerate_helper_t helper;
+			uint16_t max_queue_size = sizeof(helper.bfs_queue)
+			    / sizeof(void *);
+			expect_u_eq(max_queue_size, BFS_ENUMERATE_MAX,
+			    "Incorrect bfs queue length initialized");
+			assert(max_queue_size == BFS_ENUMERATE_MAX);
+			heap_enumerate_prepare(
+			    &heap, &helper, BFS_ENUMERATE_MAX, max_queue_size);
+			size_t node_count = 0;
+			while (heap_enumerate_next(&heap, &helper)) {
+				node_count++;
+			}
+			expect_lu_eq(
+			    node_count, j, "Unexpected enumeration results.");
 
 			/* Remove nodes. */
 			switch (i % 6) {
@@ -246,13 +262,13 @@ TEST_BEGIN(test_ph_random) {
 					expect_u_eq(heap_validate(&heap), j - k,
 					    "Incorrect node count");
 					node_remove(&heap, &nodes[k]);
-					expect_u_eq(heap_validate(&heap), j - k
-					    - 1, "Incorrect node count");
+					expect_u_eq(heap_validate(&heap),
+					    j - k - 1, "Incorrect node count");
 				}
 				break;
 			case 1:
 				for (k = j; k > 0; k--) {
-					node_remove(&heap, &nodes[k-1]);
+					node_remove(&heap, &nodes[k - 1]);
 					expect_u_eq(heap_validate(&heap), k - 1,
 					    "Incorrect node count");
 				}
@@ -261,58 +277,62 @@ TEST_BEGIN(test_ph_random) {
 				node_t *prev = NULL;
 				for (k = 0; k < j; k++) {
 					node_t *node = node_remove_first(&heap);
-					expect_u_eq(heap_validate(&heap), j - k
-					    - 1, "Incorrect node count");
+					expect_u_eq(heap_validate(&heap),
+					    j - k - 1, "Incorrect node count");
 					if (prev != NULL) {
-						expect_d_ge(node_cmp(node,
-						    prev), 0,
+						expect_d_ge(
+						    node_cmp(node, prev), 0,
 						    "Bad removal order");
 					}
 					prev = node;
 				}
 				break;
-			} case 3: {
+			}
+			case 3: {
 				node_t *prev = NULL;
 				for (k = 0; k < j; k++) {
 					node_t *node = heap_first(&heap);
 					expect_u_eq(heap_validate(&heap), j - k,
 					    "Incorrect node count");
 					if (prev != NULL) {
-						expect_d_ge(node_cmp(node,
-						    prev), 0,
+						expect_d_ge(
+						    node_cmp(node, prev), 0,
 						    "Bad removal order");
 					}
 					node_remove(&heap, node);
-					expect_u_eq(heap_validate(&heap), j - k
-					    - 1, "Incorrect node count");
+					expect_u_eq(heap_validate(&heap),
+					    j - k - 1, "Incorrect node count");
 					prev = node;
 				}
 				break;
-			} case 4: {
+			}
+			case 4: {
 				for (k = 0; k < j; k++) {
 					node_remove_any(&heap);
-					expect_u_eq(heap_validate(&heap), j - k
-					    - 1, "Incorrect node count");
+					expect_u_eq(heap_validate(&heap),
+					    j - k - 1, "Incorrect node count");
 				}
 				break;
-			} case 5: {
+			}
+			case 5: {
 				for (k = 0; k < j; k++) {
 					node_t *node = heap_any(&heap);
 					expect_u_eq(heap_validate(&heap), j - k,
 					    "Incorrect node count");
 					node_remove(&heap, node);
-					expect_u_eq(heap_validate(&heap), j - k
-					    - 1, "Incorrect node count");
+					expect_u_eq(heap_validate(&heap),
+					    j - k - 1, "Incorrect node count");
 				}
 				break;
-			} default:
+			}
+			default:
 				not_reached();
 			}
 
-			expect_ptr_null(heap_first(&heap),
-			    "Heap should be empty");
-			expect_ptr_null(heap_any(&heap),
-			    "Heap should be empty");
+			expect_ptr_null(
+			    heap_first(&heap), "Heap should be empty");
+			expect_ptr_null(
+			    heap_any(&heap), "Heap should be empty");
 			expect_true(heap_empty(&heap), "Heap should be empty");
 		}
 	}
@@ -324,7 +344,5 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_ph_empty,
-	    test_ph_random);
+	return test(test_ph_empty, test_ph_random);
 }
diff --git a/test/unit/prng.c b/test/unit/prng.c
index a6d9b014..20b8470e 100644
--- a/test/unit/prng.c
+++ b/test/unit/prng.c
@@ -9,32 +9,31 @@ TEST_BEGIN(test_prng_lg_range_u32) {
 	ra = prng_lg_range_u32(&sa, 32);
 	sa = 42;
 	rb = prng_lg_range_u32(&sa, 32);
-	expect_u32_eq(ra, rb,
-	    "Repeated generation should produce repeated results");
+	expect_u32_eq(
+	    ra, rb, "Repeated generation should produce repeated results");
 
 	sb = 42;
 	rb = prng_lg_range_u32(&sb, 32);
-	expect_u32_eq(ra, rb,
-	    "Equivalent generation should produce equivalent results");
+	expect_u32_eq(
+	    ra, rb, "Equivalent generation should produce equivalent results");
 
 	sa = 42;
 	ra = prng_lg_range_u32(&sa, 32);
 	rb = prng_lg_range_u32(&sa, 32);
-	expect_u32_ne(ra, rb,
-	    "Full-width results must not immediately repeat");
+	expect_u32_ne(ra, rb, "Full-width results must not immediately repeat");
 
 	sa = 42;
 	ra = prng_lg_range_u32(&sa, 32);
 	for (lg_range = 31; lg_range > 0; lg_range--) {
 		sb = 42;
 		rb = prng_lg_range_u32(&sb, lg_range);
-		expect_u32_eq((rb & (UINT32_C(0xffffffff) << lg_range)),
-		    0, "High order bits should be 0, lg_range=%u", lg_range);
+		expect_u32_eq((rb & (UINT32_C(0xffffffff) << lg_range)), 0,
+		    "High order bits should be 0, lg_range=%u", lg_range);
 		expect_u32_eq(rb, (ra >> (32 - lg_range)),
 		    "Expected high order bits of full-width result, "
-		    "lg_range=%u", lg_range);
+		    "lg_range=%u",
+		    lg_range);
 	}
-
 }
 TEST_END
 
@@ -46,19 +45,18 @@ TEST_BEGIN(test_prng_lg_range_u64) {
 	ra = prng_lg_range_u64(&sa, 64);
 	sa = 42;
 	rb = prng_lg_range_u64(&sa, 64);
-	expect_u64_eq(ra, rb,
-	    "Repeated generation should produce repeated results");
+	expect_u64_eq(
+	    ra, rb, "Repeated generation should produce repeated results");
 
 	sb = 42;
 	rb = prng_lg_range_u64(&sb, 64);
-	expect_u64_eq(ra, rb,
-	    "Equivalent generation should produce equivalent results");
+	expect_u64_eq(
+	    ra, rb, "Equivalent generation should produce equivalent results");
 
 	sa = 42;
 	ra = prng_lg_range_u64(&sa, 64);
 	rb = prng_lg_range_u64(&sa, 64);
-	expect_u64_ne(ra, rb,
-	    "Full-width results must not immediately repeat");
+	expect_u64_ne(ra, rb, "Full-width results must not immediately repeat");
 
 	sa = 42;
 	ra = prng_lg_range_u64(&sa, 64);
@@ -69,47 +67,48 @@ TEST_BEGIN(test_prng_lg_range_u64) {
 		    0, "High order bits should be 0, lg_range=%u", lg_range);
 		expect_u64_eq(rb, (ra >> (64 - lg_range)),
 		    "Expected high order bits of full-width result, "
-		    "lg_range=%u", lg_range);
+		    "lg_range=%u",
+		    lg_range);
 	}
 }
 TEST_END
 
 TEST_BEGIN(test_prng_lg_range_zu) {
-	size_t sa, sb;
-	size_t ra, rb;
+	size_t   sa, sb;
+	size_t   ra, rb;
 	unsigned lg_range;
 
 	sa = 42;
 	ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR));
 	sa = 42;
 	rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR));
-	expect_zu_eq(ra, rb,
-	    "Repeated generation should produce repeated results");
+	expect_zu_eq(
+	    ra, rb, "Repeated generation should produce repeated results");
 
 	sb = 42;
 	rb = prng_lg_range_zu(&sb, ZU(1) << (3 + LG_SIZEOF_PTR));
-	expect_zu_eq(ra, rb,
-	    "Equivalent generation should produce equivalent results");
+	expect_zu_eq(
+	    ra, rb, "Equivalent generation should produce equivalent results");
 
 	sa = 42;
 	ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR));
 	rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR));
-	expect_zu_ne(ra, rb,
-	    "Full-width results must not immediately repeat");
+	expect_zu_ne(ra, rb, "Full-width results must not immediately repeat");
 
 	sa = 42;
 	ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR));
 	for (lg_range = (ZU(1) << (3 + LG_SIZEOF_PTR)) - 1; lg_range > 0;
-	    lg_range--) {
+	     lg_range--) {
 		sb = 42;
 		rb = prng_lg_range_zu(&sb, lg_range);
-		expect_zu_eq((rb & (SIZE_T_MAX << lg_range)),
-		    0, "High order bits should be 0, lg_range=%u", lg_range);
-		expect_zu_eq(rb, (ra >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) -
-		    lg_range)), "Expected high order bits of full-width "
-		    "result, lg_range=%u", lg_range);
+		expect_zu_eq((rb & (SIZE_T_MAX << lg_range)), 0,
+		    "High order bits should be 0, lg_range=%u", lg_range);
+		expect_zu_eq(rb,
+		    (ra >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range)),
+		    "Expected high order bits of full-width "
+		    "result, lg_range=%u",
+		    lg_range);
 	}
-
 }
 TEST_END
 
@@ -158,13 +157,12 @@ TEST_END
 TEST_BEGIN(test_prng_range_zu) {
 	size_t range;
 
-	const size_t max_range = 10000000;
-	const size_t range_step = 97;
+	const size_t   max_range = 10000000;
+	const size_t   range_step = 97;
 	const unsigned nreps = 10;
 
-
 	for (range = 2; range < max_range; range += range_step) {
-		size_t s;
+		size_t   s;
 		unsigned rep;
 
 		s = range;
@@ -179,11 +177,7 @@ TEST_END
 
 int
 main(void) {
-	return test_no_reentrancy(
-	    test_prng_lg_range_u32,
-	    test_prng_lg_range_u64,
-	    test_prng_lg_range_zu,
-	    test_prng_range_u32,
-	    test_prng_range_u64,
-	    test_prng_range_zu);
+	return test_no_reentrancy(test_prng_lg_range_u32,
+	    test_prng_lg_range_u64, test_prng_lg_range_zu, test_prng_range_u32,
+	    test_prng_range_u64, test_prng_range_zu);
 }
diff --git a/test/unit/prof_accum.c b/test/unit/prof_accum.c
index ef392acd..940468b9 100644
--- a/test/unit/prof_accum.c
+++ b/test/unit/prof_accum.c
@@ -3,10 +3,10 @@
 #include "jemalloc/internal/prof_data.h"
 #include "jemalloc/internal/prof_sys.h"
 
-#define NTHREADS		4
-#define NALLOCS_PER_THREAD	50
-#define DUMP_INTERVAL		1
-#define BT_COUNT_CHECK_INTERVAL	5
+#define NTHREADS 4
+#define NALLOCS_PER_THREAD 50
+#define DUMP_INTERVAL 1
+#define BT_COUNT_CHECK_INTERVAL 5
 
 static int
 prof_dump_open_file_intercept(const char *filename, int mode) {
@@ -20,13 +20,13 @@ prof_dump_open_file_intercept(const char *filename, int mode) {
 
 static void *
 alloc_from_permuted_backtrace(unsigned thd_ind, unsigned iteration) {
-	return btalloc(1, thd_ind*NALLOCS_PER_THREAD + iteration);
+	return btalloc(1, thd_ind * NALLOCS_PER_THREAD + iteration);
 }
 
 static void *
 thd_start(void *varg) {
 	unsigned thd_ind = *(unsigned *)varg;
-	size_t bt_count_prev, bt_count;
+	size_t   bt_count_prev, bt_count;
 	unsigned i_prev, i;
 
 	i_prev = 0;
@@ -39,10 +39,10 @@ thd_start(void *varg) {
 			    0, "Unexpected error while dumping heap profile");
 		}
 
-		if (i % BT_COUNT_CHECK_INTERVAL == 0 ||
-		    i+1 == NALLOCS_PER_THREAD) {
+		if (i % BT_COUNT_CHECK_INTERVAL == 0
+		    || i + 1 == NALLOCS_PER_THREAD) {
 			bt_count = prof_bt_count();
-			expect_zu_le(bt_count_prev+(i-i_prev), bt_count,
+			expect_zu_le(bt_count_prev + (i - i_prev), bt_count,
 			    "Expected larger backtrace count increase");
 			i_prev = i;
 			bt_count_prev = bt_count;
@@ -53,17 +53,17 @@ thd_start(void *varg) {
 }
 
 TEST_BEGIN(test_idump) {
-	bool active;
-	thd_t thds[NTHREADS];
+	bool     active;
+	thd_t    thds[NTHREADS];
 	unsigned thd_args[NTHREADS];
 	unsigned i;
 
 	test_skip_if(!config_prof);
 
 	active = true;
-	expect_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active,
-	    sizeof(active)), 0,
-	    "Unexpected mallctl failure while activating profiling");
+	expect_d_eq(
+	    mallctl("prof.active", NULL, NULL, (void *)&active, sizeof(active)),
+	    0, "Unexpected mallctl failure while activating profiling");
 
 	prof_dump_open_file = prof_dump_open_file_intercept;
 
@@ -79,6 +79,5 @@ TEST_END
 
 int
 main(void) {
-	return test_no_reentrancy(
-	    test_idump);
+	return test_no_reentrancy(test_idump);
 }
diff --git a/test/unit/prof_active.c b/test/unit/prof_active.c
index af29e7ad..fc8b150b 100644
--- a/test/unit/prof_active.c
+++ b/test/unit/prof_active.c
@@ -4,37 +4,37 @@
 
 static void
 mallctl_bool_get(const char *name, bool expected, const char *func, int line) {
-	bool old;
+	bool   old;
 	size_t sz;
 
 	sz = sizeof(old);
 	expect_d_eq(mallctl(name, (void *)&old, &sz, NULL, 0), 0,
 	    "%s():%d: Unexpected mallctl failure reading %s", func, line, name);
-	expect_b_eq(old, expected, "%s():%d: Unexpected %s value", func, line,
-	    name);
+	expect_b_eq(
+	    old, expected, "%s():%d: Unexpected %s value", func, line, name);
 }
 
 static void
 mallctl_bool_set(const char *name, bool old_expected, bool val_new,
     const char *func, int line) {
-	bool old;
+	bool   old;
 	size_t sz;
 
 	sz = sizeof(old);
-	expect_d_eq(mallctl(name, (void *)&old, &sz, (void *)&val_new,
-	    sizeof(val_new)), 0,
-	    "%s():%d: Unexpected mallctl failure reading/writing %s", func,
+	expect_d_eq(
+	    mallctl(name, (void *)&old, &sz, (void *)&val_new, sizeof(val_new)),
+	    0, "%s():%d: Unexpected mallctl failure reading/writing %s", func,
 	    line, name);
 	expect_b_eq(old, old_expected, "%s():%d: Unexpected %s value", func,
 	    line, name);
 }
 
 static void
-mallctl_prof_active_get_impl(bool prof_active_old_expected, const char *func,
-    int line) {
+mallctl_prof_active_get_impl(
+    bool prof_active_old_expected, const char *func, int line) {
 	mallctl_bool_get("prof.active", prof_active_old_expected, func, line);
 }
-#define mallctl_prof_active_get(a)					\
+#define mallctl_prof_active_get(a)                                             \
 	mallctl_prof_active_get_impl(a, __func__, __LINE__)
 
 static void
@@ -43,16 +43,16 @@ mallctl_prof_active_set_impl(bool prof_active_old_expected,
 	mallctl_bool_set("prof.active", prof_active_old_expected,
 	    prof_active_new, func, line);
 }
-#define mallctl_prof_active_set(a, b)					\
+#define mallctl_prof_active_set(a, b)                                          \
 	mallctl_prof_active_set_impl(a, b, __func__, __LINE__)
 
 static void
-mallctl_thread_prof_active_get_impl(bool thread_prof_active_old_expected,
-    const char *func, int line) {
-	mallctl_bool_get("thread.prof.active", thread_prof_active_old_expected,
-	    func, line);
+mallctl_thread_prof_active_get_impl(
+    bool thread_prof_active_old_expected, const char *func, int line) {
+	mallctl_bool_get(
+	    "thread.prof.active", thread_prof_active_old_expected, func, line);
 }
-#define mallctl_thread_prof_active_get(a)				\
+#define mallctl_thread_prof_active_get(a)                                      \
 	mallctl_thread_prof_active_get_impl(a, __func__, __LINE__)
 
 static void
@@ -61,24 +61,23 @@ mallctl_thread_prof_active_set_impl(bool thread_prof_active_old_expected,
 	mallctl_bool_set("thread.prof.active", thread_prof_active_old_expected,
 	    thread_prof_active_new, func, line);
 }
-#define mallctl_thread_prof_active_set(a, b)				\
+#define mallctl_thread_prof_active_set(a, b)                                   \
 	mallctl_thread_prof_active_set_impl(a, b, __func__, __LINE__)
 
 static void
 prof_sampling_probe_impl(bool expect_sample, const char *func, int line) {
-	void *p;
+	void  *p;
 	size_t expected_backtraces = expect_sample ? 1 : 0;
 
-	expect_zu_eq(prof_bt_count(), 0, "%s():%d: Expected 0 backtraces", func,
-	    line);
+	expect_zu_eq(
+	    prof_bt_count(), 0, "%s():%d: Expected 0 backtraces", func, line);
 	p = mallocx(1, 0);
 	expect_ptr_not_null(p, "Unexpected mallocx() failure");
 	expect_zu_eq(prof_bt_count(), expected_backtraces,
 	    "%s():%d: Unexpected backtrace count", func, line);
 	dallocx(p, 0);
 }
-#define prof_sampling_probe(a)						\
-	prof_sampling_probe_impl(a, __func__, __LINE__)
+#define prof_sampling_probe(a) prof_sampling_probe_impl(a, __func__, __LINE__)
 
 TEST_BEGIN(test_prof_active) {
 	test_skip_if(!config_prof);
@@ -114,6 +113,5 @@ TEST_END
 
 int
 main(void) {
-	return test_no_reentrancy(
-	    test_prof_active);
+	return test_no_reentrancy(test_prof_active);
 }
diff --git a/test/unit/prof_gdump.c b/test/unit/prof_gdump.c
index 46e45036..4cca9bdb 100644
--- a/test/unit/prof_gdump.c
+++ b/test/unit/prof_gdump.c
@@ -18,16 +18,16 @@ prof_dump_open_file_intercept(const char *filename, int mode) {
 
 TEST_BEGIN(test_gdump) {
 	test_skip_if(opt_hpa);
-	bool active, gdump, gdump_old;
-	void *p, *q, *r, *s;
+	bool   active, gdump, gdump_old;
+	void  *p, *q, *r, *s;
 	size_t sz;
 
 	test_skip_if(!config_prof);
 
 	active = true;
-	expect_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active,
-	    sizeof(active)), 0,
-	    "Unexpected mallctl failure while activating profiling");
+	expect_d_eq(
+	    mallctl("prof.active", NULL, NULL, (void *)&active, sizeof(active)),
+	    0, "Unexpected mallctl failure while activating profiling");
 
 	prof_dump_open_file = prof_dump_open_file_intercept;
 
@@ -44,8 +44,8 @@ TEST_BEGIN(test_gdump) {
 	gdump = false;
 	sz = sizeof(gdump_old);
 	expect_d_eq(mallctl("prof.gdump", (void *)&gdump_old, &sz,
-	    (void *)&gdump, sizeof(gdump)), 0,
-	    "Unexpected mallctl failure while disabling prof.gdump");
+	                (void *)&gdump, sizeof(gdump)),
+	    0, "Unexpected mallctl failure while disabling prof.gdump");
 	assert(gdump_old);
 	did_prof_dump_open = false;
 	r = mallocx((1U << SC_LG_LARGE_MINCLASS), 0);
@@ -55,8 +55,8 @@ TEST_BEGIN(test_gdump) {
 	gdump = true;
 	sz = sizeof(gdump_old);
 	expect_d_eq(mallctl("prof.gdump", (void *)&gdump_old, &sz,
-	    (void *)&gdump, sizeof(gdump)), 0,
-	    "Unexpected mallctl failure while enabling prof.gdump");
+	                (void *)&gdump, sizeof(gdump)),
+	    0, "Unexpected mallctl failure while enabling prof.gdump");
 	assert(!gdump_old);
 	did_prof_dump_open = false;
 	s = mallocx((1U << SC_LG_LARGE_MINCLASS), 0);
@@ -72,6 +72,5 @@ TEST_END
 
 int
 main(void) {
-	return test_no_reentrancy(
-	    test_gdump);
+	return test_no_reentrancy(test_gdump);
 }
diff --git a/test/unit/prof_gdump.sh b/test/unit/prof_gdump.sh
index 3f600d20..a0b91dff 100644
--- a/test/unit/prof_gdump.sh
+++ b/test/unit/prof_gdump.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 
 if [ "x${enable_prof}" = "x1" ] ; then
-  export MALLOC_CONF="prof:true,prof_active:false,prof_gdump:true"
+  export MALLOC_CONF="prof:true,prof_active:false,prof_gdump:true,lg_prof_sample:0"
 fi
 
diff --git a/test/unit/prof_hook.c b/test/unit/prof_hook.c
index 6480d930..1d58469c 100644
--- a/test/unit/prof_hook.c
+++ b/test/unit/prof_hook.c
@@ -1,11 +1,24 @@
 #include "test/jemalloc_test.h"
 
+/*
+ * The MALLOC_CONF of this test has lg_prof_sample:0, meaning that every single
+ * allocation will be sampled (and trigger relevant hooks).
+ */
+
 const char *dump_filename = "/dev/null";
 
-prof_backtrace_hook_t default_hook;
+prof_backtrace_hook_t default_bt_hook;
 
 bool mock_bt_hook_called = false;
 bool mock_dump_hook_called = false;
+bool mock_prof_sample_hook_called = false;
+bool mock_prof_sample_free_hook_called = false;
+
+void  *sampled_ptr = NULL;
+size_t sampled_ptr_sz = 0;
+size_t sampled_ptr_usz = 0;
+void  *free_sampled_ptr = NULL;
+size_t free_sampled_ptr_sz = 0;
 
 void
 mock_bt_hook(void **vec, unsigned *len, unsigned max_len) {
@@ -18,7 +31,7 @@ mock_bt_hook(void **vec, unsigned *len, unsigned max_len) {
 
 void
 mock_bt_augmenting_hook(void **vec, unsigned *len, unsigned max_len) {
-	default_hook(vec, len, max_len);
+	default_bt_hook(vec, len, max_len);
 	expect_u_gt(*len, 0, "Default backtrace hook returned empty backtrace");
 	expect_u_lt(*len, max_len,
 	    "Default backtrace hook returned too large backtrace");
@@ -36,7 +49,6 @@ mock_bt_augmenting_hook(void **vec, unsigned *len, unsigned max_len) {
 		(*len)++;
 	}
 
-
 	mock_bt_hook_called = true;
 }
 
@@ -47,8 +59,27 @@ mock_dump_hook(const char *filename) {
 	    "Incorrect file name passed to the dump hook");
 }
 
-TEST_BEGIN(test_prof_backtrace_hook_replace) {
+void
+mock_prof_sample_hook(
+    const void *ptr, size_t sz, void **vec, unsigned len, size_t usz) {
+	mock_prof_sample_hook_called = true;
+	sampled_ptr = (void *)ptr;
+	sampled_ptr_sz = sz;
+	sampled_ptr_usz = usz;
+	for (unsigned i = 0; i < len; i++) {
+		expect_ptr_not_null(
+		    (void **)vec[i], "Backtrace should not contain NULL");
+	}
+}
 
+void
+mock_prof_sample_free_hook(const void *ptr, size_t sz) {
+	mock_prof_sample_free_hook_called = true;
+	free_sampled_ptr = (void *)ptr;
+	free_sampled_ptr_sz = sz;
+}
+
+TEST_BEGIN(test_prof_backtrace_hook_replace) {
 	test_skip_if(!config_prof);
 
 	mock_bt_hook_called = false;
@@ -59,15 +90,16 @@ TEST_BEGIN(test_prof_backtrace_hook_replace) {
 	expect_false(mock_bt_hook_called, "Called mock hook before it's set");
 
 	prof_backtrace_hook_t null_hook = NULL;
-	expect_d_eq(mallctl("experimental.hooks.prof_backtrace",
-	    NULL, 0, (void *)&null_hook,  sizeof(null_hook)),
-		EINVAL, "Incorrectly allowed NULL backtrace hook");
+	expect_d_eq(mallctl("experimental.hooks.prof_backtrace", NULL, 0,
+	                (void *)&null_hook, sizeof(null_hook)),
+	    EINVAL, "Incorrectly allowed NULL backtrace hook");
 
-	size_t default_hook_sz = sizeof(prof_backtrace_hook_t);
+	size_t default_bt_hook_sz = sizeof(prof_backtrace_hook_t);
 	prof_backtrace_hook_t hook = &mock_bt_hook;
 	expect_d_eq(mallctl("experimental.hooks.prof_backtrace",
-	    (void *)&default_hook, &default_hook_sz, (void *)&hook,
-	    sizeof(hook)), 0, "Unexpected mallctl failure setting hook");
+	                (void *)&default_bt_hook, &default_bt_hook_sz,
+	                (void *)&hook, sizeof(hook)),
+	    0, "Unexpected mallctl failure setting hook");
 
 	void *p1 = mallocx(1, 0);
 	assert_ptr_not_null(p1, "Failed to allocate");
@@ -75,11 +107,11 @@ TEST_BEGIN(test_prof_backtrace_hook_replace) {
 	expect_true(mock_bt_hook_called, "Didn't call mock hook");
 
 	prof_backtrace_hook_t current_hook;
-	size_t current_hook_sz = sizeof(prof_backtrace_hook_t);
+	size_t                current_hook_sz = sizeof(prof_backtrace_hook_t);
 	expect_d_eq(mallctl("experimental.hooks.prof_backtrace",
-	    (void *)&current_hook, &current_hook_sz, (void *)&default_hook,
-	    sizeof(default_hook)), 0,
-	    "Unexpected mallctl failure resetting hook to default");
+	                (void *)&current_hook, &current_hook_sz,
+	                (void *)&default_bt_hook, sizeof(default_bt_hook)),
+	    0, "Unexpected mallctl failure resetting hook to default");
 
 	expect_ptr_eq(current_hook, hook,
 	    "Hook returned by mallctl is not equal to mock hook");
@@ -90,7 +122,6 @@ TEST_BEGIN(test_prof_backtrace_hook_replace) {
 TEST_END
 
 TEST_BEGIN(test_prof_backtrace_hook_augment) {
-
 	test_skip_if(!config_prof);
 
 	mock_bt_hook_called = false;
@@ -100,11 +131,12 @@ TEST_BEGIN(test_prof_backtrace_hook_augment) {
 
 	expect_false(mock_bt_hook_called, "Called mock hook before it's set");
 
-	size_t default_hook_sz = sizeof(prof_backtrace_hook_t);
+	size_t default_bt_hook_sz = sizeof(prof_backtrace_hook_t);
 	prof_backtrace_hook_t hook = &mock_bt_augmenting_hook;
 	expect_d_eq(mallctl("experimental.hooks.prof_backtrace",
-	    (void *)&default_hook, &default_hook_sz, (void *)&hook,
-	    sizeof(hook)), 0, "Unexpected mallctl failure setting hook");
+	                (void *)&default_bt_hook, &default_bt_hook_sz,
+	                (void *)&hook, sizeof(hook)),
+	    0, "Unexpected mallctl failure setting hook");
 
 	void *p1 = mallocx(1, 0);
 	assert_ptr_not_null(p1, "Failed to allocate");
@@ -112,11 +144,11 @@ TEST_BEGIN(test_prof_backtrace_hook_augment) {
 	expect_true(mock_bt_hook_called, "Didn't call mock hook");
 
 	prof_backtrace_hook_t current_hook;
-	size_t current_hook_sz = sizeof(prof_backtrace_hook_t);
+	size_t                current_hook_sz = sizeof(prof_backtrace_hook_t);
 	expect_d_eq(mallctl("experimental.hooks.prof_backtrace",
-	    (void *)&current_hook, &current_hook_sz, (void *)&default_hook,
-	    sizeof(default_hook)), 0,
-	    "Unexpected mallctl failure resetting hook to default");
+	                (void *)&current_hook, &current_hook_sz,
+	                (void *)&default_bt_hook, sizeof(default_bt_hook)),
+	    0, "Unexpected mallctl failure resetting hook to default");
 
 	expect_ptr_eq(current_hook, hook,
 	    "Hook returned by mallctl is not equal to mock hook");
@@ -127,43 +159,184 @@ TEST_BEGIN(test_prof_backtrace_hook_augment) {
 TEST_END
 
 TEST_BEGIN(test_prof_dump_hook) {
-
 	test_skip_if(!config_prof);
+	expect_u_eq(opt_prof_bt_max, 200, "Unexpected backtrace stack depth");
 
 	mock_dump_hook_called = false;
 
 	expect_d_eq(mallctl("prof.dump", NULL, NULL, (void *)&dump_filename,
-	    sizeof(dump_filename)), 0, "Failed to dump heap profile");
+	                sizeof(dump_filename)),
+	    0, "Failed to dump heap profile");
 
 	expect_false(mock_dump_hook_called, "Called dump hook before it's set");
 
-	size_t default_hook_sz = sizeof(prof_dump_hook_t);
+	size_t           default_bt_hook_sz = sizeof(prof_dump_hook_t);
 	prof_dump_hook_t hook = &mock_dump_hook;
-	expect_d_eq(mallctl("experimental.hooks.prof_dump",
-	    (void *)&default_hook, &default_hook_sz, (void *)&hook,
-	    sizeof(hook)), 0, "Unexpected mallctl failure setting hook");
+	expect_d_eq(
+	    mallctl("experimental.hooks.prof_dump", (void *)&default_bt_hook,
+	        &default_bt_hook_sz, (void *)&hook, sizeof(hook)),
+	    0, "Unexpected mallctl failure setting hook");
 
 	expect_d_eq(mallctl("prof.dump", NULL, NULL, (void *)&dump_filename,
-	    sizeof(dump_filename)), 0, "Failed to dump heap profile");
+	                sizeof(dump_filename)),
+	    0, "Failed to dump heap profile");
 
 	expect_true(mock_dump_hook_called, "Didn't call mock hook");
 
 	prof_dump_hook_t current_hook;
-	size_t current_hook_sz = sizeof(prof_dump_hook_t);
+	size_t           current_hook_sz = sizeof(prof_dump_hook_t);
 	expect_d_eq(mallctl("experimental.hooks.prof_dump",
-	    (void *)&current_hook, &current_hook_sz, (void *)&default_hook,
-	    sizeof(default_hook)), 0,
-	    "Unexpected mallctl failure resetting hook to default");
+	                (void *)&current_hook, &current_hook_sz,
+	                (void *)&default_bt_hook, sizeof(default_bt_hook)),
+	    0, "Unexpected mallctl failure resetting hook to default");
 
 	expect_ptr_eq(current_hook, hook,
 	    "Hook returned by mallctl is not equal to mock hook");
 }
 TEST_END
 
+/* Need the do_write flag because NULL is a valid to_write value. */
+static void
+read_write_prof_sample_hook(
+    prof_sample_hook_t *to_read, bool do_write, prof_sample_hook_t to_write) {
+	size_t hook_sz = sizeof(prof_sample_hook_t);
+	expect_d_eq(mallctl("experimental.hooks.prof_sample", (void *)to_read,
+	                &hook_sz, do_write ? &to_write : NULL, hook_sz),
+	    0, "Unexpected prof_sample_hook mallctl failure");
+}
+
+static void
+write_prof_sample_hook(prof_sample_hook_t new_hook) {
+	read_write_prof_sample_hook(NULL, true, new_hook);
+}
+
+static prof_sample_hook_t
+read_prof_sample_hook(void) {
+	prof_sample_hook_t curr_hook;
+	read_write_prof_sample_hook(&curr_hook, false, NULL);
+
+	return curr_hook;
+}
+
+static void
+read_write_prof_sample_free_hook(prof_sample_free_hook_t *to_read,
+    bool do_write, prof_sample_free_hook_t to_write) {
+	size_t hook_sz = sizeof(prof_sample_free_hook_t);
+	expect_d_eq(
+	    mallctl("experimental.hooks.prof_sample_free", (void *)to_read,
+	        &hook_sz, do_write ? &to_write : NULL, hook_sz),
+	    0, "Unexpected prof_sample_free_hook mallctl failure");
+}
+
+static void
+write_prof_sample_free_hook(prof_sample_free_hook_t new_hook) {
+	read_write_prof_sample_free_hook(NULL, true, new_hook);
+}
+
+static prof_sample_free_hook_t
+read_prof_sample_free_hook(void) {
+	prof_sample_free_hook_t curr_hook;
+	read_write_prof_sample_free_hook(&curr_hook, false, NULL);
+
+	return curr_hook;
+}
+
+static void
+check_prof_sample_hooks(bool sample_hook_set, bool sample_free_hook_set) {
+	expect_false(mock_prof_sample_hook_called,
+	    "Should not have called prof_sample hook");
+	expect_false(mock_prof_sample_free_hook_called,
+	    "Should not have called prof_sample_free hook");
+	expect_ptr_null(sampled_ptr, "Unexpected sampled ptr");
+	expect_zu_eq(sampled_ptr_sz, 0, "Unexpected sampled ptr size");
+	expect_zu_eq(sampled_ptr_usz, 0, "Unexpected sampled ptr usize");
+	expect_ptr_null(free_sampled_ptr, "Unexpected free sampled ptr");
+	expect_zu_eq(
+	    free_sampled_ptr_sz, 0, "Unexpected free sampled ptr size");
+
+	prof_sample_hook_t curr_hook = read_prof_sample_hook();
+	expect_ptr_eq(curr_hook, sample_hook_set ? mock_prof_sample_hook : NULL,
+	    "Unexpected non NULL default hook");
+
+	prof_sample_free_hook_t curr_free_hook = read_prof_sample_free_hook();
+	expect_ptr_eq(curr_free_hook,
+	    sample_free_hook_set ? mock_prof_sample_free_hook : NULL,
+	    "Unexpected non NULL default hook");
+
+	size_t alloc_sz = 10;
+	size_t alloc_usz = 16;
+	void  *p = mallocx(alloc_sz, 0);
+	expect_ptr_not_null(p, "Failed to allocate");
+	expect_true(mock_prof_sample_hook_called == sample_hook_set,
+	    "Incorrect prof_sample hook usage");
+	if (sample_hook_set) {
+		expect_ptr_eq(p, sampled_ptr, "Unexpected sampled ptr");
+		expect_zu_eq(
+		    alloc_sz, sampled_ptr_sz, "Unexpected sampled usize");
+		expect_zu_eq(
+		    alloc_usz, sampled_ptr_usz, "Unexpected sampled usize");
+	}
+
+	dallocx(p, 0);
+	expect_true(mock_prof_sample_free_hook_called == sample_free_hook_set,
+	    "Incorrect prof_sample_free hook usage");
+	if (sample_free_hook_set) {
+		size_t usz = sz_s2u(alloc_sz);
+		expect_ptr_eq(p, free_sampled_ptr, "Unexpected sampled ptr");
+		expect_zu_eq(
+		    usz, free_sampled_ptr_sz, "Unexpected sampled usize");
+	}
+
+	sampled_ptr = free_sampled_ptr = NULL;
+	sampled_ptr_sz = sampled_ptr_usz = free_sampled_ptr_sz = 0;
+	mock_prof_sample_hook_called = false;
+	mock_prof_sample_free_hook_called = false;
+}
+
+TEST_BEGIN(test_prof_sample_hooks) {
+	test_skip_if(!config_prof);
+
+	check_prof_sample_hooks(false, false);
+
+	write_prof_sample_hook(mock_prof_sample_hook);
+	check_prof_sample_hooks(true, false);
+
+	write_prof_sample_free_hook(mock_prof_sample_free_hook);
+	check_prof_sample_hooks(true, true);
+
+	write_prof_sample_hook(NULL);
+	check_prof_sample_hooks(false, true);
+
+	write_prof_sample_free_hook(NULL);
+	check_prof_sample_hooks(false, false);
+
+	/* Test read+write together. */
+	prof_sample_hook_t sample_hook;
+	read_write_prof_sample_hook(&sample_hook, true, mock_prof_sample_hook);
+	expect_ptr_null(sample_hook, "Unexpected non NULL default hook");
+	check_prof_sample_hooks(true, false);
+
+	prof_sample_free_hook_t sample_free_hook;
+	read_write_prof_sample_free_hook(
+	    &sample_free_hook, true, mock_prof_sample_free_hook);
+	expect_ptr_null(sample_free_hook, "Unexpected non NULL default hook");
+	check_prof_sample_hooks(true, true);
+
+	read_write_prof_sample_hook(&sample_hook, true, NULL);
+	expect_ptr_eq(
+	    sample_hook, mock_prof_sample_hook, "Unexpected prof_sample hook");
+	check_prof_sample_hooks(false, true);
+
+	read_write_prof_sample_free_hook(&sample_free_hook, true, NULL);
+	expect_ptr_eq(sample_free_hook, mock_prof_sample_free_hook,
+	    "Unexpected prof_sample_free hook");
+	check_prof_sample_hooks(false, false);
+}
+TEST_END
+
 int
 main(void) {
-	return test(
-	    test_prof_backtrace_hook_replace,
-	    test_prof_backtrace_hook_augment,
-	    test_prof_dump_hook);
+	return test(test_prof_backtrace_hook_replace,
+	    test_prof_backtrace_hook_augment, test_prof_dump_hook,
+	    test_prof_sample_hooks);
 }
diff --git a/test/unit/prof_hook.sh b/test/unit/prof_hook.sh
index c7ebd8f9..48cd51a5 100644
--- a/test/unit/prof_hook.sh
+++ b/test/unit/prof_hook.sh
@@ -1,6 +1,5 @@
 #!/bin/sh
 
 if [ "x${enable_prof}" = "x1" ] ; then
-  export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0"
+  export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0,prof_bt_max:200"
 fi
-
diff --git a/test/unit/prof_idump.c b/test/unit/prof_idump.c
index 455ac529..b16b4a1f 100644
--- a/test/unit/prof_idump.c
+++ b/test/unit/prof_idump.c
@@ -13,8 +13,9 @@ prof_dump_open_file_intercept(const char *filename, int mode) {
 	did_prof_dump_open = true;
 
 	const char filename_prefix[] = TEST_PREFIX ".";
-	expect_d_eq(strncmp(filename_prefix, filename, sizeof(filename_prefix)
-	    - 1), 0, "Dump file name should start with \"" TEST_PREFIX ".\"");
+	expect_d_eq(
+	    strncmp(filename_prefix, filename, sizeof(filename_prefix) - 1), 0,
+	    "Dump file name should start with \"" TEST_PREFIX ".\"");
 
 	fd = open("/dev/null", O_WRONLY);
 	assert_d_ne(fd, -1, "Unexpected open() failure");
@@ -23,7 +24,7 @@ prof_dump_open_file_intercept(const char *filename, int mode) {
 }
 
 TEST_BEGIN(test_idump) {
-	bool active;
+	bool  active;
 	void *p;
 
 	const char *test_prefix = TEST_PREFIX;
@@ -33,12 +34,12 @@ TEST_BEGIN(test_idump) {
 	active = true;
 
 	expect_d_eq(mallctl("prof.prefix", NULL, NULL, (void *)&test_prefix,
-	    sizeof(test_prefix)), 0,
-	    "Unexpected mallctl failure while overwriting dump prefix");
+	                sizeof(test_prefix)),
+	    0, "Unexpected mallctl failure while overwriting dump prefix");
 
-	expect_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active,
-	    sizeof(active)), 0,
-	    "Unexpected mallctl failure while activating profiling");
+	expect_d_eq(
+	    mallctl("prof.active", NULL, NULL, (void *)&active, sizeof(active)),
+	    0, "Unexpected mallctl failure while activating profiling");
 
 	prof_dump_open_file = prof_dump_open_file_intercept;
 
@@ -52,6 +53,5 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_idump);
+	return test(test_idump);
 }
diff --git a/test/unit/prof_log.c b/test/unit/prof_log.c
index 5ff208e2..8cfc19ff 100644
--- a/test/unit/prof_log.c
+++ b/test/unit/prof_log.c
@@ -4,22 +4,25 @@
 #define N_PARAM 100
 #define N_THREADS 10
 
-static void expect_rep() {
+static void
+expect_rep(void) {
 	expect_b_eq(prof_log_rep_check(), false, "Rep check failed");
 }
 
-static void expect_log_empty() {
-	expect_zu_eq(prof_log_bt_count(), 0,
-	    "The log has backtraces; it isn't empty");
-	expect_zu_eq(prof_log_thr_count(), 0,
-	    "The log has threads; it isn't empty");
+static void
+expect_log_empty(void) {
+	expect_zu_eq(
+	    prof_log_bt_count(), 0, "The log has backtraces; it isn't empty");
+	expect_zu_eq(
+	    prof_log_thr_count(), 0, "The log has threads; it isn't empty");
 	expect_zu_eq(prof_log_alloc_count(), 0,
 	    "The log has allocations; it isn't empty");
 }
 
 void *buf[N_PARAM];
 
-static void f() {
+static void
+f(void) {
 	int i;
 	for (i = 0; i < N_PARAM; i++) {
 		buf[i] = malloc(100);
@@ -46,8 +49,8 @@ TEST_BEGIN(test_prof_log_many_logs) {
 		f();
 		expect_zu_eq(prof_log_thr_count(), 1, "Wrong thread count");
 		expect_rep();
-		expect_b_eq(prof_log_is_logging(), true,
-		    "Logging should still be on");
+		expect_b_eq(
+		    prof_log_is_logging(), true, "Logging should still be on");
 		expect_d_eq(mallctl("prof.log_stop", NULL, NULL, NULL, 0), 0,
 		    "Unexpected mallctl failure when stopping logging");
 		expect_b_eq(prof_log_is_logging(), false,
@@ -58,7 +61,8 @@ TEST_END
 
 thd_t thr_buf[N_THREADS];
 
-static void *f_thread(void *unused) {
+static void *
+f_thread(void *unused) {
 	int i;
 	for (i = 0; i < N_PARAM; i++) {
 		void *p = malloc(100);
@@ -70,7 +74,6 @@ static void *f_thread(void *unused) {
 }
 
 TEST_BEGIN(test_prof_log_many_threads) {
-
 	test_skip_if(!config_prof);
 
 	int i;
@@ -83,32 +86,34 @@ TEST_BEGIN(test_prof_log_many_threads) {
 	for (i = 0; i < N_THREADS; i++) {
 		thd_join(thr_buf[i], NULL);
 	}
-	expect_zu_eq(prof_log_thr_count(), N_THREADS,
-	    "Wrong number of thread entries");
+	expect_zu_eq(
+	    prof_log_thr_count(), N_THREADS, "Wrong number of thread entries");
 	expect_rep();
 	expect_d_eq(mallctl("prof.log_stop", NULL, NULL, NULL, 0), 0,
 	    "Unexpected mallctl failure when stopping logging");
 }
 TEST_END
 
-static void f3() {
+static void
+f3(void) {
 	void *p = malloc(100);
 	free(p);
 }
 
-static void f1() {
+static void
+f1(void) {
 	void *p = malloc(100);
 	f3();
 	free(p);
 }
 
-static void f2() {
+static void
+f2(void) {
 	void *p = malloc(100);
 	free(p);
 }
 
 TEST_BEGIN(test_prof_log_many_traces) {
-
 	test_skip_if(!config_prof);
 
 	expect_d_eq(mallctl("prof.log_start", NULL, NULL, NULL, 0), 0,
@@ -144,8 +149,6 @@ main(void) {
 	if (config_prof) {
 		prof_log_dummy_set(true);
 	}
-	return test_no_reentrancy(
-	    test_prof_log_many_logs,
-	    test_prof_log_many_traces,
-	    test_prof_log_many_threads);
+	return test_no_reentrancy(test_prof_log_many_logs,
+	    test_prof_log_many_traces, test_prof_log_many_threads);
 }
diff --git a/test/unit/prof_mdump.c b/test/unit/prof_mdump.c
index 75b3a515..0200f92f 100644
--- a/test/unit/prof_mdump.c
+++ b/test/unit/prof_mdump.c
@@ -3,7 +3,7 @@
 #include "jemalloc/internal/prof_sys.h"
 
 static const char *test_filename = "test_filename";
-static bool did_prof_dump_open;
+static bool        did_prof_dump_open;
 
 static int
 prof_dump_open_file_intercept(const char *filename, int mode) {
@@ -35,8 +35,8 @@ TEST_BEGIN(test_mdump_normal) {
 	prof_dump_open_file = prof_dump_open_file_intercept;
 	did_prof_dump_open = false;
 	expect_d_eq(mallctl("prof.dump", NULL, NULL, (void *)&test_filename,
-	    sizeof(test_filename)), 0,
-	    "Unexpected mallctl failure while dumping");
+	                sizeof(test_filename)),
+	    0, "Unexpected mallctl failure while dumping");
 	expect_true(did_prof_dump_open, "Expected a profile dump");
 
 	dallocx(p, 0);
@@ -89,7 +89,8 @@ static void
 expect_write_failure(int count) {
 	prof_dump_write_file_count = count;
 	expect_d_eq(mallctl("prof.dump", NULL, NULL, (void *)&test_filename,
-	    sizeof(test_filename)), EFAULT, "Dump should err");
+	                sizeof(test_filename)),
+	    EFAULT, "Dump should err");
 	expect_d_eq(prof_dump_write_file_count, 0,
 	    "Dumping stopped after a wrong number of writes");
 }
@@ -98,7 +99,7 @@ TEST_BEGIN(test_mdump_output_error) {
 	test_skip_if(!config_prof);
 	test_skip_if(!config_debug);
 
-	prof_dump_open_file_t *open_file_orig = prof_dump_open_file;
+	prof_dump_open_file_t  *open_file_orig = prof_dump_open_file;
 	prof_dump_write_file_t *write_file_orig = prof_dump_write_file;
 
 	prof_dump_write_file = prof_dump_write_file_error;
@@ -129,7 +130,7 @@ TEST_BEGIN(test_mdump_output_error) {
 TEST_END
 
 static int
-prof_dump_open_maps_error() {
+prof_dump_open_maps_error(void) {
 	return -1;
 }
 
@@ -166,10 +167,11 @@ expect_maps_write_failure(int count) {
 TEST_BEGIN(test_mdump_maps_error) {
 	test_skip_if(!config_prof);
 	test_skip_if(!config_debug);
+	test_skip_if(prof_dump_open_maps == NULL);
 
-	prof_dump_open_file_t *open_file_orig = prof_dump_open_file;
+	prof_dump_open_file_t  *open_file_orig = prof_dump_open_file;
 	prof_dump_write_file_t *write_file_orig = prof_dump_write_file;
-	prof_dump_open_maps_t *open_maps_orig = prof_dump_open_maps;
+	prof_dump_open_maps_t  *open_maps_orig = prof_dump_open_maps;
 
 	prof_dump_open_file = prof_dump_open_file_intercept;
 	prof_dump_write_file = prof_dump_write_maps_file_error;
@@ -185,8 +187,8 @@ TEST_BEGIN(test_mdump_maps_error) {
 	started_piping_maps_file = false;
 	prof_dump_write_file_count = 0;
 	expect_d_eq(mallctl("prof.dump", NULL, NULL, (void *)&test_filename,
-	    sizeof(test_filename)), 0,
-	    "mallctl should not fail in case of maps file opening failure");
+	                sizeof(test_filename)),
+	    0, "mallctl should not fail in case of maps file opening failure");
 	expect_false(started_piping_maps_file, "Shouldn't start piping maps");
 	expect_d_eq(prof_dump_write_file_count, 0,
 	    "Dumping stopped after a wrong number of writes");
@@ -210,7 +212,5 @@ TEST_END
 int
 main(void) {
 	return test(
-	    test_mdump_normal,
-	    test_mdump_output_error,
-	    test_mdump_maps_error);
+	    test_mdump_normal, test_mdump_output_error, test_mdump_maps_error);
 }
diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c
index 4fb37236..b8fd0ca8 100644
--- a/test/unit/prof_recent.c
+++ b/test/unit/prof_recent.c
@@ -5,9 +5,11 @@
 /* As specified in the shell script */
 #define OPT_ALLOC_MAX 3
 
+const char *test_thread_name = "test_thread";
+
 /* Invariant before and after every test (when config_prof is on) */
 static void
-confirm_prof_setup() {
+confirm_prof_setup(void) {
 	/* Options */
 	assert_true(opt_prof, "opt_prof not on");
 	assert_true(opt_prof_active, "opt_prof_active not on");
@@ -30,18 +32,20 @@ TEST_BEGIN(test_prof_recent_off) {
 	test_skip_if(config_prof);
 
 	const ssize_t past_ref = 0, future_ref = 0;
-	const size_t len_ref = sizeof(ssize_t);
+	const size_t  len_ref = sizeof(ssize_t);
 
 	ssize_t past = past_ref, future = future_ref;
-	size_t len = len_ref;
+	size_t  len = len_ref;
 
-#define ASSERT_SHOULD_FAIL(opt, a, b, c, d) do {			\
-	assert_d_eq(mallctl("experimental.prof_recent." opt, a, b, c,	\
-	    d), ENOENT, "Should return ENOENT when config_prof is off");\
-	assert_zd_eq(past, past_ref, "output was touched");		\
-	assert_zu_eq(len, len_ref, "output length was touched");	\
-	assert_zd_eq(future, future_ref, "input was touched");		\
-} while (0)
+#define ASSERT_SHOULD_FAIL(opt, a, b, c, d)                                    \
+	do {                                                                   \
+		assert_d_eq(                                                   \
+		    mallctl("experimental.prof_recent." opt, a, b, c, d),      \
+		    ENOENT, "Should return ENOENT when config_prof is off");   \
+		assert_zd_eq(past, past_ref, "output was touched");            \
+		assert_zu_eq(len, len_ref, "output length was touched");       \
+		assert_zd_eq(future, future_ref, "input was touched");         \
+	} while (0)
 
 	ASSERT_SHOULD_FAIL("alloc_max", NULL, NULL, NULL, 0);
 	ASSERT_SHOULD_FAIL("alloc_max", &past, &len, NULL, 0);
@@ -56,40 +60,45 @@ TEST_BEGIN(test_prof_recent_on) {
 	test_skip_if(!config_prof);
 
 	ssize_t past, future;
-	size_t len = sizeof(ssize_t);
+	size_t  len = sizeof(ssize_t);
 
 	confirm_prof_setup();
 
-	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
-	    NULL, NULL, NULL, 0), 0, "no-op mallctl should be allowed");
+	assert_d_eq(
+	    mallctl("experimental.prof_recent.alloc_max", NULL, NULL, NULL, 0),
+	    0, "no-op mallctl should be allowed");
 	confirm_prof_setup();
 
-	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
-	    &past, &len, NULL, 0), 0, "Read error");
+	assert_d_eq(
+	    mallctl("experimental.prof_recent.alloc_max", &past, &len, NULL, 0),
+	    0, "Read error");
 	expect_zd_eq(past, OPT_ALLOC_MAX, "Wrong read result");
 	future = OPT_ALLOC_MAX + 1;
-	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
-	    NULL, NULL, &future, len), 0, "Write error");
+	assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL,
+	                &future, len),
+	    0, "Write error");
 	future = -1;
-	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
-	    &past, &len, &future, len), 0, "Read/write error");
+	assert_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len,
+	                &future, len),
+	    0, "Read/write error");
 	expect_zd_eq(past, OPT_ALLOC_MAX + 1, "Wrong read result");
 	future = -2;
-	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
-	    &past, &len, &future, len), EINVAL,
-	    "Invalid write should return EINVAL");
+	assert_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len,
+	                &future, len),
+	    EINVAL, "Invalid write should return EINVAL");
 	expect_zd_eq(past, OPT_ALLOC_MAX + 1,
 	    "Output should not be touched given invalid write");
 	future = OPT_ALLOC_MAX;
-	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
-	    &past, &len, &future, len), 0, "Read/write error");
+	assert_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len,
+	                &future, len),
+	    0, "Read/write error");
 	expect_zd_eq(past, -1, "Wrong read result");
 	future = OPT_ALLOC_MAX + 2;
-	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
-	    &past, &len, &future, len * 2), EINVAL,
-	    "Invalid write should return EINVAL");
-	expect_zd_eq(past, -1,
-	    "Output should not be touched given invalid write");
+	assert_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len,
+	                &future, len * 2),
+	    EINVAL, "Invalid write should return EINVAL");
+	expect_zd_eq(
+	    past, -1, "Output should not be touched given invalid write");
 
 	confirm_prof_setup();
 }
@@ -105,8 +114,8 @@ confirm_malloc(void *p) {
 	assert_ptr_not_null(e, "NULL edata for living pointer");
 	prof_recent_t *n = edata_prof_recent_alloc_get_no_lock_test(e);
 	assert_ptr_not_null(n, "Record in edata should not be NULL");
-	expect_ptr_not_null(n->alloc_tctx,
-	    "alloc_tctx in record should not be NULL");
+	expect_ptr_not_null(
+	    n->alloc_tctx, "alloc_tctx in record should not be NULL");
 	expect_ptr_eq(e, prof_recent_alloc_edata_get_no_lock_test(n),
 	    "edata pointer in record is not correct");
 	expect_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL");
@@ -114,17 +123,17 @@ confirm_malloc(void *p) {
 
 static void
 confirm_record_size(prof_recent_t *n, unsigned kth) {
-	expect_zu_eq(n->size, NTH_REQ_SIZE(kth),
-	    "Recorded allocation size is wrong");
+	expect_zu_eq(
+	    n->size, NTH_REQ_SIZE(kth), "Recorded allocation size is wrong");
 }
 
 static void
 confirm_record_living(prof_recent_t *n) {
-	expect_ptr_not_null(n->alloc_tctx,
-	    "alloc_tctx in record should not be NULL");
+	expect_ptr_not_null(
+	    n->alloc_tctx, "alloc_tctx in record should not be NULL");
 	edata_t *edata = prof_recent_alloc_edata_get_no_lock_test(n);
-	assert_ptr_not_null(edata,
-	    "Recorded edata should not be NULL for living pointer");
+	assert_ptr_not_null(
+	    edata, "Recorded edata should not be NULL for living pointer");
 	expect_ptr_eq(n, edata_prof_recent_alloc_get_no_lock_test(edata),
 	    "Record in edata is not correct");
 	expect_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL");
@@ -132,8 +141,8 @@ confirm_record_living(prof_recent_t *n) {
 
 static void
 confirm_record_released(prof_recent_t *n) {
-	expect_ptr_not_null(n->alloc_tctx,
-	    "alloc_tctx in record should not be NULL");
+	expect_ptr_not_null(
+	    n->alloc_tctx, "alloc_tctx in record should not be NULL");
 	expect_ptr_null(prof_recent_alloc_edata_get_no_lock_test(n),
 	    "Recorded edata should be NULL for released pointer");
 	expect_ptr_not_null(n->dalloc_tctx,
@@ -143,12 +152,12 @@ confirm_record_released(prof_recent_t *n) {
 TEST_BEGIN(test_prof_recent_alloc) {
 	test_skip_if(!config_prof);
 
-	bool b;
-	unsigned i, c;
-	size_t req_size;
-	void *p;
+	bool           b;
+	unsigned       i, c;
+	size_t         req_size;
+	void          *p;
 	prof_recent_t *n;
-	ssize_t future;
+	ssize_t        future;
 
 	confirm_prof_setup();
 
@@ -173,7 +182,7 @@ TEST_BEGIN(test_prof_recent_alloc) {
 			continue;
 		}
 		c = 0;
-		ql_foreach(n, &prof_recent_alloc_list, link) {
+		ql_foreach (n, &prof_recent_alloc_list, link) {
 			++c;
 			confirm_record_size(n, i + c - OPT_ALLOC_MAX);
 			if (c == OPT_ALLOC_MAX) {
@@ -182,8 +191,8 @@ TEST_BEGIN(test_prof_recent_alloc) {
 				confirm_record_released(n);
 			}
 		}
-		assert_u_eq(c, OPT_ALLOC_MAX,
-		    "Incorrect total number of allocations");
+		assert_u_eq(
+		    c, OPT_ALLOC_MAX, "Incorrect total number of allocations");
 		free(p);
 	}
 
@@ -202,13 +211,13 @@ TEST_BEGIN(test_prof_recent_alloc) {
 		p = malloc(req_size);
 		assert_ptr_not_null(p, "malloc failed unexpectedly");
 		c = 0;
-		ql_foreach(n, &prof_recent_alloc_list, link) {
+		ql_foreach (n, &prof_recent_alloc_list, link) {
 			confirm_record_size(n, c + OPT_ALLOC_MAX);
 			confirm_record_released(n);
 			++c;
 		}
-		assert_u_eq(c, OPT_ALLOC_MAX,
-		    "Incorrect total number of allocations");
+		assert_u_eq(
+		    c, OPT_ALLOC_MAX, "Incorrect total number of allocations");
 		free(p);
 	}
 
@@ -229,91 +238,96 @@ TEST_BEGIN(test_prof_recent_alloc) {
 		p = malloc(req_size);
 		confirm_malloc(p);
 		c = 0;
-		ql_foreach(n, &prof_recent_alloc_list, link) {
+		ql_foreach (n, &prof_recent_alloc_list, link) {
 			++c;
 			confirm_record_size(n,
 			    /* Is the allocation from the third batch? */
-			    i + c - OPT_ALLOC_MAX >= 3 * OPT_ALLOC_MAX ?
-			    /* If yes, then it's just recorded. */
-			    i + c - OPT_ALLOC_MAX :
-			    /*
+			    i + c - OPT_ALLOC_MAX >= 3 * OPT_ALLOC_MAX
+			        ?
+			        /* If yes, then it's just recorded. */
+			        i + c - OPT_ALLOC_MAX
+			        :
+			        /*
 			     * Otherwise, it should come from the first batch
 			     * instead of the second batch.
 			     */
-			    i + c - 2 * OPT_ALLOC_MAX);
+			        i + c - 2 * OPT_ALLOC_MAX);
 			if (c == OPT_ALLOC_MAX) {
 				confirm_record_living(n);
 			} else {
 				confirm_record_released(n);
 			}
 		}
-		assert_u_eq(c, OPT_ALLOC_MAX,
-		    "Incorrect total number of allocations");
+		assert_u_eq(
+		    c, OPT_ALLOC_MAX, "Incorrect total number of allocations");
 		free(p);
 	}
 
 	/* Increasing the limit shouldn't alter the list of records. */
 	future = OPT_ALLOC_MAX + 1;
-	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
-	    NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error");
+	assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL,
+	                &future, sizeof(ssize_t)),
+	    0, "Write error");
 	c = 0;
-	ql_foreach(n, &prof_recent_alloc_list, link) {
+	ql_foreach (n, &prof_recent_alloc_list, link) {
 		confirm_record_size(n, c + 3 * OPT_ALLOC_MAX);
 		confirm_record_released(n);
 		++c;
 	}
-	assert_u_eq(c, OPT_ALLOC_MAX,
-	    "Incorrect total number of allocations");
+	assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations");
 
 	/*
 	 * Decreasing the limit shouldn't alter the list of records as long as
 	 * the new limit is still no less than the length of the list.
 	 */
 	future = OPT_ALLOC_MAX;
-	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
-	    NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error");
+	assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL,
+	                &future, sizeof(ssize_t)),
+	    0, "Write error");
 	c = 0;
-	ql_foreach(n, &prof_recent_alloc_list, link) {
+	ql_foreach (n, &prof_recent_alloc_list, link) {
 		confirm_record_size(n, c + 3 * OPT_ALLOC_MAX);
 		confirm_record_released(n);
 		++c;
 	}
-	assert_u_eq(c, OPT_ALLOC_MAX,
-	    "Incorrect total number of allocations");
+	assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations");
 
 	/*
 	 * Decreasing the limit should shorten the list of records if the new
 	 * limit is less than the length of the list.
 	 */
 	future = OPT_ALLOC_MAX - 1;
-	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
-	    NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error");
+	assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL,
+	                &future, sizeof(ssize_t)),
+	    0, "Write error");
 	c = 0;
-	ql_foreach(n, &prof_recent_alloc_list, link) {
+	ql_foreach (n, &prof_recent_alloc_list, link) {
 		++c;
 		confirm_record_size(n, c + 3 * OPT_ALLOC_MAX);
 		confirm_record_released(n);
 	}
-	assert_u_eq(c, OPT_ALLOC_MAX - 1,
-	    "Incorrect total number of allocations");
+	assert_u_eq(
+	    c, OPT_ALLOC_MAX - 1, "Incorrect total number of allocations");
 
 	/* Setting to unlimited shouldn't alter the list of records. */
 	future = -1;
-	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
-	    NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error");
+	assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL,
+	                &future, sizeof(ssize_t)),
+	    0, "Write error");
 	c = 0;
-	ql_foreach(n, &prof_recent_alloc_list, link) {
+	ql_foreach (n, &prof_recent_alloc_list, link) {
 		++c;
 		confirm_record_size(n, c + 3 * OPT_ALLOC_MAX);
 		confirm_record_released(n);
 	}
-	assert_u_eq(c, OPT_ALLOC_MAX - 1,
-	    "Incorrect total number of allocations");
+	assert_u_eq(
+	    c, OPT_ALLOC_MAX - 1, "Incorrect total number of allocations");
 
 	/* Downshift to only one record. */
 	future = 1;
-	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
-	    NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error");
+	assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL,
+	                &future, sizeof(ssize_t)),
+	    0, "Write error");
 	assert_false(ql_empty(&prof_recent_alloc_list), "Recent list is empty");
 	n = ql_first(&prof_recent_alloc_list);
 	confirm_record_size(n, 4 * OPT_ALLOC_MAX - 1);
@@ -323,17 +337,19 @@ TEST_BEGIN(test_prof_recent_alloc) {
 
 	/* Completely turn off. */
 	future = 0;
-	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
-	    NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error");
-	assert_true(ql_empty(&prof_recent_alloc_list),
-	    "Recent list should be empty");
+	assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL,
+	                &future, sizeof(ssize_t)),
+	    0, "Write error");
+	assert_true(
+	    ql_empty(&prof_recent_alloc_list), "Recent list should be empty");
 
 	/* Restore the settings. */
 	future = OPT_ALLOC_MAX;
-	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
-	    NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error");
-	assert_true(ql_empty(&prof_recent_alloc_list),
-	    "Recent list should be empty");
+	assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL,
+	                &future, sizeof(ssize_t)),
+	    0, "Write error");
+	assert_true(
+	    ql_empty(&prof_recent_alloc_list), "Recent list should be empty");
 
 	confirm_prof_setup();
 }
@@ -342,7 +358,7 @@ TEST_END
 #undef NTH_REQ_SIZE
 
 #define DUMP_OUT_SIZE 4096
-static char dump_out[DUMP_OUT_SIZE];
+static char   dump_out[DUMP_OUT_SIZE];
 static size_t dump_out_len = 0;
 
 static void
@@ -354,17 +370,18 @@ test_dump_write_cb(void *not_used, const char *str) {
 }
 
 static void
-call_dump() {
+call_dump(void) {
 	static void *in[2] = {test_dump_write_cb, NULL};
 	dump_out_len = 0;
-	assert_d_eq(mallctl("experimental.prof_recent.alloc_dump",
-	    NULL, NULL, in, sizeof(in)), 0, "Dump mallctl raised error");
+	assert_d_eq(mallctl("experimental.prof_recent.alloc_dump", NULL, NULL,
+	                in, sizeof(in)),
+	    0, "Dump mallctl raised error");
 }
 
 typedef struct {
 	size_t size;
 	size_t usize;
-	bool released;
+	bool   released;
 } confirm_record_t;
 
 #define DUMP_ERROR "Dump output is wrong"
@@ -373,7 +390,7 @@ static void
 confirm_record(const char *template, const confirm_record_t *records,
     const size_t n_records) {
 	static const char *types[2] = {"alloc", "dalloc"};
-	static char buf[64];
+	static char        buf[64];
 
 	/*
 	 * The template string would be in the form of:
@@ -382,32 +399,35 @@ confirm_record(const char *template, const confirm_record_t *records,
 	 * "{...,\"recent_alloc\":[...]}".
 	 * Using "- 2" serves to cut right before the ending "]}".
 	 */
-	assert_d_eq(memcmp(dump_out, template, strlen(template) - 2), 0,
-	    DUMP_ERROR);
+	assert_d_eq(
+	    memcmp(dump_out, template, strlen(template) - 2), 0, DUMP_ERROR);
 	assert_d_eq(memcmp(dump_out + strlen(dump_out) - 2,
-	    template + strlen(template) - 2, 2), 0, DUMP_ERROR);
+	                template + strlen(template) - 2, 2),
+	    0, DUMP_ERROR);
 
-	const char *start = dump_out + strlen(template) - 2;
-	const char *end = dump_out + strlen(dump_out) - 2;
+	const char             *start = dump_out + strlen(template) - 2;
+	const char             *end = dump_out + strlen(dump_out) - 2;
 	const confirm_record_t *record;
 	for (record = records; record < records + n_records; ++record) {
+#define ASSERT_CHAR(c)                                                         \
+	do {                                                                   \
+		assert_true(start < end, DUMP_ERROR);                          \
+		assert_c_eq(*start++, c, DUMP_ERROR);                          \
+	} while (0)
 
-#define ASSERT_CHAR(c) do {						\
-	assert_true(start < end, DUMP_ERROR);				\
-	assert_c_eq(*start++, c, DUMP_ERROR);				\
-} while (0)
+#define ASSERT_STR(s)                                                          \
+	do {                                                                   \
+		const size_t len = strlen(s);                                  \
+		assert_true(start + len <= end, DUMP_ERROR);                   \
+		assert_d_eq(memcmp(start, s, len), 0, DUMP_ERROR);             \
+		start += len;                                                  \
+	} while (0)
 
-#define ASSERT_STR(s) do {						\
-	const size_t len = strlen(s);					\
-	assert_true(start + len <= end, DUMP_ERROR);			\
-	assert_d_eq(memcmp(start, s, len), 0, DUMP_ERROR);		\
-	start += len;							\
-} while (0)
-
-#define ASSERT_FORMATTED_STR(s, ...) do {				\
-	malloc_snprintf(buf, sizeof(buf), s, __VA_ARGS__);		\
-	ASSERT_STR(buf);						\
-} while (0)
+#define ASSERT_FORMATTED_STR(s, ...)                                           \
+	do {                                                                   \
+		malloc_snprintf(buf, sizeof(buf), s, __VA_ARGS__);             \
+		ASSERT_STR(buf);                                               \
+	} while (0)
 
 		if (record != records) {
 			ASSERT_CHAR(',');
@@ -439,16 +459,11 @@ confirm_record(const char *template, const confirm_record_t *records,
 			}
 			ASSERT_CHAR(',');
 
-			if (opt_prof_sys_thread_name) {
-				ASSERT_FORMATTED_STR("\"%s_thread_name\"",
-				    *type);
-				ASSERT_CHAR(':');
-				ASSERT_CHAR('"');
-				while (*start != '"') {
-					++start;
-				}
-				ASSERT_CHAR('"');
-				ASSERT_CHAR(',');
+			if (thd_has_setname() && opt_prof_sys_thread_name) {
+				ASSERT_FORMATTED_STR(
+				    "\"%s_thread_name\"", *type);
+				ASSERT_FORMATTED_STR(
+				    ":\"%s\",", test_thread_name);
 			}
 
 			ASSERT_FORMATTED_STR("\"%s_time\"", *type);
@@ -461,9 +476,9 @@ confirm_record(const char *template, const confirm_record_t *records,
 			ASSERT_FORMATTED_STR("\"%s_trace\"", *type);
 			ASSERT_CHAR(':');
 			ASSERT_CHAR('[');
-			while (isdigit(*start) || *start == 'x' ||
-			    (*start >= 'a' && *start <= 'f') ||
-			    *start == '\"' || *start == ',') {
+			while (isdigit(*start) || *start == 'x'
+			    || (*start >= 'a' && *start <= 'f')
+			    || *start == '\"' || *start == ',') {
 				++start;
 			}
 			ASSERT_CHAR(']');
@@ -486,7 +501,6 @@ confirm_record(const char *template, const confirm_record_t *records,
 #undef ASSERT_FORMATTED_STR
 #undef ASSERT_STR
 #undef ASSERT_CHAR
-
 	}
 	assert_ptr_eq(record, records + n_records, DUMP_ERROR);
 	assert_ptr_eq(start, end, DUMP_ERROR);
@@ -495,27 +509,33 @@ confirm_record(const char *template, const confirm_record_t *records,
 TEST_BEGIN(test_prof_recent_alloc_dump) {
 	test_skip_if(!config_prof);
 
+	thd_setname(test_thread_name);
 	confirm_prof_setup();
 
-	ssize_t future;
-	void *p, *q;
+	ssize_t          future;
+	void            *p, *q;
 	confirm_record_t records[2];
 
-	assert_zu_eq(lg_prof_sample, (size_t)0,
-	    "lg_prof_sample not set correctly");
+	assert_zu_eq(
+	    lg_prof_sample, (size_t)0, "lg_prof_sample not set correctly");
 
 	future = 0;
-	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
-	    NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error");
+	assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL,
+	                &future, sizeof(ssize_t)),
+	    0, "Write error");
 	call_dump();
-	expect_str_eq(dump_out, "{\"sample_interval\":1,"
-	    "\"recent_alloc_max\":0,\"recent_alloc\":[]}", DUMP_ERROR);
+	expect_str_eq(dump_out,
+	    "{\"sample_interval\":1,"
+	    "\"recent_alloc_max\":0,\"recent_alloc\":[]}",
+	    DUMP_ERROR);
 
 	future = 2;
-	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
-	    NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error");
+	assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL,
+	                &future, sizeof(ssize_t)),
+	    0, "Write error");
 	call_dump();
-	const char *template = "{\"sample_interval\":1,"
+	const char *template =
+	    "{\"sample_interval\":1,"
 	    "\"recent_alloc_max\":2,\"recent_alloc\":[]}";
 	expect_str_eq(dump_out, template, DUMP_ERROR);
 
@@ -544,8 +564,9 @@ TEST_BEGIN(test_prof_recent_alloc_dump) {
 	confirm_record(template, records, 2);
 
 	future = OPT_ALLOC_MAX;
-	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
-	    NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error");
+	assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL,
+	                &future, sizeof(ssize_t)),
+	    0, "Write error");
 	confirm_prof_setup();
 }
 TEST_END
@@ -560,14 +581,14 @@ TEST_END
 #define STRESS_ALLOC_MAX 4096
 
 typedef struct {
-	thd_t thd;
+	thd_t  thd;
 	size_t id;
-	void *ptrs[N_PTRS];
+	void  *ptrs[N_PTRS];
 	size_t count;
 } thd_data_t;
 
 static thd_data_t thd_data[N_THREADS];
-static ssize_t test_max;
+static ssize_t    test_max;
 
 static void
 test_write_cb(void *cbopaque, const char *str) {
@@ -577,11 +598,11 @@ test_write_cb(void *cbopaque, const char *str) {
 static void *
 f_thread(void *arg) {
 	const size_t thd_id = *(size_t *)arg;
-	thd_data_t *data_p = thd_data + thd_id;
+	thd_data_t  *data_p = thd_data + thd_id;
 	assert(data_p->id == thd_id);
 	data_p->count = 0;
 	uint64_t rand = (uint64_t)thd_id;
-	tsd_t *tsd = tsd_fetch();
+	tsd_t   *tsd = tsd_fetch();
 	assert(test_max > 1);
 	ssize_t last_max = -1;
 	for (int i = 0; i < N_ITERS; i++) {
@@ -605,15 +626,15 @@ f_thread(void *arg) {
 		} else if (rand % 5 == 1) {
 			last_max = prof_recent_alloc_max_ctl_read();
 		} else if (rand % 5 == 2) {
-			last_max =
-			    prof_recent_alloc_max_ctl_write(tsd, test_max * 2);
+			last_max = prof_recent_alloc_max_ctl_write(
+			    tsd, test_max * 2);
 		} else if (rand % 5 == 3) {
-			last_max =
-			    prof_recent_alloc_max_ctl_write(tsd, test_max);
+			last_max = prof_recent_alloc_max_ctl_write(
+			    tsd, test_max);
 		} else {
 			assert(rand % 5 == 4);
-			last_max =
-			    prof_recent_alloc_max_ctl_write(tsd, test_max / 2);
+			last_max = prof_recent_alloc_max_ctl_write(
+			    tsd, test_max / 2);
 		}
 		assert_zd_ge(last_max, -1, "Illegal last-N max");
 	}
@@ -642,8 +663,9 @@ TEST_BEGIN(test_prof_recent_stress) {
 	}
 
 	test_max = STRESS_ALLOC_MAX;
-	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
-	    NULL, NULL, &test_max, sizeof(ssize_t)), 0, "Write error");
+	assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL,
+	                &test_max, sizeof(ssize_t)),
+	    0, "Write error");
 	for (size_t i = 0; i < N_THREADS; i++) {
 		thd_data_t *data_p = thd_data + i;
 		data_p->id = i;
@@ -655,8 +677,9 @@ TEST_BEGIN(test_prof_recent_stress) {
 	}
 
 	test_max = OPT_ALLOC_MAX;
-	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
-	    NULL, NULL, &test_max, sizeof(ssize_t)), 0, "Write error");
+	assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL,
+	                &test_max, sizeof(ssize_t)),
+	    0, "Write error");
 	confirm_prof_setup();
 }
 TEST_END
@@ -668,11 +691,7 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_confirm_setup,
-	    test_prof_recent_off,
-	    test_prof_recent_on,
-	    test_prof_recent_alloc,
-	    test_prof_recent_alloc_dump,
-	    test_prof_recent_stress);
+	return test(test_confirm_setup, test_prof_recent_off,
+	    test_prof_recent_on, test_prof_recent_alloc,
+	    test_prof_recent_alloc_dump, test_prof_recent_stress);
 }
diff --git a/test/unit/prof_recent.sh b/test/unit/prof_recent.sh
index 58a54a47..10415bf3 100644
--- a/test/unit/prof_recent.sh
+++ b/test/unit/prof_recent.sh
@@ -1,5 +1,5 @@
 #!/bin/sh
 
 if [ "x${enable_prof}" = "x1" ] ; then
-  export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0,prof_recent_alloc_max:3"
+  export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0,prof_recent_alloc_max:3,prof_sys_thread_name:true"
 fi
diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c
index 9b33b205..0e64279e 100644
--- a/test/unit/prof_reset.c
+++ b/test/unit/prof_reset.c
@@ -15,8 +15,9 @@ prof_dump_open_file_intercept(const char *filename, int mode) {
 
 static void
 set_prof_active(bool active) {
-	expect_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active,
-	    sizeof(active)), 0, "Unexpected mallctl failure");
+	expect_d_eq(
+	    mallctl("prof.active", NULL, NULL, (void *)&active, sizeof(active)),
+	    0, "Unexpected mallctl failure");
 }
 
 static size_t
@@ -32,25 +33,26 @@ get_lg_prof_sample(void) {
 static void
 do_prof_reset(size_t lg_prof_sample_input) {
 	expect_d_eq(mallctl("prof.reset", NULL, NULL,
-	    (void *)&lg_prof_sample_input, sizeof(size_t)), 0,
-	    "Unexpected mallctl failure while resetting profile data");
+	                (void *)&lg_prof_sample_input, sizeof(size_t)),
+	    0, "Unexpected mallctl failure while resetting profile data");
 	expect_zu_eq(lg_prof_sample_input, get_lg_prof_sample(),
 	    "Expected profile sample rate change");
 }
 
 TEST_BEGIN(test_prof_reset_basic) {
-	size_t lg_prof_sample_orig, lg_prof_sample_cur, lg_prof_sample_next;
-	size_t sz;
+	size_t   lg_prof_sample_orig, lg_prof_sample_cur, lg_prof_sample_next;
+	size_t   sz;
 	unsigned i;
 
 	test_skip_if(!config_prof);
 
 	sz = sizeof(size_t);
 	expect_d_eq(mallctl("opt.lg_prof_sample", (void *)&lg_prof_sample_orig,
-	    &sz, NULL, 0), 0,
+	                &sz, NULL, 0),
+	    0,
 	    "Unexpected mallctl failure while reading profiling sample rate");
-	expect_zu_eq(lg_prof_sample_orig, 0,
-	    "Unexpected profiling sample rate");
+	expect_zu_eq(
+	    lg_prof_sample_orig, 0, "Unexpected profiling sample rate");
 	lg_prof_sample_cur = get_lg_prof_sample();
 	expect_zu_eq(lg_prof_sample_orig, lg_prof_sample_cur,
 	    "Unexpected disagreement between \"opt.lg_prof_sample\" and "
@@ -110,23 +112,24 @@ TEST_BEGIN(test_prof_reset_cleanup) {
 }
 TEST_END
 
-#define NTHREADS		4
-#define NALLOCS_PER_THREAD	(1U << 13)
-#define OBJ_RING_BUF_COUNT	1531
-#define RESET_INTERVAL		(1U << 10)
-#define DUMP_INTERVAL		3677
+#define NTHREADS 4
+#define NALLOCS_PER_THREAD (1U << 13)
+#define OBJ_RING_BUF_COUNT 1531
+#define RESET_INTERVAL (1U << 10)
+#define DUMP_INTERVAL 3677
 static void *
 thd_start(void *varg) {
 	unsigned thd_ind = *(unsigned *)varg;
 	unsigned i;
-	void *objs[OBJ_RING_BUF_COUNT];
+	void    *objs[OBJ_RING_BUF_COUNT];
 
 	memset(objs, 0, sizeof(objs));
 
 	for (i = 0; i < NALLOCS_PER_THREAD; i++) {
 		if (i % RESET_INTERVAL == 0) {
 			expect_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0),
-			    0, "Unexpected error while resetting heap profile "
+			    0,
+			    "Unexpected error while resetting heap profile "
 			    "data");
 		}
 
@@ -141,9 +144,9 @@ thd_start(void *varg) {
 				dallocx(*pp, 0);
 				*pp = NULL;
 			}
-			*pp = btalloc(1, thd_ind*NALLOCS_PER_THREAD + i);
-			expect_ptr_not_null(*pp,
-			    "Unexpected btalloc() failure");
+			*pp = btalloc(1, thd_ind * NALLOCS_PER_THREAD + i);
+			expect_ptr_not_null(
+			    *pp, "Unexpected btalloc() failure");
 		}
 	}
 
@@ -160,17 +163,16 @@ thd_start(void *varg) {
 }
 
 TEST_BEGIN(test_prof_reset) {
-	size_t lg_prof_sample_orig;
-	thd_t thds[NTHREADS];
+	size_t   lg_prof_sample_orig;
+	thd_t    thds[NTHREADS];
 	unsigned thd_args[NTHREADS];
 	unsigned i;
-	size_t bt_count, tdata_count;
+	size_t   bt_count, tdata_count;
 
 	test_skip_if(!config_prof);
 
 	bt_count = prof_bt_count();
-	expect_zu_eq(bt_count, 0,
-	    "Unexpected pre-existing tdata structures");
+	expect_zu_eq(bt_count, 0, "Unexpected pre-existing tdata structures");
 	tdata_count = prof_tdata_count();
 
 	lg_prof_sample_orig = get_lg_prof_sample();
@@ -186,8 +188,8 @@ TEST_BEGIN(test_prof_reset) {
 		thd_join(thds[i], NULL);
 	}
 
-	expect_zu_eq(prof_bt_count(), bt_count,
-	    "Unexpected bactrace count change");
+	expect_zu_eq(
+	    prof_bt_count(), bt_count, "Unexpected bactrace count change");
 	expect_zu_eq(prof_tdata_count(), tdata_count,
 	    "Unexpected remaining tdata structures");
 
@@ -205,9 +207,9 @@ TEST_END
 /* Test sampling at the same allocation site across resets. */
 #define NITER 10
 TEST_BEGIN(test_xallocx) {
-	size_t lg_prof_sample_orig;
+	size_t   lg_prof_sample_orig;
 	unsigned i;
-	void *ptrs[NITER];
+	void    *ptrs[NITER];
 
 	test_skip_if(!config_prof);
 
@@ -218,7 +220,7 @@ TEST_BEGIN(test_xallocx) {
 	do_prof_reset(0);
 
 	for (i = 0; i < NITER; i++) {
-		void *p;
+		void  *p;
 		size_t sz, nsz;
 
 		/* Reset profiling. */
@@ -233,13 +235,13 @@ TEST_BEGIN(test_xallocx) {
 
 		/* Perform successful xallocx(). */
 		sz = sallocx(p, 0);
-		expect_zu_eq(xallocx(p, sz, 0, 0), sz,
-		    "Unexpected xallocx() failure");
+		expect_zu_eq(
+		    xallocx(p, sz, 0, 0), sz, "Unexpected xallocx() failure");
 
 		/* Perform unsuccessful xallocx(). */
-		nsz = nallocx(sz+1, 0);
-		expect_zu_eq(xallocx(p, nsz, 0, 0), sz,
-		    "Unexpected xallocx() success");
+		nsz = nallocx(sz + 1, 0);
+		expect_zu_eq(
+		    xallocx(p, nsz, 0, 0), sz, "Unexpected xallocx() success");
 	}
 
 	for (i = 0; i < NITER; i++) {
@@ -258,9 +260,6 @@ main(void) {
 	/* Intercept dumping prior to running any tests. */
 	prof_dump_open_file = prof_dump_open_file_intercept;
 
-	return test_no_reentrancy(
-	    test_prof_reset_basic,
-	    test_prof_reset_cleanup,
-	    test_prof_reset,
-	    test_xallocx);
+	return test_no_reentrancy(test_prof_reset_basic,
+	    test_prof_reset_cleanup, test_prof_reset, test_xallocx);
 }
diff --git a/test/unit/prof_small.c b/test/unit/prof_small.c
new file mode 100644
index 00000000..993a83a7
--- /dev/null
+++ b/test/unit/prof_small.c
@@ -0,0 +1,106 @@
+#include "test/jemalloc_test.h"
+
+static void
+assert_small_allocation_sampled(void *ptr, size_t size) {
+	assert_ptr_not_null(ptr, "Unexpected malloc failure");
+	assert_zu_le(size, SC_SMALL_MAXCLASS, "Unexpected large size class");
+	edata_t *edata = emap_edata_lookup(TSDN_NULL, &arena_emap_global, ptr);
+	assert_ptr_not_null(edata, "Unable to find edata for allocation");
+	expect_false(edata_slab_get(edata),
+	    "Sampled small allocations should not be placed on slabs");
+	expect_ptr_eq(edata_base_get(edata), ptr,
+	    "Sampled allocations should be page-aligned");
+	expect_zu_eq(edata_usize_get(edata), size,
+	    "Edata usize did not match requested size");
+	expect_zu_eq(edata_size_get(edata), PAGE_CEILING(size) + sz_large_pad,
+	    "Edata actual size was not a multiple of PAGE");
+	prof_tctx_t *prof_tctx = edata_prof_tctx_get(edata);
+	expect_ptr_not_null(prof_tctx, "Edata had null prof_tctx");
+	expect_ptr_not_null(prof_tctx->tdata,
+	    "Edata had null prof_tdata despite being sampled");
+}
+
+TEST_BEGIN(test_profile_small_allocations) {
+	test_skip_if(!config_prof);
+
+	for (szind_t index = 0; index < SC_NBINS; index++) {
+		size_t size = sz_index2size(index);
+		void  *ptr = malloc(size);
+		assert_small_allocation_sampled(ptr, size);
+		free(ptr);
+	}
+}
+TEST_END
+
+TEST_BEGIN(test_profile_small_allocations_sdallocx) {
+	test_skip_if(!config_prof);
+
+	for (szind_t index = 0; index < SC_NBINS; index++) {
+		size_t size = sz_index2size(index);
+		void  *ptr = malloc(size);
+		assert_small_allocation_sampled(ptr, size);
+		/*
+		 * While free calls into ifree, sdallocx calls into isfree,
+		 * This test covers the isfree path to make sure promoted small
+		 * allocs are handled properly.
+		 */
+		sdallocx(ptr, size, 0);
+	}
+}
+TEST_END
+
+TEST_BEGIN(test_profile_small_reallocations_growing) {
+	test_skip_if(!config_prof);
+
+	void *ptr = NULL;
+	for (szind_t index = 0; index <= SC_NBINS; index++) {
+		size_t size = sz_index2size(index);
+		ptr = realloc(ptr, size);
+		/*
+		 * When index reaches SC_NBINS, it is no longer a small alloc,
+		 * we still want to test the realloc from a small alloc to a
+		 * large one, but we should not assert_small_allocation_sampled
+		 * on it.
+		 */
+		if (index == SC_NBINS) {
+			break;
+		}
+		assert_small_allocation_sampled(ptr, size);
+	}
+}
+TEST_END
+
+TEST_BEGIN(test_profile_small_reallocations_shrinking) {
+	test_skip_if(!config_prof);
+
+	void *ptr = NULL;
+	for (szind_t index = SC_NBINS; index-- > 0;) {
+		size_t size = sz_index2size(index);
+		ptr = realloc(ptr, size);
+		assert_small_allocation_sampled(ptr, size);
+	}
+}
+TEST_END
+
+TEST_BEGIN(test_profile_small_reallocations_same_size_class) {
+	test_skip_if(!config_prof);
+
+	for (szind_t index = 0; index < SC_NBINS; index++) {
+		size_t size = sz_index2size(index);
+		void  *ptr = malloc(size);
+		assert_small_allocation_sampled(ptr, size);
+		ptr = realloc(ptr, size - 1);
+		assert_small_allocation_sampled(ptr, size);
+		free(ptr);
+	}
+}
+TEST_END
+
+int
+main(void) {
+	return test(test_profile_small_allocations,
+	    test_profile_small_allocations_sdallocx,
+	    test_profile_small_reallocations_growing,
+	    test_profile_small_reallocations_shrinking,
+	    test_profile_small_reallocations_same_size_class);
+}
diff --git a/test/unit/prof_small.sh b/test/unit/prof_small.sh
new file mode 100644
index 00000000..d14cb8c5
--- /dev/null
+++ b/test/unit/prof_small.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+if [ "x${enable_prof}" = "x1" ] ; then
+  export MALLOC_CONF="prof:true,lg_prof_sample:0"
+fi
+
diff --git a/test/unit/prof_stats.c b/test/unit/prof_stats.c
index c88c4ae0..95ca051c 100644
--- a/test/unit/prof_stats.c
+++ b/test/unit/prof_stats.c
@@ -3,8 +3,8 @@
 #define N_PTRS 3
 
 static void
-test_combinations(szind_t ind, size_t sizes_array[N_PTRS],
-    int flags_array[N_PTRS]) {
+test_combinations(
+    szind_t ind, size_t sizes_array[N_PTRS], int flags_array[N_PTRS]) {
 #define MALLCTL_STR_LEN 64
 	assert(opt_prof && opt_prof_stats);
 
@@ -25,11 +25,13 @@ test_combinations(szind_t ind, size_t sizes_array[N_PTRS],
 	size_t stats_len = 2 * sizeof(uint64_t);
 
 	uint64_t live_stats_orig[2];
-	assert_d_eq(mallctl(mallctl_live_str, &live_stats_orig, &stats_len,
-	    NULL, 0), 0, "");
+	assert_d_eq(
+	    mallctl(mallctl_live_str, &live_stats_orig, &stats_len, NULL, 0), 0,
+	    "");
 	uint64_t accum_stats_orig[2];
-	assert_d_eq(mallctl(mallctl_accum_str, &accum_stats_orig, &stats_len,
-	    NULL, 0), 0, "");
+	assert_d_eq(
+	    mallctl(mallctl_accum_str, &accum_stats_orig, &stats_len, NULL, 0),
+	    0, "");
 
 	void *ptrs[N_PTRS];
 
@@ -40,8 +42,8 @@ test_combinations(szind_t ind, size_t sizes_array[N_PTRS],
 
 	for (size_t i = 0; i < N_PTRS; ++i) {
 		size_t sz = sizes_array[i];
-		int flags = flags_array[i];
-		void *p = mallocx(sz, flags);
+		int    flags = flags_array[i];
+		void  *p = mallocx(sz, flags);
 		assert_ptr_not_null(p, "malloc() failed");
 		assert(TEST_MALLOC_SIZE(p) == sz_index2size(ind));
 		ptrs[i] = p;
@@ -50,41 +52,45 @@ test_combinations(szind_t ind, size_t sizes_array[N_PTRS],
 		accum_req_sum += sz;
 		accum_count++;
 		uint64_t live_stats[2];
-		assert_d_eq(mallctl(mallctl_live_str, &live_stats, &stats_len,
-		    NULL, 0), 0, "");
-		expect_u64_eq(live_stats[0] - live_stats_orig[0],
-		    live_req_sum, "");
-		expect_u64_eq(live_stats[1] - live_stats_orig[1],
-		    live_count, "");
+		assert_d_eq(
+		    mallctl(mallctl_live_str, &live_stats, &stats_len, NULL, 0),
+		    0, "");
+		expect_u64_eq(
+		    live_stats[0] - live_stats_orig[0], live_req_sum, "");
+		expect_u64_eq(
+		    live_stats[1] - live_stats_orig[1], live_count, "");
 		uint64_t accum_stats[2];
 		assert_d_eq(mallctl(mallctl_accum_str, &accum_stats, &stats_len,
-		    NULL, 0), 0, "");
-		expect_u64_eq(accum_stats[0] - accum_stats_orig[0],
-		    accum_req_sum, "");
-		expect_u64_eq(accum_stats[1] - accum_stats_orig[1],
-		    accum_count, "");
+		                NULL, 0),
+		    0, "");
+		expect_u64_eq(
+		    accum_stats[0] - accum_stats_orig[0], accum_req_sum, "");
+		expect_u64_eq(
+		    accum_stats[1] - accum_stats_orig[1], accum_count, "");
 	}
 
 	for (size_t i = 0; i < N_PTRS; ++i) {
 		size_t sz = sizes_array[i];
-		int flags = flags_array[i];
+		int    flags = flags_array[i];
 		sdallocx(ptrs[i], sz, flags);
 		live_req_sum -= sz;
 		live_count--;
 		uint64_t live_stats[2];
-		assert_d_eq(mallctl(mallctl_live_str, &live_stats, &stats_len,
-		    NULL, 0), 0, "");
-		expect_u64_eq(live_stats[0] - live_stats_orig[0],
-		    live_req_sum, "");
-		expect_u64_eq(live_stats[1] - live_stats_orig[1],
-		    live_count, "");
+		assert_d_eq(
+		    mallctl(mallctl_live_str, &live_stats, &stats_len, NULL, 0),
+		    0, "");
+		expect_u64_eq(
+		    live_stats[0] - live_stats_orig[0], live_req_sum, "");
+		expect_u64_eq(
+		    live_stats[1] - live_stats_orig[1], live_count, "");
 		uint64_t accum_stats[2];
 		assert_d_eq(mallctl(mallctl_accum_str, &accum_stats, &stats_len,
-		    NULL, 0), 0, "");
-		expect_u64_eq(accum_stats[0] - accum_stats_orig[0],
-		    accum_req_sum, "");
-		expect_u64_eq(accum_stats[1] - accum_stats_orig[1],
-		    accum_count, "");
+		                NULL, 0),
+		    0, "");
+		expect_u64_eq(
+		    accum_stats[0] - accum_stats_orig[0], accum_req_sum, "");
+		expect_u64_eq(
+		    accum_stats[1] - accum_stats_orig[1], accum_count, "");
 	}
 #undef MALLCTL_STR_LEN
 }
@@ -92,9 +98,9 @@ test_combinations(szind_t ind, size_t sizes_array[N_PTRS],
 static void
 test_szind_wrapper(szind_t ind) {
 	size_t sizes_array[N_PTRS];
-	int flags_array[N_PTRS];
+	int    flags_array[N_PTRS];
 	for (size_t i = 0, sz = sz_index2size(ind) - N_PTRS; i < N_PTRS;
-	    ++i, ++sz) {
+	     ++i, ++sz) {
 		sizes_array[i] = sz;
 		flags_array[i] = 0;
 	}
@@ -115,10 +121,10 @@ TEST_END
 static void
 test_szind_aligned_wrapper(szind_t ind, unsigned lg_align) {
 	size_t sizes_array[N_PTRS];
-	int flags_array[N_PTRS];
-	int flags = MALLOCX_LG_ALIGN(lg_align);
+	int    flags_array[N_PTRS];
+	int    flags = MALLOCX_LG_ALIGN(lg_align);
 	for (size_t i = 0, sz = sz_index2size(ind) - N_PTRS; i < N_PTRS;
-	    ++i, ++sz) {
+	     ++i, ++sz) {
 		sizes_array[i] = sz;
 		flags_array[i] = flags;
 	}
@@ -136,7 +142,7 @@ TEST_BEGIN(test_prof_stats_aligned) {
 	}
 	for (szind_t ind = SC_NBINS - 5; ind < SC_NBINS + 5; ++ind) {
 		for (unsigned lg_align = SC_LG_LARGE_MINCLASS - 5;
-		    lg_align < SC_LG_LARGE_MINCLASS + 5; ++lg_align) {
+		     lg_align < SC_LG_LARGE_MINCLASS + 5; ++lg_align) {
 			test_szind_aligned_wrapper(ind, lg_align);
 		}
 	}
@@ -145,7 +151,5 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_prof_stats,
-	    test_prof_stats_aligned);
+	return test(test_prof_stats, test_prof_stats_aligned);
 }
diff --git a/test/unit/prof_sys_thread_name.c b/test/unit/prof_sys_thread_name.c
index affc788a..242e2fc3 100644
--- a/test/unit/prof_sys_thread_name.c
+++ b/test/unit/prof_sys_thread_name.c
@@ -3,6 +3,7 @@
 #include "jemalloc/internal/prof_sys.h"
 
 static const char *test_thread_name = "test_name";
+static const char *dump_filename = "/dev/null";
 
 static int
 test_prof_sys_thread_name_read_error(char *buf, size_t limit) {
@@ -25,8 +26,9 @@ test_prof_sys_thread_name_read_clear(char *buf, size_t limit) {
 
 TEST_BEGIN(test_prof_sys_thread_name) {
 	test_skip_if(!config_prof);
+	test_skip_if(!opt_prof_sys_thread_name);
 
-	bool oldval;
+	bool   oldval;
 	size_t sz = sizeof(oldval);
 	assert_d_eq(mallctl("opt.prof_sys_thread_name", &oldval, &sz, NULL, 0),
 	    0, "mallctl failed");
@@ -41,9 +43,11 @@ TEST_BEGIN(test_prof_sys_thread_name) {
 	thread_name = test_thread_name;
 	assert_d_eq(mallctl("thread.prof.name", NULL, NULL, &thread_name, sz),
 	    ENOENT, "mallctl write for thread name should fail");
-	assert_ptr_eq(thread_name, test_thread_name,
-	    "Thread name should not be touched");
+	assert_ptr_eq(
+	    thread_name, test_thread_name, "Thread name should not be touched");
 
+	prof_sys_thread_name_read_t *orig_prof_sys_thread_name_read =
+	    prof_sys_thread_name_read;
 	prof_sys_thread_name_read = test_prof_sys_thread_name_read_error;
 	void *p = malloc(1);
 	free(p);
@@ -65,13 +69,56 @@ TEST_BEGIN(test_prof_sys_thread_name) {
 	free(p);
 	assert_d_eq(mallctl("thread.prof.name", &thread_name, &sz, NULL, 0), 0,
 	    "mallctl read for thread name should not fail");
-	expect_str_eq(thread_name, "", "Thread name should be updated if the "
+	expect_str_eq(thread_name, "",
+	    "Thread name should be updated if the "
 	    "system call returns a different name");
+
+	prof_sys_thread_name_read = orig_prof_sys_thread_name_read;
 }
 TEST_END
 
+#define ITER (16 * 1024)
+static void *
+thd_start(void *unused) {
+	/* Triggering samples which loads thread names. */
+	for (unsigned i = 0; i < ITER; i++) {
+		void *p = mallocx(4096, 0);
+		assert_ptr_not_null(p, "Unexpected mallocx() failure");
+		dallocx(p, 0);
+	}
+
+	return NULL;
+}
+
+TEST_BEGIN(test_prof_sys_thread_name_mt) {
+	test_skip_if(!config_prof);
+	test_skip_if(!opt_prof_sys_thread_name);
+
+#define NTHREADS 4
+	thd_t    thds[NTHREADS];
+	unsigned thd_args[NTHREADS];
+	unsigned i;
+
+	for (i = 0; i < NTHREADS; i++) {
+		thd_args[i] = i;
+		thd_create(&thds[i], thd_start, (void *)&thd_args[i]);
+	}
+	/* Prof dump which reads the thread names. */
+	for (i = 0; i < ITER; i++) {
+		expect_d_eq(mallctl("prof.dump", NULL, NULL,
+		                (void *)&dump_filename, sizeof(dump_filename)),
+		    0, "Unexpected mallctl failure while dumping");
+	}
+
+	for (i = 0; i < NTHREADS; i++) {
+		thd_join(thds[i], NULL);
+	}
+}
+#undef NTHREADS
+#undef ITER
+TEST_END
+
 int
 main(void) {
-	return test(
-	    test_prof_sys_thread_name);
+	return test(test_prof_sys_thread_name, test_prof_sys_thread_name_mt);
 }
diff --git a/test/unit/prof_tctx.c b/test/unit/prof_tctx.c
index e0efdc36..7fde7230 100644
--- a/test/unit/prof_tctx.c
+++ b/test/unit/prof_tctx.c
@@ -3,11 +3,11 @@
 #include "jemalloc/internal/prof_data.h"
 
 TEST_BEGIN(test_prof_realloc) {
-	tsd_t *tsd;
-	int flags;
-	void *p, *q;
+	tsd_t      *tsd;
+	int         flags;
+	void       *p, *q;
 	prof_info_t prof_info_p, prof_info_q;
-	prof_cnt_t cnt_0, cnt_1, cnt_2, cnt_3;
+	prof_cnt_t  cnt_0, cnt_1, cnt_2, cnt_3;
 
 	test_skip_if(!config_prof);
 
@@ -18,8 +18,8 @@ TEST_BEGIN(test_prof_realloc) {
 	p = mallocx(1024, flags);
 	expect_ptr_not_null(p, "Unexpected mallocx() failure");
 	prof_info_get(tsd, p, NULL, &prof_info_p);
-	expect_ptr_ne(prof_info_p.alloc_tctx, (prof_tctx_t *)(uintptr_t)1U,
-	    "Expected valid tctx");
+	expect_ptr_ne(
+	    prof_info_p.alloc_tctx, PROF_TCTX_SENTINEL, "Expected valid tctx");
 	prof_cnt_all(&cnt_1);
 	expect_u64_eq(cnt_0.curobjs + 1, cnt_1.curobjs,
 	    "Allocation should have increased sample size");
@@ -28,8 +28,8 @@ TEST_BEGIN(test_prof_realloc) {
 	expect_ptr_ne(p, q, "Expected move");
 	expect_ptr_not_null(p, "Unexpected rmallocx() failure");
 	prof_info_get(tsd, q, NULL, &prof_info_q);
-	expect_ptr_ne(prof_info_q.alloc_tctx, (prof_tctx_t *)(uintptr_t)1U,
-	    "Expected valid tctx");
+	expect_ptr_ne(
+	    prof_info_q.alloc_tctx, PROF_TCTX_SENTINEL, "Expected valid tctx");
 	prof_cnt_all(&cnt_2);
 	expect_u64_eq(cnt_1.curobjs, cnt_2.curobjs,
 	    "Reallocation should not have changed sample size");
@@ -43,6 +43,5 @@ TEST_END
 
 int
 main(void) {
-	return test_no_reentrancy(
-	    test_prof_realloc);
+	return test_no_reentrancy(test_prof_realloc);
 }
diff --git a/test/unit/prof_thread_name.c b/test/unit/prof_thread_name.c
index 3c4614fc..8b12c435 100644
--- a/test/unit/prof_thread_name.c
+++ b/test/unit/prof_thread_name.c
@@ -1,80 +1,87 @@
 #include "test/jemalloc_test.h"
 
 static void
-mallctl_thread_name_get_impl(const char *thread_name_expected, const char *func,
-    int line) {
+mallctl_thread_name_get_impl(
+    const char *thread_name_expected, const char *func, int line) {
 	const char *thread_name_old;
-	size_t sz;
+	size_t      sz;
 
 	sz = sizeof(thread_name_old);
-	expect_d_eq(mallctl("thread.prof.name", (void *)&thread_name_old, &sz,
-	    NULL, 0), 0,
-	    "%s():%d: Unexpected mallctl failure reading thread.prof.name",
+	expect_d_eq(
+	    mallctl("thread.prof.name", (void *)&thread_name_old, &sz, NULL, 0),
+	    0, "%s():%d: Unexpected mallctl failure reading thread.prof.name",
 	    func, line);
 	expect_str_eq(thread_name_old, thread_name_expected,
 	    "%s():%d: Unexpected thread.prof.name value", func, line);
 }
-#define mallctl_thread_name_get(a)					\
-	mallctl_thread_name_get_impl(a, __func__, __LINE__)
 
 static void
-mallctl_thread_name_set_impl(const char *thread_name, const char *func,
-    int line) {
+mallctl_thread_name_set_impl(
+    const char *thread_name, const char *func, int line) {
 	expect_d_eq(mallctl("thread.prof.name", NULL, NULL,
-	    (void *)&thread_name, sizeof(thread_name)), 0,
-	    "%s():%d: Unexpected mallctl failure writing thread.prof.name",
+	                (void *)&thread_name, sizeof(thread_name)),
+	    0, "%s():%d: Unexpected mallctl failure writing thread.prof.name",
 	    func, line);
 	mallctl_thread_name_get_impl(thread_name, func, line);
 }
-#define mallctl_thread_name_set(a)					\
+
+#define mallctl_thread_name_get(a)                                             \
+	mallctl_thread_name_get_impl(a, __func__, __LINE__)
+
+#define mallctl_thread_name_set(a)                                             \
 	mallctl_thread_name_set_impl(a, __func__, __LINE__)
 
 TEST_BEGIN(test_prof_thread_name_validation) {
-	const char *thread_name;
-
 	test_skip_if(!config_prof);
 	test_skip_if(opt_prof_sys_thread_name);
 
 	mallctl_thread_name_get("");
-	mallctl_thread_name_set("hi there");
+
+	const char *test_name1 = "test case1";
+	mallctl_thread_name_set(test_name1);
+
+	/* Test name longer than the max len. */
+	char long_name[] =
+	    "test case longer than expected; test case longer than expected";
+	expect_zu_gt(strlen(long_name), PROF_THREAD_NAME_MAX_LEN,
+	    "Long test name not long enough");
+	const char *test_name_long = long_name;
+	expect_d_eq(mallctl("thread.prof.name", NULL, NULL,
+	                (void *)&test_name_long, sizeof(test_name_long)),
+	    0, "Unexpected mallctl failure from thread.prof.name");
+	/* Long name cut to match. */
+	long_name[PROF_THREAD_NAME_MAX_LEN - 1] = '\0';
+	mallctl_thread_name_get(test_name_long);
 
 	/* NULL input shouldn't be allowed. */
-	thread_name = NULL;
-	expect_d_eq(mallctl("thread.prof.name", NULL, NULL,
-	    (void *)&thread_name, sizeof(thread_name)), EFAULT,
-	    "Unexpected mallctl result writing \"%s\" to thread.prof.name",
-	    thread_name);
+	const char *test_name2 = NULL;
+	expect_d_eq(mallctl("thread.prof.name", NULL, NULL, (void *)&test_name2,
+	                sizeof(test_name2)),
+	    EINVAL, "Unexpected mallctl result writing to thread.prof.name");
 
 	/* '\n' shouldn't be allowed. */
-	thread_name = "hi\nthere";
-	expect_d_eq(mallctl("thread.prof.name", NULL, NULL,
-	    (void *)&thread_name, sizeof(thread_name)), EFAULT,
+	const char *test_name3 = "test\ncase";
+	expect_d_eq(mallctl("thread.prof.name", NULL, NULL, (void *)&test_name3,
+	                sizeof(test_name3)),
+	    EINVAL,
 	    "Unexpected mallctl result writing \"%s\" to thread.prof.name",
-	    thread_name);
+	    test_name3);
 
 	/* Simultaneous read/write shouldn't be allowed. */
-	{
-		const char *thread_name_old;
-		size_t sz;
-
-		sz = sizeof(thread_name_old);
-		expect_d_eq(mallctl("thread.prof.name",
-		    (void *)&thread_name_old, &sz, (void *)&thread_name,
-		    sizeof(thread_name)), EPERM,
-		    "Unexpected mallctl result writing \"%s\" to "
-		    "thread.prof.name", thread_name);
-	}
+	const char *thread_name_old;
+	size_t      sz = sizeof(thread_name_old);
+	expect_d_eq(mallctl("thread.prof.name", (void *)&thread_name_old, &sz,
+	                (void *)&test_name1, sizeof(test_name1)),
+	    EPERM, "Unexpected mallctl result from thread.prof.name");
 
 	mallctl_thread_name_set("");
 }
 TEST_END
 
-#define NTHREADS	4
-#define NRESET		25
 static void *
 thd_start(void *varg) {
 	unsigned thd_ind = *(unsigned *)varg;
-	char thread_name[16] = "";
+	char     thread_name[16] = "";
 	unsigned i;
 
 	malloc_snprintf(thread_name, sizeof(thread_name), "thread %u", thd_ind);
@@ -82,6 +89,7 @@ thd_start(void *varg) {
 	mallctl_thread_name_get("");
 	mallctl_thread_name_set(thread_name);
 
+#define NRESET 25
 	for (i = 0; i < NRESET; i++) {
 		expect_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0,
 		    "Unexpected error while resetting heap profile data");
@@ -92,13 +100,15 @@ thd_start(void *varg) {
 	mallctl_thread_name_set("");
 
 	return NULL;
+#undef NRESET
 }
 
 TEST_BEGIN(test_prof_thread_name_threaded) {
 	test_skip_if(!config_prof);
 	test_skip_if(opt_prof_sys_thread_name);
 
-	thd_t thds[NTHREADS];
+#define NTHREADS 4
+	thd_t    thds[NTHREADS];
 	unsigned thd_args[NTHREADS];
 	unsigned i;
 
@@ -109,14 +119,12 @@ TEST_BEGIN(test_prof_thread_name_threaded) {
 	for (i = 0; i < NTHREADS; i++) {
 		thd_join(thds[i], NULL);
 	}
+#undef NTHREADS
 }
 TEST_END
-#undef NTHREADS
-#undef NRESET
 
 int
 main(void) {
 	return test(
-	    test_prof_thread_name_validation,
-	    test_prof_thread_name_threaded);
+	    test_prof_thread_name_validation, test_prof_thread_name_threaded);
 }
diff --git a/test/unit/psset.c b/test/unit/psset.c
index 6ff72012..6ad653f5 100644
--- a/test/unit/psset.c
+++ b/test/unit/psset.c
@@ -19,8 +19,10 @@ static void
 test_psset_fake_purge(hpdata_t *ps) {
 	hpdata_purge_state_t purge_state;
 	hpdata_alloc_allowed_set(ps, false);
-	hpdata_purge_begin(ps, &purge_state);
-	void *addr;
+	size_t nranges;
+	hpdata_purge_begin(ps, &purge_state, &nranges);
+	(void)nranges;
+	void  *addr;
 	size_t size;
 	while (hpdata_purge_next(ps, &purge_state, &addr, &size)) {
 	}
@@ -29,8 +31,8 @@ test_psset_fake_purge(hpdata_t *ps) {
 }
 
 static void
-test_psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata,
-    size_t size) {
+test_psset_alloc_new(
+    psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size) {
 	hpdata_assert_empty(ps);
 
 	test_psset_fake_purge(ps);
@@ -38,12 +40,12 @@ test_psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata,
 	psset_insert(psset, ps);
 	psset_update_begin(psset, ps);
 
-        void *addr = hpdata_reserve_alloc(ps, size);
-        edata_init(r_edata, edata_arena_ind_get(r_edata), addr, size,
+	void *addr = hpdata_reserve_alloc(ps, size);
+	edata_init(r_edata, edata_arena_ind_get(r_edata), addr, size,
 	    /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active,
-            /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA,
-            EXTENT_NOT_HEAD);
-        edata_ps_set(r_edata, ps);
+	    /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA,
+	    EXTENT_NOT_HEAD);
+	edata_ps_set(r_edata, ps);
 	psset_update_end(psset, ps);
 }
 
@@ -64,6 +66,24 @@ test_psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size) {
 	return false;
 }
 
+static hpdata_t *
+test_psset_hugify(psset_t *psset, edata_t *edata) {
+	hpdata_t *ps = edata_ps_get(edata);
+	psset_update_begin(psset, ps);
+	hpdata_hugify(ps);
+	psset_update_end(psset, ps);
+	return ps;
+}
+
+static hpdata_t *
+test_psset_dehugify(psset_t *psset, edata_t *edata) {
+	hpdata_t *ps = edata_ps_get(edata);
+	psset_update_begin(psset, ps);
+	hpdata_dehugify(ps);
+	psset_update_end(psset, ps);
+	return ps;
+}
+
 static hpdata_t *
 test_psset_dalloc(psset_t *psset, edata_t *edata) {
 	hpdata_t *ps = edata_ps_get(edata);
@@ -84,15 +104,14 @@ edata_expect(edata_t *edata, size_t page_offset, size_t page_cnt) {
 	 * Note that allocations should get the arena ind of their home
 	 * arena, *not* the arena ind of the pageslab allocator.
 	 */
-	expect_u_eq(ALLOC_ARENA_IND, edata_arena_ind_get(edata),
-	    "Arena ind changed");
+	expect_u_eq(
+	    ALLOC_ARENA_IND, edata_arena_ind_get(edata), "Arena ind changed");
 	expect_ptr_eq(
 	    (void *)((uintptr_t)PAGESLAB_ADDR + (page_offset << LG_PAGE)),
 	    edata_addr_get(edata), "Didn't allocate in order");
 	expect_zu_eq(page_cnt << LG_PAGE, edata_size_get(edata), "");
 	expect_false(edata_slab_get(edata), "");
-	expect_u_eq(SC_NSIZES, edata_szind_get_maybe_invalid(edata),
-	    "");
+	expect_u_eq(SC_NSIZES, edata_szind_get_maybe_invalid(edata), "");
 	expect_u64_eq(0, edata_sn_get(edata), "");
 	expect_d_eq(edata_state_get(edata), extent_state_active, "");
 	expect_false(edata_zeroed_get(edata), "");
@@ -102,9 +121,11 @@ edata_expect(edata_t *edata, size_t page_offset, size_t page_cnt) {
 }
 
 TEST_BEGIN(test_empty) {
-	bool err;
+	test_skip_if(hpa_hugepage_size_exceeds_limit());
+	bool     err;
 	hpdata_t pageslab;
-	hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE);
+	hpdata_init(
+	    &pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, /* is_huge */ false);
 
 	edata_t alloc;
 	edata_init_test(&alloc);
@@ -119,12 +140,14 @@ TEST_BEGIN(test_empty) {
 TEST_END
 
 TEST_BEGIN(test_fill) {
+	test_skip_if(hpa_hugepage_size_exceeds_limit());
 	bool err;
 
 	hpdata_t pageslab;
-	hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE);
+	hpdata_init(
+	    &pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, /* is_huge */ false);
 
-	edata_t alloc[HUGEPAGE_PAGES];
+	edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES);
 
 	psset_t psset;
 	psset_init(&psset);
@@ -147,17 +170,21 @@ TEST_BEGIN(test_fill) {
 	edata_init_test(&extra_alloc);
 	err = test_psset_alloc_reuse(&psset, &extra_alloc, PAGE);
 	expect_true(err, "Alloc succeeded even though psset should be empty");
+
+	free(alloc);
 }
 TEST_END
 
 TEST_BEGIN(test_reuse) {
-	bool err;
+	test_skip_if(hpa_hugepage_size_exceeds_limit());
+	bool      err;
 	hpdata_t *ps;
 
 	hpdata_t pageslab;
-	hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE);
+	hpdata_init(
+	    &pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, /* is_huge */ false);
 
-	edata_t alloc[HUGEPAGE_PAGES];
+	edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES);
 
 	psset_t psset;
 	psset_init(&psset);
@@ -171,7 +198,7 @@ TEST_BEGIN(test_reuse) {
 	}
 
 	/* Free odd indices. */
-	for (size_t i = 0; i < HUGEPAGE_PAGES; i ++) {
+	for (size_t i = 0; i < HUGEPAGE_PAGES; i++) {
 		if (i % 2 == 0) {
 			continue;
 		}
@@ -239,17 +266,21 @@ TEST_BEGIN(test_reuse) {
 	err = test_psset_alloc_reuse(&psset, &alloc[index_of_4], 4 * PAGE);
 	expect_false(err, "Should have been able to find alloc.");
 	edata_expect(&alloc[index_of_4], index_of_4, 4);
+
+	free(alloc);
 }
 TEST_END
 
 TEST_BEGIN(test_evict) {
-	bool err;
+	test_skip_if(hpa_hugepage_size_exceeds_limit());
+	bool      err;
 	hpdata_t *ps;
 
 	hpdata_t pageslab;
-	hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE);
+	hpdata_init(
+	    &pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, /* is_huge */ false);
 
-	edata_t alloc[HUGEPAGE_PAGES];
+	edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES);
 
 	psset_t psset;
 	psset_init(&psset);
@@ -273,20 +304,25 @@ TEST_BEGIN(test_evict) {
 
 	err = test_psset_alloc_reuse(&psset, &alloc[0], PAGE);
 	expect_true(err, "psset should be empty.");
+
+	free(alloc);
 }
 TEST_END
 
 TEST_BEGIN(test_multi_pageslab) {
-	bool err;
+	test_skip_if(hpa_hugepage_size_exceeds_limit());
+	bool      err;
 	hpdata_t *ps;
 
 	hpdata_t pageslab[2];
-	hpdata_init(&pageslab[0], PAGESLAB_ADDR, PAGESLAB_AGE);
-	hpdata_init(&pageslab[1],
-	    (void *)((uintptr_t)PAGESLAB_ADDR + HUGEPAGE),
-	    PAGESLAB_AGE + 1);
+	hpdata_init(
+	    &pageslab[0], PAGESLAB_ADDR, PAGESLAB_AGE, /* is_huge */ false);
+	hpdata_init(&pageslab[1], (void *)((uintptr_t)PAGESLAB_ADDR + HUGEPAGE),
+	    PAGESLAB_AGE + 1, /* is_huge */ false);
 
-	edata_t alloc[2][HUGEPAGE_PAGES];
+	edata_t *alloc[2];
+	alloc[0] = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES);
+	alloc[1] = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES);
 
 	psset_t psset;
 	psset_init(&psset);
@@ -301,9 +337,10 @@ TEST_BEGIN(test_multi_pageslab) {
 	for (size_t i = 0; i < 2; i++) {
 		for (size_t j = 1; j < HUGEPAGE_PAGES; j++) {
 			edata_init_test(&alloc[i][j]);
-			err = test_psset_alloc_reuse(&psset, &alloc[i][j], PAGE);
-			expect_false(err,
-			    "Nonempty psset failed page allocation.");
+			err = test_psset_alloc_reuse(
+			    &psset, &alloc[i][j], PAGE);
+			expect_false(
+			    err, "Nonempty psset failed page allocation.");
 			assert_ptr_eq(&pageslab[i], edata_ps_get(&alloc[i][j]),
 			    "Didn't pick pageslabs in first-fit");
 		}
@@ -336,6 +373,160 @@ TEST_BEGIN(test_multi_pageslab) {
 	 */
 	err = test_psset_alloc_reuse(&psset, &alloc[1][0], 2 * PAGE);
 	expect_false(err, "Allocation should have succeeded");
+
+	free(alloc[0]);
+	free(alloc[1]);
+}
+TEST_END
+
+TEST_BEGIN(test_stats_merged) {
+	hpdata_t pageslab;
+	hpdata_init(
+	    &pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, /* is_huge */ false);
+
+	edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES);
+
+	psset_t psset;
+	psset_init(&psset);
+	expect_zu_eq(0, psset.stats.merged.npageslabs, "");
+	expect_zu_eq(0, psset.stats.merged.nactive, "");
+	expect_zu_eq(0, psset.stats.merged.ndirty, "");
+
+	edata_init_test(&alloc[0]);
+	test_psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE);
+	for (size_t i = 1; i < HUGEPAGE_PAGES; i++) {
+		expect_zu_eq(1, psset.stats.merged.npageslabs, "");
+		expect_zu_eq(i, psset.stats.merged.nactive, "");
+		expect_zu_eq(0, psset.stats.merged.ndirty, "");
+
+		edata_init_test(&alloc[i]);
+		bool err = test_psset_alloc_reuse(&psset, &alloc[i], PAGE);
+		expect_false(err, "Nonempty psset failed page allocation.");
+	}
+	expect_zu_eq(1, psset.stats.merged.npageslabs, "");
+	expect_zu_eq(HUGEPAGE_PAGES, psset.stats.merged.nactive, "");
+	expect_zu_eq(0, psset.stats.merged.ndirty, "");
+
+	for (ssize_t i = HUGEPAGE_PAGES - 1; i > 0; i--) {
+		test_psset_dalloc(&psset, &alloc[i]);
+		expect_zu_eq(1, psset.stats.merged.npageslabs, "");
+		expect_zu_eq(i, psset.stats.merged.nactive, "");
+		expect_zu_eq(HUGEPAGE_PAGES - i, psset.stats.merged.ndirty, "");
+	}
+	/* No allocations have left. */
+	test_psset_dalloc(&psset, &alloc[0]);
+	expect_zu_eq(0, psset.stats.merged.npageslabs, "");
+	expect_zu_eq(0, psset.stats.merged.nactive, "");
+
+	/*
+	 * Last test_psset_dalloc call removed empty pageslab from psset, so
+	 * nothing has left there, even no dirty pages.
+	 */
+	expect_zu_eq(0, psset.stats.merged.ndirty, "");
+
+	test_psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE);
+	expect_zu_eq(1, psset.stats.merged.npageslabs, "");
+	expect_zu_eq(1, psset.stats.merged.nactive, "");
+	expect_zu_eq(0, psset.stats.merged.ndirty, "");
+
+	psset_update_begin(&psset, &pageslab);
+	expect_zu_eq(0, psset.stats.merged.npageslabs, "");
+	expect_zu_eq(0, psset.stats.merged.nactive, "");
+	expect_zu_eq(0, psset.stats.merged.ndirty, "");
+
+	psset_update_end(&psset, &pageslab);
+	expect_zu_eq(1, psset.stats.merged.npageslabs, "");
+	expect_zu_eq(1, psset.stats.merged.nactive, "");
+	expect_zu_eq(0, psset.stats.merged.ndirty, "");
+
+	free(alloc);
+}
+TEST_END
+
+TEST_BEGIN(test_stats_huge) {
+	test_skip_if(!config_stats);
+	test_skip_if(hpa_hugepage_size_exceeds_limit());
+
+	hpdata_t pageslab;
+	hpdata_init(
+	    &pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, /* is_huge */ false);
+
+	edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES);
+
+	psset_t psset;
+	psset_init(&psset);
+	for (int huge = 0; huge < PSSET_NHUGE; ++huge) {
+		expect_zu_eq(0, psset.stats.slabs[huge].npageslabs, "");
+		expect_zu_eq(0, psset.stats.slabs[huge].nactive, "");
+		expect_zu_eq(0, psset.stats.slabs[huge].ndirty, "");
+	}
+
+	edata_init_test(&alloc[0]);
+	test_psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE);
+	for (size_t i = 1; i < HUGEPAGE_PAGES; i++) {
+		expect_zu_eq(1, psset.stats.slabs[0].npageslabs, "");
+		expect_zu_eq(i, psset.stats.slabs[0].nactive, "");
+		expect_zu_eq(0, psset.stats.slabs[0].ndirty, "");
+
+		expect_zu_eq(0, psset.stats.slabs[1].npageslabs, "");
+		expect_zu_eq(0, psset.stats.slabs[1].nactive, "");
+		expect_zu_eq(0, psset.stats.slabs[1].ndirty, "");
+
+		edata_init_test(&alloc[i]);
+		bool err = test_psset_alloc_reuse(&psset, &alloc[i], PAGE);
+		expect_false(err, "Nonempty psset failed page allocation.");
+	}
+	expect_zu_eq(1, psset.stats.slabs[0].npageslabs, "");
+	expect_zu_eq(HUGEPAGE_PAGES, psset.stats.slabs[0].nactive, "");
+	expect_zu_eq(0, psset.stats.slabs[0].ndirty, "");
+
+	expect_zu_eq(0, psset.stats.slabs[1].npageslabs, "");
+	expect_zu_eq(0, psset.stats.slabs[1].nactive, "");
+	expect_zu_eq(0, psset.stats.slabs[1].ndirty, "");
+
+	test_psset_hugify(&psset, &alloc[0]);
+
+	/* All stats should been moved from nonhuge to huge. */
+	expect_zu_eq(0, psset.stats.slabs[0].npageslabs, "");
+	expect_zu_eq(0, psset.stats.slabs[0].nactive, "");
+	expect_zu_eq(0, psset.stats.slabs[0].ndirty, "");
+
+	expect_zu_eq(1, psset.stats.slabs[1].npageslabs, "");
+	expect_zu_eq(HUGEPAGE_PAGES, psset.stats.slabs[1].nactive, "");
+	expect_zu_eq(0, psset.stats.slabs[1].ndirty, "");
+
+	test_psset_dehugify(&psset, &alloc[0]);
+
+	/* And back from huge to nonhuge after dehugification. */
+	expect_zu_eq(1, psset.stats.slabs[0].npageslabs, "");
+	expect_zu_eq(HUGEPAGE_PAGES, psset.stats.slabs[0].nactive, "");
+	expect_zu_eq(0, psset.stats.slabs[0].ndirty, "");
+
+	expect_zu_eq(0, psset.stats.slabs[1].npageslabs, "");
+	expect_zu_eq(0, psset.stats.slabs[1].nactive, "");
+	expect_zu_eq(0, psset.stats.slabs[1].ndirty, "");
+
+	for (ssize_t i = HUGEPAGE_PAGES - 1; i > 0; i--) {
+		test_psset_dalloc(&psset, &alloc[i]);
+
+		expect_zu_eq(1, psset.stats.slabs[0].npageslabs, "");
+		expect_zu_eq(i, psset.stats.slabs[0].nactive, "");
+		expect_zu_eq(
+		    HUGEPAGE_PAGES - i, psset.stats.slabs[0].ndirty, "");
+
+		expect_zu_eq(0, psset.stats.slabs[1].npageslabs, "");
+		expect_zu_eq(0, psset.stats.slabs[1].nactive, "");
+		expect_zu_eq(0, psset.stats.slabs[1].ndirty, "");
+	}
+	test_psset_dalloc(&psset, &alloc[0]);
+
+	for (int huge = 0; huge < PSSET_NHUGE; huge++) {
+		expect_zu_eq(0, psset.stats.slabs[huge].npageslabs, "");
+		expect_zu_eq(0, psset.stats.slabs[huge].nactive, "");
+		expect_zu_eq(0, psset.stats.slabs[huge].ndirty, "");
+	}
+
+	free(alloc);
 }
 TEST_END
 
@@ -343,7 +534,8 @@ static void
 stats_expect_empty(psset_bin_stats_t *stats) {
 	assert_zu_eq(0, stats->npageslabs,
 	    "Supposedly empty bin had positive npageslabs");
-	expect_zu_eq(0, stats->nactive, "Unexpected nonempty bin"
+	expect_zu_eq(0, stats->nactive,
+	    "Unexpected nonempty bin"
 	    "Supposedly empty bin had positive nactive");
 }
 
@@ -352,17 +544,16 @@ stats_expect(psset_t *psset, size_t nactive) {
 	if (nactive == HUGEPAGE_PAGES) {
 		expect_zu_eq(1, psset->stats.full_slabs[0].npageslabs,
 		    "Expected a full slab");
-		expect_zu_eq(HUGEPAGE_PAGES,
-		    psset->stats.full_slabs[0].nactive,
+		expect_zu_eq(HUGEPAGE_PAGES, psset->stats.full_slabs[0].nactive,
 		    "Should have exactly filled the bin");
 	} else {
 		stats_expect_empty(&psset->stats.full_slabs[0]);
 	}
-	size_t ninactive = HUGEPAGE_PAGES - nactive;
+	size_t   ninactive = HUGEPAGE_PAGES - nactive;
 	pszind_t nonempty_pind = PSSET_NPSIZES;
 	if (ninactive != 0 && ninactive < HUGEPAGE_PAGES) {
-		nonempty_pind = sz_psz2ind(sz_psz_quantize_floor(
-		    ninactive << LG_PAGE));
+		nonempty_pind = sz_psz2ind(
+		    sz_psz_quantize_floor(ninactive << LG_PAGE));
 	}
 	for (pszind_t i = 0; i < PSSET_NPSIZES; i++) {
 		if (i == nonempty_pind) {
@@ -379,13 +570,17 @@ stats_expect(psset_t *psset, size_t nactive) {
 	expect_zu_eq(nactive, psset_nactive(psset), "");
 }
 
-TEST_BEGIN(test_stats) {
+TEST_BEGIN(test_stats_fullness) {
+	test_skip_if(!config_stats);
+	test_skip_if(hpa_hugepage_size_exceeds_limit());
+
 	bool err;
 
 	hpdata_t pageslab;
-	hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE);
+	hpdata_init(
+	    &pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, /* is_huge */ false);
 
-	edata_t alloc[HUGEPAGE_PAGES];
+	edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES);
 
 	psset_t psset;
 	psset_init(&psset);
@@ -415,6 +610,8 @@ TEST_BEGIN(test_stats) {
 	stats_expect(&psset, 0);
 	psset_update_end(&psset, &pageslab);
 	stats_expect(&psset, 1);
+
+	free(alloc);
 }
 TEST_END
 
@@ -432,12 +629,14 @@ init_test_pageslabs(psset_t *psset, hpdata_t *pageslab,
     hpdata_t *worse_pageslab, edata_t *alloc, edata_t *worse_alloc) {
 	bool err;
 
-	hpdata_init(pageslab, (void *)(10 * HUGEPAGE), PAGESLAB_AGE);
+	hpdata_init(pageslab, (void *)(10 * HUGEPAGE), PAGESLAB_AGE,
+	    /* is_huge */ false);
 	/*
 	 * This pageslab would be better from an address-first-fit POV, but
 	 * worse from an age POV.
 	 */
-	hpdata_init(worse_pageslab, (void *)(9 * HUGEPAGE), PAGESLAB_AGE + 1);
+	hpdata_init(worse_pageslab, (void *)(9 * HUGEPAGE), PAGESLAB_AGE + 1,
+	    /* is_huge */ false);
 
 	psset_init(psset);
 
@@ -468,23 +667,25 @@ init_test_pageslabs(psset_t *psset, hpdata_t *pageslab,
 	}
 
 	/* Deallocate the last page from the older pageslab. */
-	hpdata_t *evicted = test_psset_dalloc(psset,
-	    &alloc[HUGEPAGE_PAGES - 1]);
+	hpdata_t *evicted = test_psset_dalloc(
+	    psset, &alloc[HUGEPAGE_PAGES - 1]);
 	expect_ptr_null(evicted, "Unexpected eviction");
 }
 
 TEST_BEGIN(test_oldest_fit) {
-	bool err;
-	edata_t alloc[HUGEPAGE_PAGES];
-	edata_t worse_alloc[HUGEPAGE_PAGES];
+	test_skip_if(hpa_hugepage_size_exceeds_limit());
+	bool     err;
+	edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES);
+	edata_t *worse_alloc = (edata_t *)malloc(
+	    sizeof(edata_t) * HUGEPAGE_PAGES);
 
 	hpdata_t pageslab;
 	hpdata_t worse_pageslab;
 
 	psset_t psset;
 
-	init_test_pageslabs(&psset, &pageslab, &worse_pageslab, alloc,
-	    worse_alloc);
+	init_test_pageslabs(
+	    &psset, &pageslab, &worse_pageslab, alloc, worse_alloc);
 
 	/* The edata should come from the better pageslab. */
 	edata_t test_edata;
@@ -493,27 +694,32 @@ TEST_BEGIN(test_oldest_fit) {
 	expect_false(err, "Nonempty psset failed page allocation");
 	expect_ptr_eq(&pageslab, edata_ps_get(&test_edata),
 	    "Allocated from the wrong pageslab");
+
+	free(alloc);
+	free(worse_alloc);
 }
 TEST_END
 
 TEST_BEGIN(test_insert_remove) {
-	bool err;
+	test_skip_if(hpa_hugepage_size_exceeds_limit());
+	bool      err;
 	hpdata_t *ps;
-	edata_t alloc[HUGEPAGE_PAGES];
-	edata_t worse_alloc[HUGEPAGE_PAGES];
+	edata_t  *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES);
+	edata_t  *worse_alloc = (edata_t *)malloc(
+            sizeof(edata_t) * HUGEPAGE_PAGES);
 
 	hpdata_t pageslab;
 	hpdata_t worse_pageslab;
 
 	psset_t psset;
 
-	init_test_pageslabs(&psset, &pageslab, &worse_pageslab, alloc,
-	    worse_alloc);
+	init_test_pageslabs(
+	    &psset, &pageslab, &worse_pageslab, alloc, worse_alloc);
 
 	/* Remove better; should still be able to alloc from worse. */
 	psset_update_begin(&psset, &pageslab);
-	err = test_psset_alloc_reuse(&psset, &worse_alloc[HUGEPAGE_PAGES - 1],
-	    PAGE);
+	err = test_psset_alloc_reuse(
+	    &psset, &worse_alloc[HUGEPAGE_PAGES - 1], PAGE);
 	expect_false(err, "Removal should still leave an empty page");
 	expect_ptr_eq(&worse_pageslab,
 	    edata_ps_get(&worse_alloc[HUGEPAGE_PAGES - 1]),
@@ -539,10 +745,14 @@ TEST_BEGIN(test_insert_remove) {
 	psset_update_begin(&psset, &worse_pageslab);
 	err = test_psset_alloc_reuse(&psset, &alloc[HUGEPAGE_PAGES - 1], PAGE);
 	expect_true(err, "psset should be empty, but an alloc succeeded");
+
+	free(alloc);
+	free(worse_alloc);
 }
 TEST_END
 
 TEST_BEGIN(test_purge_prefers_nonhuge) {
+	test_skip_if(hpa_hugepage_size_exceeds_limit());
 	/*
 	 * All else being equal, we should prefer purging non-huge pages over
 	 * huge ones for non-empty extents.
@@ -557,23 +767,22 @@ TEST_BEGIN(test_purge_prefers_nonhuge) {
 	psset_t psset;
 	psset_init(&psset);
 
-	hpdata_t hpdata_huge[NHP];
+	hpdata_t  hpdata_huge[NHP];
 	uintptr_t huge_begin = (uintptr_t)&hpdata_huge[0];
 	uintptr_t huge_end = (uintptr_t)&hpdata_huge[NHP];
-	hpdata_t hpdata_nonhuge[NHP];
+	hpdata_t  hpdata_nonhuge[NHP];
 	uintptr_t nonhuge_begin = (uintptr_t)&hpdata_nonhuge[0];
 	uintptr_t nonhuge_end = (uintptr_t)&hpdata_nonhuge[NHP];
 
 	for (size_t i = 0; i < NHP; i++) {
 		hpdata_init(&hpdata_huge[i], (void *)((10 + i) * HUGEPAGE),
-		    123 + i);
+		    123 + i, /* is_huge */ false);
 		psset_insert(&psset, &hpdata_huge[i]);
 
 		hpdata_init(&hpdata_nonhuge[i],
-		    (void *)((10 + NHP + i) * HUGEPAGE),
-		    456 + i);
+		    (void *)((10 + NHP + i) * HUGEPAGE), 456 + i,
+		    /* is_huge */ false);
 		psset_insert(&psset, &hpdata_nonhuge[i]);
-
 	}
 	for (int i = 0; i < 2 * NHP; i++) {
 		hpdata = psset_pick_alloc(&psset, HUGEPAGE * 3 / 4);
@@ -604,18 +813,20 @@ TEST_BEGIN(test_purge_prefers_nonhuge) {
 	 * further.
 	 */
 	for (int i = 0; i < NHP; i++) {
-		hpdata = psset_pick_purge(&psset);
+		hpdata = psset_pick_purge(&psset, NULL);
 		assert_true(nonhuge_begin <= (uintptr_t)hpdata
-		    && (uintptr_t)hpdata < nonhuge_end, "");
+		        && (uintptr_t)hpdata < nonhuge_end,
+		    "");
 		psset_update_begin(&psset, hpdata);
 		test_psset_fake_purge(hpdata);
 		hpdata_purge_allowed_set(hpdata, false);
 		psset_update_end(&psset, hpdata);
 	}
 	for (int i = 0; i < NHP; i++) {
-		hpdata = psset_pick_purge(&psset);
+		hpdata = psset_pick_purge(&psset, NULL);
 		expect_true(huge_begin <= (uintptr_t)hpdata
-		    && (uintptr_t)hpdata < huge_end, "");
+		        && (uintptr_t)hpdata < huge_end,
+		    "");
 		psset_update_begin(&psset, hpdata);
 		hpdata_dehugify(hpdata);
 		test_psset_fake_purge(hpdata);
@@ -625,7 +836,74 @@ TEST_BEGIN(test_purge_prefers_nonhuge) {
 }
 TEST_END
 
+TEST_BEGIN(test_purge_timing) {
+	test_skip_if(hpa_hugepage_size_exceeds_limit());
+	void *ptr;
+
+	psset_t psset;
+	psset_init(&psset);
+
+	hpdata_t hpdata_empty_nh;
+	hpdata_t hpdata_empty_huge;
+	hpdata_t hpdata_nonempty;
+
+	nstime_t       basetime, now, empty_nh_tm, empty_huge_tm, nonempty_tm;
+	const uint64_t BASE_SEC = 100;
+	nstime_init2(&basetime, BASE_SEC, 0);
+
+	/* Create and add to psset */
+	hpdata_init(&hpdata_empty_nh, (void *)(9 * HUGEPAGE), 102, false);
+	psset_insert(&psset, &hpdata_empty_nh);
+	hpdata_init(&hpdata_empty_huge, (void *)(10 * HUGEPAGE), 123, true);
+	psset_insert(&psset, &hpdata_empty_huge);
+	hpdata_init(&hpdata_nonempty, (void *)(11 * HUGEPAGE), 456, false);
+	psset_insert(&psset, &hpdata_nonempty);
+
+	psset_update_begin(&psset, &hpdata_empty_nh);
+	ptr = hpdata_reserve_alloc(&hpdata_empty_nh, PAGE);
+	expect_ptr_eq(hpdata_addr_get(&hpdata_empty_nh), ptr, "");
+	hpdata_unreserve(&hpdata_empty_nh, ptr, PAGE);
+	hpdata_purge_allowed_set(&hpdata_empty_nh, true);
+	nstime_init2(&empty_nh_tm, BASE_SEC + 100, 0);
+	hpdata_time_purge_allowed_set(&hpdata_empty_nh, &empty_nh_tm);
+	psset_update_end(&psset, &hpdata_empty_nh);
+
+	psset_update_begin(&psset, &hpdata_empty_huge);
+	ptr = hpdata_reserve_alloc(&hpdata_empty_huge, PAGE);
+	expect_ptr_eq(hpdata_addr_get(&hpdata_empty_huge), ptr, "");
+	hpdata_unreserve(&hpdata_empty_huge, ptr, PAGE);
+	nstime_init2(&empty_huge_tm, BASE_SEC + 110, 0);
+	hpdata_time_purge_allowed_set(&hpdata_empty_huge, &empty_huge_tm);
+	hpdata_purge_allowed_set(&hpdata_empty_huge, true);
+	psset_update_end(&psset, &hpdata_empty_huge);
+
+	psset_update_begin(&psset, &hpdata_nonempty);
+	ptr = hpdata_reserve_alloc(&hpdata_nonempty, 10 * PAGE);
+	expect_ptr_eq(hpdata_addr_get(&hpdata_nonempty), ptr, "");
+	hpdata_unreserve(&hpdata_nonempty, ptr, 9 * PAGE);
+	hpdata_purge_allowed_set(&hpdata_nonempty, true);
+	nstime_init2(&nonempty_tm, BASE_SEC + 80, 0);
+	hpdata_time_purge_allowed_set(&hpdata_nonempty, &nonempty_tm);
+	psset_update_end(&psset, &hpdata_nonempty);
+
+	/* The best to purge with no time restriction is the huge one */
+	hpdata_t *ps = psset_pick_purge(&psset, NULL);
+	expect_ptr_eq(&hpdata_empty_huge, ps, "Without tick, pick huge");
+
+	/* However, only the one eligible for purging can be picked */
+	nstime_init2(&now, BASE_SEC + 90, 0);
+	ps = psset_pick_purge(&psset, &now);
+	expect_ptr_eq(&hpdata_nonempty, ps, "Only non empty purgable");
+
+	/* When all eligible, huge empty is the best */
+	nstime_init2(&now, BASE_SEC + 110, 0);
+	ps = psset_pick_purge(&psset, &now);
+	expect_ptr_eq(&hpdata_empty_huge, ps, "Huge empty is the best");
+}
+TEST_END
+
 TEST_BEGIN(test_purge_prefers_empty) {
+	test_skip_if(hpa_hugepage_size_exceeds_limit());
 	void *ptr;
 
 	psset_t psset;
@@ -633,9 +911,11 @@ TEST_BEGIN(test_purge_prefers_empty) {
 
 	hpdata_t hpdata_empty;
 	hpdata_t hpdata_nonempty;
-	hpdata_init(&hpdata_empty, (void *)(10 * HUGEPAGE), 123);
+	hpdata_init(
+	    &hpdata_empty, (void *)(10 * HUGEPAGE), 123, /* is_huge */ false);
 	psset_insert(&psset, &hpdata_empty);
-	hpdata_init(&hpdata_nonempty, (void *)(11 * HUGEPAGE), 456);
+	hpdata_init(&hpdata_nonempty, (void *)(11 * HUGEPAGE), 456,
+	    /* is_huge */ false);
 	psset_insert(&psset, &hpdata_nonempty);
 
 	psset_update_begin(&psset, &hpdata_empty);
@@ -656,31 +936,86 @@ TEST_BEGIN(test_purge_prefers_empty) {
 	 * The nonempty slab has 9 dirty pages, while the empty one has only 1.
 	 * We should still pick the empty one for purging.
 	 */
-	hpdata_t *to_purge = psset_pick_purge(&psset);
+	hpdata_t *to_purge = psset_pick_purge(&psset, NULL);
 	expect_ptr_eq(&hpdata_empty, to_purge, "");
 }
 TEST_END
 
-TEST_BEGIN(test_purge_prefers_empty_huge) {
+TEST_BEGIN(test_pick_purge_underflow) {
+	test_skip_if(hpa_hugepage_size_exceeds_limit());
 	void *ptr;
 
 	psset_t psset;
 	psset_init(&psset);
 
-	enum {NHP = 10 };
+	/*
+	 * Test that psset_pick_purge skips directly past a time-ineligible
+	 * entry without underflow.
+	 *
+	 * Create a hugified, non-empty hpdata with 1 dirty page, which
+	 * lands at purge list index 0 (pind=0, huge=true).  Set its
+	 * purge-allowed time in the future.  Calling psset_pick_purge
+	 * with a "now" before that time should return NULL without
+	 * looping through all higher indices on the way down.
+	 */
+	hpdata_t       hpdata_lowest;
+	nstime_t       future_tm, now;
+	const uint64_t BASE_SEC = 1000;
+
+	hpdata_init(&hpdata_lowest, (void *)(10 * HUGEPAGE), 100, false);
+	psset_insert(&psset, &hpdata_lowest);
+
+	psset_update_begin(&psset, &hpdata_lowest);
+	/* Allocate all pages. */
+	ptr = hpdata_reserve_alloc(&hpdata_lowest, HUGEPAGE_PAGES * PAGE);
+	expect_ptr_eq(hpdata_addr_get(&hpdata_lowest), ptr, "");
+	/* Hugify the slab. */
+	hpdata_hugify(&hpdata_lowest);
+	/* Free the last page to create exactly 1 dirty page. */
+	hpdata_unreserve(&hpdata_lowest,
+	    (void *)((uintptr_t)ptr + (HUGEPAGE_PAGES - 1) * PAGE), PAGE);
+	/* Now: nactive = HUGEPAGE_PAGES-1, ndirty = 1, huge = true.
+	 * purge_list_ind = sz_psz2ind(sz_psz_quantize_floor(PAGE)) * 2 + 0
+	 * which should be index 0. */
+	hpdata_purge_allowed_set(&hpdata_lowest, true);
+	nstime_init2(&future_tm, BASE_SEC + 9999, 0);
+	hpdata_time_purge_allowed_set(&hpdata_lowest, &future_tm);
+	psset_update_end(&psset, &hpdata_lowest);
+
+	/*
+	 * Call with a "now" before the future time.  Should return NULL
+	 * (no eligible entry).
+	 */
+	nstime_init2(&now, BASE_SEC + 500, 0);
+	hpdata_t *to_purge = psset_pick_purge(&psset, &now);
+	expect_ptr_null(
+	    to_purge, "Should return NULL when no entry is time-eligible");
+}
+TEST_END
+
+TEST_BEGIN(test_purge_prefers_empty_huge) {
+	test_skip_if(hpa_hugepage_size_exceeds_limit());
+	void *ptr;
+
+	psset_t psset;
+	psset_init(&psset);
+
+	enum { NHP = 10 };
 
 	hpdata_t hpdata_huge[NHP];
 	hpdata_t hpdata_nonhuge[NHP];
 
 	uintptr_t cur_addr = 100 * HUGEPAGE;
-	uint64_t cur_age = 123;
+	uint64_t  cur_age = 123;
 	for (int i = 0; i < NHP; i++) {
-		hpdata_init(&hpdata_huge[i], (void *)cur_addr, cur_age);
+		hpdata_init(&hpdata_huge[i], (void *)cur_addr, cur_age,
+		    /* is_huge */ false);
 		cur_addr += HUGEPAGE;
 		cur_age++;
 		psset_insert(&psset, &hpdata_huge[i]);
 
-		hpdata_init(&hpdata_nonhuge[i], (void *)cur_addr, cur_age);
+		hpdata_init(&hpdata_nonhuge[i], (void *)cur_addr, cur_age,
+		    /* is_huge */ false);
 		cur_addr += HUGEPAGE;
 		cur_age++;
 		psset_insert(&psset, &hpdata_nonhuge[i]);
@@ -715,14 +1050,14 @@ TEST_BEGIN(test_purge_prefers_empty_huge) {
 	 * any of the non-huge ones for purging.
 	 */
 	for (int i = 0; i < NHP; i++) {
-		hpdata_t *to_purge = psset_pick_purge(&psset);
+		hpdata_t *to_purge = psset_pick_purge(&psset, NULL);
 		expect_ptr_eq(&hpdata_huge[i], to_purge, "");
 		psset_update_begin(&psset, to_purge);
 		hpdata_purge_allowed_set(to_purge, false);
 		psset_update_end(&psset, to_purge);
 	}
 	for (int i = 0; i < NHP; i++) {
-		hpdata_t *to_purge = psset_pick_purge(&psset);
+		hpdata_t *to_purge = psset_pick_purge(&psset, NULL);
 		expect_ptr_eq(&hpdata_nonhuge[i], to_purge, "");
 		psset_update_begin(&psset, to_purge);
 		hpdata_purge_allowed_set(to_purge, false);
@@ -733,16 +1068,10 @@ TEST_END
 
 int
 main(void) {
-	return test_no_reentrancy(
-	    test_empty,
-	    test_fill,
-	    test_reuse,
-	    test_evict,
-	    test_multi_pageslab,
-	    test_stats,
-	    test_oldest_fit,
-	    test_insert_remove,
-	    test_purge_prefers_nonhuge,
-	    test_purge_prefers_empty,
+	return test_no_reentrancy(test_empty, test_fill, test_reuse, test_evict,
+	    test_multi_pageslab, test_stats_merged, test_stats_huge,
+	    test_stats_fullness, test_oldest_fit, test_insert_remove,
+	    test_purge_prefers_nonhuge, test_purge_timing,
+	    test_purge_prefers_empty, test_pick_purge_underflow,
 	    test_purge_prefers_empty_huge);
 }
diff --git a/test/unit/ql.c b/test/unit/ql.c
index f9130582..ff3b436e 100644
--- a/test/unit/ql.c
+++ b/test/unit/ql.c
@@ -15,16 +15,16 @@ struct list_s {
 
 static void
 test_empty_list(list_head_t *head) {
-	list_t *t;
+	list_t  *t;
 	unsigned i;
 
 	expect_true(ql_empty(head), "Unexpected element for empty list");
 	expect_ptr_null(ql_first(head), "Unexpected element for empty list");
-	expect_ptr_null(ql_last(head, link),
-	    "Unexpected element for empty list");
+	expect_ptr_null(
+	    ql_last(head, link), "Unexpected element for empty list");
 
 	i = 0;
-	ql_foreach(t, head, link) {
+	ql_foreach (t, head, link) {
 		i++;
 	}
 	expect_u_eq(i, 0, "Unexpected element for empty list");
@@ -56,48 +56,48 @@ init_entries(list_t *entries, unsigned nentries) {
 
 static void
 test_entries_list(list_head_t *head, list_t *entries, unsigned nentries) {
-	list_t *t;
+	list_t  *t;
 	unsigned i;
 
 	expect_false(ql_empty(head), "List should not be empty");
 	expect_c_eq(ql_first(head)->id, entries[0].id, "Element id mismatch");
-	expect_c_eq(ql_last(head, link)->id, entries[nentries-1].id,
+	expect_c_eq(ql_last(head, link)->id, entries[nentries - 1].id,
 	    "Element id mismatch");
 
 	i = 0;
-	ql_foreach(t, head, link) {
+	ql_foreach (t, head, link) {
 		expect_c_eq(t->id, entries[i].id, "Element id mismatch");
 		i++;
 	}
 
 	i = 0;
 	ql_reverse_foreach(t, head, link) {
-		expect_c_eq(t->id, entries[nentries-i-1].id,
-		    "Element id mismatch");
+		expect_c_eq(
+		    t->id, entries[nentries - i - 1].id, "Element id mismatch");
 		i++;
 	}
 
-	for (i = 0; i < nentries-1; i++) {
+	for (i = 0; i < nentries - 1; i++) {
 		t = ql_next(head, &entries[i], link);
-		expect_c_eq(t->id, entries[i+1].id, "Element id mismatch");
+		expect_c_eq(t->id, entries[i + 1].id, "Element id mismatch");
 	}
-	expect_ptr_null(ql_next(head, &entries[nentries-1], link),
-	    "Unexpected element");
+	expect_ptr_null(
+	    ql_next(head, &entries[nentries - 1], link), "Unexpected element");
 
 	expect_ptr_null(ql_prev(head, &entries[0], link), "Unexpected element");
 	for (i = 1; i < nentries; i++) {
 		t = ql_prev(head, &entries[i], link);
-		expect_c_eq(t->id, entries[i-1].id, "Element id mismatch");
+		expect_c_eq(t->id, entries[i - 1].id, "Element id mismatch");
 	}
 }
 
 TEST_BEGIN(test_ql_tail_insert) {
 	list_head_t head;
-	list_t entries[NENTRIES];
-	unsigned i;
+	list_t      entries[NENTRIES];
+	unsigned    i;
 
 	ql_new(&head);
-	init_entries(entries, sizeof(entries)/sizeof(list_t));
+	init_entries(entries, sizeof(entries) / sizeof(list_t));
 	for (i = 0; i < NENTRIES; i++) {
 		ql_tail_insert(&head, &entries[i], link);
 	}
@@ -108,17 +108,17 @@ TEST_END
 
 TEST_BEGIN(test_ql_tail_remove) {
 	list_head_t head;
-	list_t entries[NENTRIES];
-	unsigned i;
+	list_t      entries[NENTRIES];
+	unsigned    i;
 
 	ql_new(&head);
-	init_entries(entries, sizeof(entries)/sizeof(list_t));
+	init_entries(entries, sizeof(entries) / sizeof(list_t));
 	for (i = 0; i < NENTRIES; i++) {
 		ql_tail_insert(&head, &entries[i], link);
 	}
 
 	for (i = 0; i < NENTRIES; i++) {
-		test_entries_list(&head, entries, NENTRIES-i);
+		test_entries_list(&head, entries, NENTRIES - i);
 		ql_tail_remove(&head, list_t, link);
 	}
 	test_empty_list(&head);
@@ -127,13 +127,13 @@ TEST_END
 
 TEST_BEGIN(test_ql_head_insert) {
 	list_head_t head;
-	list_t entries[NENTRIES];
-	unsigned i;
+	list_t      entries[NENTRIES];
+	unsigned    i;
 
 	ql_new(&head);
-	init_entries(entries, sizeof(entries)/sizeof(list_t));
+	init_entries(entries, sizeof(entries) / sizeof(list_t));
 	for (i = 0; i < NENTRIES; i++) {
-		ql_head_insert(&head, &entries[NENTRIES-i-1], link);
+		ql_head_insert(&head, &entries[NENTRIES - i - 1], link);
 	}
 
 	test_entries_list(&head, entries, NENTRIES);
@@ -142,17 +142,17 @@ TEST_END
 
 TEST_BEGIN(test_ql_head_remove) {
 	list_head_t head;
-	list_t entries[NENTRIES];
-	unsigned i;
+	list_t      entries[NENTRIES];
+	unsigned    i;
 
 	ql_new(&head);
-	init_entries(entries, sizeof(entries)/sizeof(list_t));
+	init_entries(entries, sizeof(entries) / sizeof(list_t));
 	for (i = 0; i < NENTRIES; i++) {
-		ql_head_insert(&head, &entries[NENTRIES-i-1], link);
+		ql_head_insert(&head, &entries[NENTRIES - i - 1], link);
 	}
 
 	for (i = 0; i < NENTRIES; i++) {
-		test_entries_list(&head, &entries[i], NENTRIES-i);
+		test_entries_list(&head, &entries[i], NENTRIES - i);
 		ql_head_remove(&head, list_t, link);
 	}
 	test_empty_list(&head);
@@ -161,11 +161,11 @@ TEST_END
 
 TEST_BEGIN(test_ql_insert) {
 	list_head_t head;
-	list_t entries[8];
-	list_t *a, *b, *c, *d, *e, *f, *g, *h;
+	list_t      entries[8];
+	list_t     *a, *b, *c, *d, *e, *f, *g, *h;
 
 	ql_new(&head);
-	init_entries(entries, sizeof(entries)/sizeof(list_t));
+	init_entries(entries, sizeof(entries) / sizeof(list_t));
 	a = &entries[0];
 	b = &entries[1];
 	c = &entries[2];
@@ -190,13 +190,13 @@ TEST_BEGIN(test_ql_insert) {
 	ql_after_insert(c, d, link);
 	ql_before_insert(&head, f, e, link);
 
-	test_entries_list(&head, entries, sizeof(entries)/sizeof(list_t));
+	test_entries_list(&head, entries, sizeof(entries) / sizeof(list_t));
 }
 TEST_END
 
 static void
-test_concat_split_entries(list_t *entries, unsigned nentries_a,
-    unsigned nentries_b) {
+test_concat_split_entries(
+    list_t *entries, unsigned nentries_a, unsigned nentries_b) {
 	init_entries(entries, nentries_a + nentries_b);
 
 	list_head_t head_a;
@@ -253,8 +253,8 @@ TEST_BEGIN(test_ql_concat_split) {
 
 	test_concat_split_entries(entries, 0, NENTRIES);
 	test_concat_split_entries(entries, 1, NENTRIES - 1);
-	test_concat_split_entries(entries, NENTRIES / 2,
-	    NENTRIES - NENTRIES / 2);
+	test_concat_split_entries(
+	    entries, NENTRIES / 2, NENTRIES - NENTRIES / 2);
 	test_concat_split_entries(entries, NENTRIES - 1, 1);
 	test_concat_split_entries(entries, NENTRIES, 0);
 }
@@ -262,11 +262,11 @@ TEST_END
 
 TEST_BEGIN(test_ql_rotate) {
 	list_head_t head;
-	list_t entries[NENTRIES];
-	unsigned i;
+	list_t      entries[NENTRIES];
+	unsigned    i;
 
 	ql_new(&head);
-	init_entries(entries, sizeof(entries)/sizeof(list_t));
+	init_entries(entries, sizeof(entries) / sizeof(list_t));
 	for (i = 0; i < NENTRIES; i++) {
 		ql_tail_insert(&head, &entries[i], link);
 	}
@@ -284,15 +284,15 @@ TEST_END
 
 TEST_BEGIN(test_ql_move) {
 	list_head_t head_dest, head_src;
-	list_t entries[NENTRIES];
-	unsigned i;
+	list_t      entries[NENTRIES];
+	unsigned    i;
 
 	ql_new(&head_src);
 	ql_move(&head_dest, &head_src);
 	test_empty_list(&head_src);
 	test_empty_list(&head_dest);
 
-	init_entries(entries, sizeof(entries)/sizeof(list_t));
+	init_entries(entries, sizeof(entries) / sizeof(list_t));
 	for (i = 0; i < NENTRIES; i++) {
 		ql_tail_insert(&head_src, &entries[i], link);
 	}
@@ -304,14 +304,7 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_ql_empty,
-	    test_ql_tail_insert,
-	    test_ql_tail_remove,
-	    test_ql_head_insert,
-	    test_ql_head_remove,
-	    test_ql_insert,
-	    test_ql_concat_split,
-	    test_ql_rotate,
-	    test_ql_move);
+	return test(test_ql_empty, test_ql_tail_insert, test_ql_tail_remove,
+	    test_ql_head_insert, test_ql_head_remove, test_ql_insert,
+	    test_ql_concat_split, test_ql_rotate, test_ql_move);
 }
diff --git a/test/unit/qr.c b/test/unit/qr.c
index 16eed0e9..3d8b164b 100644
--- a/test/unit/qr.c
+++ b/test/unit/qr.c
@@ -26,12 +26,12 @@ init_entries(ring_t *entries) {
 
 static void
 test_independent_entries(ring_t *entries) {
-	ring_t *t;
+	ring_t  *t;
 	unsigned i, j;
 
 	for (i = 0; i < NENTRIES; i++) {
 		j = 0;
-		qr_foreach(t, &entries[i], link) {
+		qr_foreach (t, &entries[i], link) {
 			j++;
 		}
 		expect_u_eq(j, 1,
@@ -71,13 +71,13 @@ TEST_END
 
 static void
 test_entries_ring(ring_t *entries) {
-	ring_t *t;
+	ring_t  *t;
 	unsigned i, j;
 
 	for (i = 0; i < NENTRIES; i++) {
 		j = 0;
-		qr_foreach(t, &entries[i], link) {
-			expect_c_eq(t->id, entries[(i+j) % NENTRIES].id,
+		qr_foreach (t, &entries[i], link) {
+			expect_c_eq(t->id, entries[(i + j) % NENTRIES].id,
 			    "Element id mismatch");
 			j++;
 		}
@@ -85,25 +85,26 @@ test_entries_ring(ring_t *entries) {
 	for (i = 0; i < NENTRIES; i++) {
 		j = 0;
 		qr_reverse_foreach(t, &entries[i], link) {
-			expect_c_eq(t->id, entries[(NENTRIES+i-j-1) %
-			    NENTRIES].id, "Element id mismatch");
+			expect_c_eq(t->id,
+			    entries[(NENTRIES + i - j - 1) % NENTRIES].id,
+			    "Element id mismatch");
 			j++;
 		}
 	}
 	for (i = 0; i < NENTRIES; i++) {
 		t = qr_next(&entries[i], link);
-		expect_c_eq(t->id, entries[(i+1) % NENTRIES].id,
+		expect_c_eq(t->id, entries[(i + 1) % NENTRIES].id,
 		    "Element id mismatch");
 	}
 	for (i = 0; i < NENTRIES; i++) {
 		t = qr_prev(&entries[i], link);
-		expect_c_eq(t->id, entries[(NENTRIES+i-1) % NENTRIES].id,
+		expect_c_eq(t->id, entries[(NENTRIES + i - 1) % NENTRIES].id,
 		    "Element id mismatch");
 	}
 }
 
 TEST_BEGIN(test_qr_after_insert) {
-	ring_t entries[NENTRIES];
+	ring_t   entries[NENTRIES];
 	unsigned i;
 
 	init_entries(entries);
@@ -115,8 +116,8 @@ TEST_BEGIN(test_qr_after_insert) {
 TEST_END
 
 TEST_BEGIN(test_qr_remove) {
-	ring_t entries[NENTRIES];
-	ring_t *t;
+	ring_t   entries[NENTRIES];
+	ring_t  *t;
 	unsigned i, j;
 
 	init_entries(entries);
@@ -126,15 +127,15 @@ TEST_BEGIN(test_qr_remove) {
 
 	for (i = 0; i < NENTRIES; i++) {
 		j = 0;
-		qr_foreach(t, &entries[i], link) {
-			expect_c_eq(t->id, entries[i+j].id,
-			    "Element id mismatch");
+		qr_foreach (t, &entries[i], link) {
+			expect_c_eq(
+			    t->id, entries[i + j].id, "Element id mismatch");
 			j++;
 		}
 		j = 0;
 		qr_reverse_foreach(t, &entries[i], link) {
 			expect_c_eq(t->id, entries[NENTRIES - 1 - j].id,
-			"Element id mismatch");
+			    "Element id mismatch");
 			j++;
 		}
 		qr_remove(&entries[i], link);
@@ -144,8 +145,8 @@ TEST_BEGIN(test_qr_remove) {
 TEST_END
 
 TEST_BEGIN(test_qr_before_insert) {
-	ring_t entries[NENTRIES];
-	ring_t *t;
+	ring_t   entries[NENTRIES];
+	ring_t  *t;
 	unsigned i, j;
 
 	init_entries(entries);
@@ -154,28 +155,29 @@ TEST_BEGIN(test_qr_before_insert) {
 	}
 	for (i = 0; i < NENTRIES; i++) {
 		j = 0;
-		qr_foreach(t, &entries[i], link) {
-			expect_c_eq(t->id, entries[(NENTRIES+i-j) %
-			    NENTRIES].id, "Element id mismatch");
+		qr_foreach (t, &entries[i], link) {
+			expect_c_eq(t->id,
+			    entries[(NENTRIES + i - j) % NENTRIES].id,
+			    "Element id mismatch");
 			j++;
 		}
 	}
 	for (i = 0; i < NENTRIES; i++) {
 		j = 0;
 		qr_reverse_foreach(t, &entries[i], link) {
-			expect_c_eq(t->id, entries[(i+j+1) % NENTRIES].id,
+			expect_c_eq(t->id, entries[(i + j + 1) % NENTRIES].id,
 			    "Element id mismatch");
 			j++;
 		}
 	}
 	for (i = 0; i < NENTRIES; i++) {
 		t = qr_next(&entries[i], link);
-		expect_c_eq(t->id, entries[(NENTRIES+i-1) % NENTRIES].id,
+		expect_c_eq(t->id, entries[(NENTRIES + i - 1) % NENTRIES].id,
 		    "Element id mismatch");
 	}
 	for (i = 0; i < NENTRIES; i++) {
 		t = qr_prev(&entries[i], link);
-		expect_c_eq(t->id, entries[(i+1) % NENTRIES].id,
+		expect_c_eq(t->id, entries[(i + 1) % NENTRIES].id,
 		    "Element id mismatch");
 	}
 }
@@ -183,19 +185,22 @@ TEST_END
 
 static void
 test_split_entries(ring_t *entries) {
-	ring_t *t;
+	ring_t  *t;
 	unsigned i, j;
 
 	for (i = 0; i < NENTRIES; i++) {
 		j = 0;
-		qr_foreach(t, &entries[i], link) {
+		qr_foreach (t, &entries[i], link) {
 			if (i < SPLIT_INDEX) {
 				expect_c_eq(t->id,
-				    entries[(i+j) % SPLIT_INDEX].id,
+				    entries[(i + j) % SPLIT_INDEX].id,
 				    "Element id mismatch");
 			} else {
-				expect_c_eq(t->id, entries[(i+j-SPLIT_INDEX) %
-				    (NENTRIES-SPLIT_INDEX) + SPLIT_INDEX].id,
+				expect_c_eq(t->id,
+				    entries[(i + j - SPLIT_INDEX)
+				            % (NENTRIES - SPLIT_INDEX)
+				        + SPLIT_INDEX]
+				        .id,
 				    "Element id mismatch");
 			}
 			j++;
@@ -204,7 +209,7 @@ test_split_entries(ring_t *entries) {
 }
 
 TEST_BEGIN(test_qr_meld_split) {
-	ring_t entries[NENTRIES];
+	ring_t   entries[NENTRIES];
 	unsigned i;
 
 	init_entries(entries);
@@ -234,10 +239,6 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_qr_one,
-	    test_qr_after_insert,
-	    test_qr_remove,
-	    test_qr_before_insert,
-	    test_qr_meld_split);
+	return test(test_qr_one, test_qr_after_insert, test_qr_remove,
+	    test_qr_before_insert, test_qr_meld_split);
 }
diff --git a/test/unit/rb.c b/test/unit/rb.c
index 827ec510..790593e3 100644
--- a/test/unit/rb.c
+++ b/test/unit/rb.c
@@ -4,16 +4,17 @@
 
 #include "jemalloc/internal/rb.h"
 
-#define rbtn_black_height(a_type, a_field, a_rbt, r_height) do {	\
-	a_type *rbp_bh_t;						\
-	for (rbp_bh_t = (a_rbt)->rbt_root, (r_height) = 0; rbp_bh_t !=	\
-	    NULL; rbp_bh_t = rbtn_left_get(a_type, a_field,		\
-	    rbp_bh_t)) {						\
-		if (!rbtn_red_get(a_type, a_field, rbp_bh_t)) {		\
-		(r_height)++;						\
-		}							\
-	}								\
-} while (0)
+#define rbtn_black_height(a_type, a_field, a_rbt, r_height)                    \
+	do {                                                                   \
+		a_type *rbp_bh_t;                                              \
+		for (rbp_bh_t = (a_rbt)->rbt_root, (r_height) = 0;             \
+		     rbp_bh_t != NULL;                                         \
+		     rbp_bh_t = rbtn_left_get(a_type, a_field, rbp_bh_t)) {    \
+			if (!rbtn_red_get(a_type, a_field, rbp_bh_t)) {        \
+				(r_height)++;                                  \
+			}                                                      \
+		}                                                              \
+	} while (0)
 
 static bool summarize_always_returns_true = false;
 
@@ -55,7 +56,7 @@ struct node_s {
 	 */
 	const node_t *summary_lchild;
 	const node_t *summary_rchild;
-	uint64_t summary_max_specialness;
+	uint64_t      summary_max_specialness;
 };
 
 static int
@@ -80,8 +81,8 @@ node_cmp(const node_t *a, const node_t *b) {
 }
 
 static uint64_t
-node_subtree_specialness(node_t *n, const node_t *lchild,
-    const node_t *rchild) {
+node_subtree_specialness(
+    node_t *n, const node_t *lchild, const node_t *rchild) {
 	uint64_t subtree_specialness = n->specialness;
 	if (lchild != NULL
 	    && lchild->summary_max_specialness > subtree_specialness) {
@@ -109,8 +110,8 @@ node_summarize(node_t *a, const node_t *lchild, const node_t *rchild) {
 
 typedef rb_tree(node_t) tree_t;
 rb_summarized_proto(static, tree_, tree_t, node_t);
-rb_summarized_gen(static, tree_, tree_t, node_t, link, node_cmp,
-    node_summarize);
+rb_summarized_gen(
+    static, tree_, tree_t, node_t, link, node_cmp, node_summarize);
 
 static bool
 specialness_filter_node(void *ctx, node_t *node) {
@@ -127,24 +128,24 @@ specialness_filter_subtree(void *ctx, node_t *node) {
 static node_t *
 tree_iterate_cb(tree_t *tree, node_t *node, void *data) {
 	unsigned *i = (unsigned *)data;
-	node_t *search_node;
+	node_t   *search_node;
 
 	expect_u32_eq(node->magic, NODE_MAGIC, "Bad magic");
 
 	/* Test rb_search(). */
 	search_node = tree_search(tree, node);
-	expect_ptr_eq(search_node, node,
-	    "tree_search() returned unexpected node");
+	expect_ptr_eq(
+	    search_node, node, "tree_search() returned unexpected node");
 
 	/* Test rb_nsearch(). */
 	search_node = tree_nsearch(tree, node);
-	expect_ptr_eq(search_node, node,
-	    "tree_nsearch() returned unexpected node");
+	expect_ptr_eq(
+	    search_node, node, "tree_nsearch() returned unexpected node");
 
 	/* Test rb_psearch(). */
 	search_node = tree_psearch(tree, node);
-	expect_ptr_eq(search_node, node,
-	    "tree_psearch() returned unexpected node");
+	expect_ptr_eq(
+	    search_node, node, "tree_psearch() returned unexpected node");
 
 	(*i)++;
 
@@ -174,38 +175,44 @@ TEST_BEGIN(test_rb_empty) {
 	expect_ptr_null(tree_psearch(&tree, &key), "Unexpected node");
 
 	unsigned nodes = 0;
-	tree_iter_filtered(&tree, NULL, &tree_iterate_cb,
-	    &nodes, &specialness_filter_node, &specialness_filter_subtree,
-	    NULL);
+	tree_iter_filtered(&tree, NULL, &tree_iterate_cb, &nodes,
+	    &specialness_filter_node, &specialness_filter_subtree, NULL);
 	expect_u_eq(0, nodes, "");
 
 	nodes = 0;
-	tree_reverse_iter_filtered(&tree, NULL, &tree_iterate_cb,
-	    &nodes, &specialness_filter_node, &specialness_filter_subtree,
-	    NULL);
+	tree_reverse_iter_filtered(&tree, NULL, &tree_iterate_cb, &nodes,
+	    &specialness_filter_node, &specialness_filter_subtree, NULL);
 	expect_u_eq(0, nodes, "");
 
 	expect_ptr_null(tree_first_filtered(&tree, &specialness_filter_node,
-	    &specialness_filter_subtree, NULL), "");
+	                    &specialness_filter_subtree, NULL),
+	    "");
 	expect_ptr_null(tree_last_filtered(&tree, &specialness_filter_node,
-	    &specialness_filter_subtree, NULL), "");
+	                    &specialness_filter_subtree, NULL),
+	    "");
 
 	key.key = 0;
 	key.magic = NODE_MAGIC;
-	expect_ptr_null(tree_search_filtered(&tree, &key,
-	    &specialness_filter_node, &specialness_filter_subtree, NULL), "");
-	expect_ptr_null(tree_nsearch_filtered(&tree, &key,
-	    &specialness_filter_node, &specialness_filter_subtree, NULL), "");
-	expect_ptr_null(tree_psearch_filtered(&tree, &key,
-	    &specialness_filter_node, &specialness_filter_subtree, NULL), "");
+	expect_ptr_null(
+	    tree_search_filtered(&tree, &key, &specialness_filter_node,
+	        &specialness_filter_subtree, NULL),
+	    "");
+	expect_ptr_null(
+	    tree_nsearch_filtered(&tree, &key, &specialness_filter_node,
+	        &specialness_filter_subtree, NULL),
+	    "");
+	expect_ptr_null(
+	    tree_psearch_filtered(&tree, &key, &specialness_filter_node,
+	        &specialness_filter_subtree, NULL),
+	    "");
 }
 TEST_END
 
 static unsigned
 tree_recurse(node_t *node, unsigned black_height, unsigned black_depth) {
 	unsigned ret = 0;
-	node_t *left_node;
-	node_t *right_node;
+	node_t  *left_node;
+	node_t  *right_node;
 
 	if (node == NULL) {
 		return ret;
@@ -214,13 +221,13 @@ tree_recurse(node_t *node, unsigned black_height, unsigned black_depth) {
 	left_node = rbtn_left_get(node_t, link, node);
 	right_node = rbtn_right_get(node_t, link, node);
 
-	expect_ptr_eq(left_node, node->summary_lchild,
-	    "summary missed a tree update");
-	expect_ptr_eq(right_node, node->summary_rchild,
-	    "summary missed a tree update");
+	expect_ptr_eq(
+	    left_node, node->summary_lchild, "summary missed a tree update");
+	expect_ptr_eq(
+	    right_node, node->summary_rchild, "summary missed a tree update");
 
-	uint64_t expected_subtree_specialness = node_subtree_specialness(node,
-	    left_node, right_node);
+	uint64_t expected_subtree_specialness = node_subtree_specialness(
+	    node, left_node, right_node);
 	expect_u64_eq(expected_subtree_specialness,
 	    node->summary_max_specialness, "Incorrect summary");
 
@@ -232,7 +239,7 @@ tree_recurse(node_t *node, unsigned black_height, unsigned black_depth) {
 	if (rbtn_red_get(node_t, link, node)) {
 		if (left_node != NULL) {
 			expect_false(rbtn_red_get(node_t, link, left_node),
-				"Node should be black");
+			    "Node should be black");
 		}
 		if (right_node != NULL) {
 			expect_false(rbtn_red_get(node_t, link, right_node),
@@ -282,7 +289,7 @@ tree_iterate_reverse(tree_t *tree) {
 
 static void
 node_remove(tree_t *tree, node_t *node, unsigned nnodes) {
-	node_t *search_node;
+	node_t  *search_node;
 	unsigned black_height, imbalances;
 
 	tree_remove(tree, node);
@@ -290,15 +297,15 @@ node_remove(tree_t *tree, node_t *node, unsigned nnodes) {
 	/* Test rb_nsearch(). */
 	search_node = tree_nsearch(tree, node);
 	if (search_node != NULL) {
-		expect_u64_ge(search_node->key, node->key,
-		    "Key ordering error");
+		expect_u64_ge(
+		    search_node->key, node->key, "Key ordering error");
 	}
 
 	/* Test rb_psearch(). */
 	search_node = tree_psearch(tree, node);
 	if (search_node != NULL) {
-		expect_u64_le(search_node->key, node->key,
-		    "Key ordering error");
+		expect_u64_le(
+		    search_node->key, node->key, "Key ordering error");
 	}
 
 	node->magic = 0;
@@ -306,16 +313,16 @@ node_remove(tree_t *tree, node_t *node, unsigned nnodes) {
 	rbtn_black_height(node_t, link, tree, black_height);
 	imbalances = tree_recurse(tree->rbt_root, black_height, 0);
 	expect_u_eq(imbalances, 0, "Tree is unbalanced");
-	expect_u_eq(tree_iterate(tree), nnodes-1,
-	    "Unexpected node iteration count");
-	expect_u_eq(tree_iterate_reverse(tree), nnodes-1,
+	expect_u_eq(
+	    tree_iterate(tree), nnodes - 1, "Unexpected node iteration count");
+	expect_u_eq(tree_iterate_reverse(tree), nnodes - 1,
 	    "Unexpected node iteration count");
 }
 
 static node_t *
 remove_iterate_cb(tree_t *tree, node_t *node, void *data) {
 	unsigned *nnodes = (unsigned *)data;
-	node_t *ret = tree_next(tree, node);
+	node_t   *ret = tree_next(tree, node);
 
 	node_remove(tree, node, *nnodes);
 
@@ -325,7 +332,7 @@ remove_iterate_cb(tree_t *tree, node_t *node, void *data) {
 static node_t *
 remove_reverse_iterate_cb(tree_t *tree, node_t *node, void *data) {
 	unsigned *nnodes = (unsigned *)data;
-	node_t *ret = tree_prev(tree, node);
+	node_t   *ret = tree_prev(tree, node);
 
 	node_remove(tree, node, *nnodes);
 
@@ -341,15 +348,11 @@ destroy_cb(node_t *node, void *data) {
 }
 
 TEST_BEGIN(test_rb_random) {
-	enum {
-		NNODES = 25,
-		NBAGS = 500,
-		SEED = 42
-	};
-	sfmt_t *sfmt;
+	enum { NNODES = 25, NBAGS = 500, SEED = 42 };
+	sfmt_t  *sfmt;
 	uint64_t bag[NNODES];
-	tree_t tree;
-	node_t nodes[NNODES];
+	tree_t   tree;
+	node_t   nodes[NNODES];
 	unsigned i, j, k, black_height, imbalances;
 
 	sfmt = init_gen_rand(SEED);
@@ -386,8 +389,8 @@ TEST_BEGIN(test_rb_random) {
 			for (k = 0; k < j; k++) {
 				nodes[k].magic = NODE_MAGIC;
 				nodes[k].key = bag[k];
-				nodes[k].specialness = gen_rand64_range(sfmt,
-				    NNODES);
+				nodes[k].specialness = gen_rand64_range(
+				    sfmt, NNODES);
 				nodes[k].mid_remove = false;
 				nodes[k].allow_duplicates = false;
 				nodes[k].summary_lchild = NULL;
@@ -399,16 +402,16 @@ TEST_BEGIN(test_rb_random) {
 			for (k = 0; k < j; k++) {
 				tree_insert(&tree, &nodes[k]);
 
-				rbtn_black_height(node_t, link, &tree,
-				    black_height);
-				imbalances = tree_recurse(tree.rbt_root,
-				    black_height, 0);
-				expect_u_eq(imbalances, 0,
-				    "Tree is unbalanced");
+				rbtn_black_height(
+				    node_t, link, &tree, black_height);
+				imbalances = tree_recurse(
+				    tree.rbt_root, black_height, 0);
+				expect_u_eq(
+				    imbalances, 0, "Tree is unbalanced");
 
-				expect_u_eq(tree_iterate(&tree), k+1,
+				expect_u_eq(tree_iterate(&tree), k + 1,
 				    "Unexpected node iteration count");
-				expect_u_eq(tree_iterate_reverse(&tree), k+1,
+				expect_u_eq(tree_iterate_reverse(&tree), k + 1,
 				    "Unexpected node iteration count");
 
 				expect_false(tree_empty(&tree),
@@ -431,11 +434,11 @@ TEST_BEGIN(test_rb_random) {
 				break;
 			case 1:
 				for (k = j; k > 0; k--) {
-					node_remove(&tree, &nodes[k-1], k);
+					node_remove(&tree, &nodes[k - 1], k);
 				}
 				break;
 			case 2: {
-				node_t *start;
+				node_t  *start;
 				unsigned nnodes = j;
 
 				start = NULL;
@@ -444,11 +447,12 @@ TEST_BEGIN(test_rb_random) {
 					    remove_iterate_cb, (void *)&nnodes);
 					nnodes--;
 				} while (start != NULL);
-				expect_u_eq(nnodes, 0,
-				    "Removal terminated early");
+				expect_u_eq(
+				    nnodes, 0, "Removal terminated early");
 				break;
-			} case 3: {
-				node_t *start;
+			}
+			case 3: {
+				node_t  *start;
 				unsigned nnodes = j;
 
 				start = NULL;
@@ -458,16 +462,18 @@ TEST_BEGIN(test_rb_random) {
 					    (void *)&nnodes);
 					nnodes--;
 				} while (start != NULL);
-				expect_u_eq(nnodes, 0,
-				    "Removal terminated early");
+				expect_u_eq(
+				    nnodes, 0, "Removal terminated early");
 				break;
-			} case 4: {
+			}
+			case 4: {
 				unsigned nnodes = j;
 				tree_destroy(&tree, destroy_cb, &nnodes);
-				expect_u_eq(nnodes, 0,
-				    "Destruction terminated early");
+				expect_u_eq(
+				    nnodes, 0, "Destruction terminated early");
 				break;
-			} default:
+			}
+			default:
 				not_reached();
 			}
 		}
@@ -479,7 +485,7 @@ TEST_END
 static void
 expect_simple_consistency(tree_t *tree, uint64_t specialness,
     bool expected_empty, node_t *expected_first, node_t *expected_last) {
-	bool empty;
+	bool    empty;
 	node_t *first;
 	node_t *last;
 
@@ -487,19 +493,17 @@ expect_simple_consistency(tree_t *tree, uint64_t specialness,
 	    &specialness_filter_subtree, &specialness);
 	expect_b_eq(expected_empty, empty, "");
 
-	first = tree_first_filtered(tree,
-	    &specialness_filter_node, &specialness_filter_subtree,
-	    (void *)&specialness);
+	first = tree_first_filtered(tree, &specialness_filter_node,
+	    &specialness_filter_subtree, (void *)&specialness);
 	expect_ptr_eq(expected_first, first, "");
 
-	last = tree_last_filtered(tree,
-	    &specialness_filter_node, &specialness_filter_subtree,
-	    (void *)&specialness);
+	last = tree_last_filtered(tree, &specialness_filter_node,
+	    &specialness_filter_subtree, (void *)&specialness);
 	expect_ptr_eq(expected_last, last, "");
 }
 
 TEST_BEGIN(test_rb_filter_simple) {
-	enum {FILTER_NODES = 10};
+	enum { FILTER_NODES = 10 };
 	node_t nodes[FILTER_NODES];
 	for (unsigned i = 0; i < FILTER_NODES; i++) {
 		nodes[i].magic = NODE_MAGIC;
@@ -583,10 +587,10 @@ TEST_END
 
 typedef struct iter_ctx_s iter_ctx_t;
 struct iter_ctx_s {
-	int ncalls;
+	int     ncalls;
 	node_t *last_node;
 
-	int ncalls_max;
+	int  ncalls_max;
 	bool forward;
 };
 
@@ -624,8 +628,8 @@ static void
 check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) {
 	uint64_t specialness = 1;
 
-	bool empty;
-	bool real_empty = true;
+	bool    empty;
+	bool    real_empty = true;
 	node_t *first;
 	node_t *real_first = NULL;
 	node_t *last;
@@ -667,12 +671,14 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) {
 			}
 			if (node_cmp(&nodes[j], &nodes[i]) < 0
 			    && (real_prev_filtered == NULL
-			    || node_cmp(&nodes[j], real_prev_filtered) > 0)) {
+			        || node_cmp(&nodes[j], real_prev_filtered)
+			            > 0)) {
 				real_prev_filtered = &nodes[j];
 			}
 			if (node_cmp(&nodes[j], &nodes[i]) > 0
 			    && (real_next_filtered == NULL
-			    || node_cmp(&nodes[j], real_next_filtered) < 0)) {
+			        || node_cmp(&nodes[j], real_next_filtered)
+			            < 0)) {
 				real_next_filtered = &nodes[j];
 			}
 		}
@@ -707,8 +713,9 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) {
 		    &specialness);
 		expect_ptr_eq(real_search_filtered, search_filtered, "");
 
-		real_nsearch_filtered = (nodes[i].specialness >= specialness ?
-		    &nodes[i] : real_next_filtered);
+		real_nsearch_filtered = (nodes[i].specialness >= specialness
+		        ? &nodes[i]
+		        : real_next_filtered);
 		nsearch_filtered = tree_nsearch_filtered(tree, &before,
 		    &specialness_filter_node, &specialness_filter_subtree,
 		    &specialness);
@@ -721,22 +728,25 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) {
 		expect_ptr_eq(real_psearch_filtered, psearch_filtered, "");
 
 		/* search, nsearch, psearch from nodes[i] */
-		real_search_filtered = (nodes[i].specialness >= specialness ?
-		    &nodes[i] : NULL);
+		real_search_filtered = (nodes[i].specialness >= specialness
+		        ? &nodes[i]
+		        : NULL);
 		search_filtered = tree_search_filtered(tree, &nodes[i],
 		    &specialness_filter_node, &specialness_filter_subtree,
 		    &specialness);
 		expect_ptr_eq(real_search_filtered, search_filtered, "");
 
-		real_nsearch_filtered = (nodes[i].specialness >= specialness ?
-		    &nodes[i] : real_next_filtered);
+		real_nsearch_filtered = (nodes[i].specialness >= specialness
+		        ? &nodes[i]
+		        : real_next_filtered);
 		nsearch_filtered = tree_nsearch_filtered(tree, &nodes[i],
 		    &specialness_filter_node, &specialness_filter_subtree,
 		    &specialness);
 		expect_ptr_eq(real_nsearch_filtered, nsearch_filtered, "");
 
-		real_psearch_filtered = (nodes[i].specialness >= specialness ?
-		    &nodes[i] : real_prev_filtered);
+		real_psearch_filtered = (nodes[i].specialness >= specialness
+		        ? &nodes[i]
+		        : real_prev_filtered);
 		psearch_filtered = tree_psearch_filtered(tree, &nodes[i],
 		    &specialness_filter_node, &specialness_filter_subtree,
 		    &specialness);
@@ -750,22 +760,25 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) {
 		equiv.magic = NODE_MAGIC;
 		equiv.key = nodes[i].key;
 		equiv.allow_duplicates = true;
-		real_search_filtered = (nodes[i].specialness >= specialness ?
-		    &nodes[i] : NULL);
+		real_search_filtered = (nodes[i].specialness >= specialness
+		        ? &nodes[i]
+		        : NULL);
 		search_filtered = tree_search_filtered(tree, &equiv,
 		    &specialness_filter_node, &specialness_filter_subtree,
 		    &specialness);
 		expect_ptr_eq(real_search_filtered, search_filtered, "");
 
-		real_nsearch_filtered = (nodes[i].specialness >= specialness ?
-		    &nodes[i] : real_next_filtered);
+		real_nsearch_filtered = (nodes[i].specialness >= specialness
+		        ? &nodes[i]
+		        : real_next_filtered);
 		nsearch_filtered = tree_nsearch_filtered(tree, &equiv,
 		    &specialness_filter_node, &specialness_filter_subtree,
 		    &specialness);
 		expect_ptr_eq(real_nsearch_filtered, nsearch_filtered, "");
 
-		real_psearch_filtered = (nodes[i].specialness >= specialness ?
-		    &nodes[i] : real_prev_filtered);
+		real_psearch_filtered = (nodes[i].specialness >= specialness
+		        ? &nodes[i]
+		        : real_prev_filtered);
 		psearch_filtered = tree_psearch_filtered(tree, &equiv,
 		    &specialness_filter_node, &specialness_filter_subtree,
 		    &specialness);
@@ -791,8 +804,9 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) {
 		    &specialness);
 		expect_ptr_eq(real_nsearch_filtered, nsearch_filtered, "");
 
-		real_psearch_filtered = (nodes[i].specialness >= specialness ?
-		    &nodes[i] : real_prev_filtered);
+		real_psearch_filtered = (nodes[i].specialness >= specialness
+		        ? &nodes[i]
+		        : real_prev_filtered);
 		psearch_filtered = tree_psearch_filtered(tree, &after,
 		    &specialness_filter_node, &specialness_filter_subtree,
 		    &specialness);
@@ -800,7 +814,7 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) {
 	}
 
 	/* Filtered iteration test setup. */
-	int nspecial = 0;
+	int     nspecial = 0;
 	node_t *sorted_nodes[UPDATE_TEST_MAX];
 	node_t *sorted_filtered_nodes[UPDATE_TEST_MAX];
 	for (int i = 0; i < nnodes; i++) {
@@ -862,8 +876,9 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) {
 			    &specialness_filter_node,
 			    &specialness_filter_subtree, &specialness);
 			expect_d_eq(j + 1, ctx.ncalls, "");
-			expect_ptr_eq(sorted_filtered_nodes[
-			    nodes[i].filtered_rank + j], iter_result, "");
+			expect_ptr_eq(
+			    sorted_filtered_nodes[nodes[i].filtered_rank + j],
+			    iter_result, "");
 		}
 	}
 
@@ -888,8 +903,8 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) {
 		    &specialness_filter_subtree, &specialness);
 		expect_ptr_null(iter_result, "");
 		int surplus_rank = (nodes[i].specialness >= 1 ? 1 : 0);
-		expect_d_eq(nodes[i].filtered_rank + surplus_rank, ctx.ncalls,
-		    "");
+		expect_d_eq(
+		    nodes[i].filtered_rank + surplus_rank, ctx.ncalls, "");
 	}
 	/* Filtered backward iteration from the end, with stopping */
 	for (int i = 0; i < nspecial; i++) {
@@ -899,15 +914,15 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) {
 		iter_result = tree_reverse_iter_filtered(tree, NULL,
 		    &tree_iterate_filtered_cb, &ctx, &specialness_filter_node,
 		    &specialness_filter_subtree, &specialness);
-		expect_ptr_eq(sorted_filtered_nodes[nspecial - i - 1],
-		    iter_result, "");
+		expect_ptr_eq(
+		    sorted_filtered_nodes[nspecial - i - 1], iter_result, "");
 		expect_d_eq(ctx.ncalls, i + 1, "");
 	}
 	/* Filtered backward iteration from a starting point, with stopping. */
 	for (int i = 0; i < nnodes; i++) {
 		int surplus_rank = (nodes[i].specialness >= 1 ? 1 : 0);
 		for (int j = 0; j < nodes[i].filtered_rank + surplus_rank;
-		    j++) {
+		     j++) {
 			ctx.ncalls = 0;
 			ctx.last_node = NULL;
 			ctx.ncalls_max = j + 1;
@@ -916,16 +931,16 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) {
 			    &specialness_filter_node,
 			    &specialness_filter_subtree, &specialness);
 			expect_d_eq(j + 1, ctx.ncalls, "");
-			expect_ptr_eq(sorted_filtered_nodes[
-			    nodes[i].filtered_rank - j - 1 + surplus_rank],
+			expect_ptr_eq(
+			    sorted_filtered_nodes[nodes[i].filtered_rank - j - 1
+			        + surplus_rank],
 			    iter_result, "");
 		}
 	}
 }
 
 static void
-do_update_search_test(int nnodes, int ntrees, int nremovals,
-    int nupdates) {
+do_update_search_test(int nnodes, int ntrees, int nremovals, int nupdates) {
 	node_t nodes[UPDATE_TEST_MAX];
 	assert(nnodes <= UPDATE_TEST_MAX);
 
@@ -987,8 +1002,8 @@ rb_gen(static UNUSED, unsummarized_tree_, unsummarized_tree_t, node_t, link,
     node_cmp);
 
 static node_t *
-unsummarized_tree_iterate_cb(unsummarized_tree_t *tree, node_t *node,
-    void *data) {
+unsummarized_tree_iterate_cb(
+    unsummarized_tree_t *tree, node_t *node, void *data) {
 	unsigned *i = (unsigned *)data;
 	(*i)++;
 	return NULL;
@@ -1002,18 +1017,14 @@ TEST_BEGIN(test_rb_unsummarized) {
 	unsummarized_tree_t tree;
 	unsummarized_tree_new(&tree);
 	unsigned nnodes = 0;
-	unsummarized_tree_iter(&tree, NULL, &unsummarized_tree_iterate_cb,
-	    &nnodes);
+	unsummarized_tree_iter(
+	    &tree, NULL, &unsummarized_tree_iterate_cb, &nnodes);
 	expect_u_eq(0, nnodes, "");
 }
 TEST_END
 
 int
 main(void) {
-	return test_no_reentrancy(
-	    test_rb_empty,
-	    test_rb_random,
-	    test_rb_filter_simple,
-	    test_rb_update_search,
-	    test_rb_unsummarized);
+	return test_no_reentrancy(test_rb_empty, test_rb_random,
+	    test_rb_filter_simple, test_rb_update_search, test_rb_unsummarized);
 }
diff --git a/test/unit/retained.c b/test/unit/retained.c
index aa9f6847..687701c7 100644
--- a/test/unit/retained.c
+++ b/test/unit/retained.c
@@ -3,21 +3,22 @@
 #include "jemalloc/internal/san.h"
 #include "jemalloc/internal/spin.h"
 
-static unsigned		arena_ind;
-static size_t		sz;
-static size_t		esz;
-#define NEPOCHS		8
-#define PER_THD_NALLOCS	1
-static atomic_u_t	epoch;
-static atomic_u_t	nfinished;
+static unsigned arena_ind;
+static size_t   sz;
+static size_t   esz;
+#define NEPOCHS 8
+#define PER_THD_NALLOCS 1
+static atomic_u_t epoch;
+static atomic_u_t nfinished;
 
 static unsigned
 do_arena_create(extent_hooks_t *h) {
 	unsigned new_arena_ind;
-	size_t ind_sz = sizeof(unsigned);
-	expect_d_eq(mallctl("arenas.create", (void *)&new_arena_ind, &ind_sz,
-	    (void *)(h != NULL ? &h : NULL), (h != NULL ? sizeof(h) : 0)), 0,
-	    "Unexpected mallctl() failure");
+	size_t   ind_sz = sizeof(unsigned);
+	expect_d_eq(
+	    mallctl("arenas.create", (void *)&new_arena_ind, &ind_sz,
+	        (void *)(h != NULL ? &h : NULL), (h != NULL ? sizeof(h) : 0)),
+	    0, "Unexpected mallctl() failure");
 	return new_arena_ind;
 }
 
@@ -26,7 +27,7 @@ do_arena_destroy(unsigned ind) {
 	size_t mib[3];
 	size_t miblen;
 
-	miblen = sizeof(mib)/sizeof(size_t);
+	miblen = sizeof(mib) / sizeof(size_t);
 	expect_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0,
 	    "Unexpected mallctlnametomib() failure");
 	mib[1] = (size_t)ind;
@@ -38,7 +39,8 @@ static void
 do_refresh(void) {
 	uint64_t refresh_epoch = 1;
 	expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&refresh_epoch,
-	    sizeof(refresh_epoch)), 0, "Unexpected mallctl() failure");
+	                sizeof(refresh_epoch)),
+	    0, "Unexpected mallctl() failure");
 }
 
 static size_t
@@ -47,12 +49,12 @@ do_get_size_impl(const char *cmd, unsigned ind) {
 	size_t miblen = sizeof(mib) / sizeof(size_t);
 	size_t z = sizeof(size_t);
 
-	expect_d_eq(mallctlnametomib(cmd, mib, &miblen),
-	    0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd);
+	expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0,
+	    "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd);
 	mib[2] = ind;
 	size_t size;
-	expect_d_eq(mallctlbymib(mib, miblen, (void *)&size, &z, NULL, 0),
-	    0, "Unexpected mallctlbymib([\"%s\"], ...) failure", cmd);
+	expect_d_eq(mallctlbymib(mib, miblen, (void *)&size, &z, NULL, 0), 0,
+	    "Unexpected mallctlbymib([\"%s\"], ...) failure", cmd);
 
 	return size;
 }
@@ -72,9 +74,9 @@ thd_start(void *arg) {
 	for (unsigned next_epoch = 1; next_epoch < NEPOCHS; next_epoch++) {
 		/* Busy-wait for next epoch. */
 		unsigned cur_epoch;
-		spin_t spinner = SPIN_INITIALIZER;
-		while ((cur_epoch = atomic_load_u(&epoch, ATOMIC_ACQUIRE)) !=
-		    next_epoch) {
+		spin_t   spinner = SPIN_INITIALIZER;
+		while ((cur_epoch = atomic_load_u(&epoch, ATOMIC_ACQUIRE))
+		    != next_epoch) {
 			spin_adaptive(&spinner);
 		}
 		expect_u_eq(cur_epoch, next_epoch, "Unexpected epoch");
@@ -84,11 +86,10 @@ thd_start(void *arg) {
 		 * no need to deallocate.
 		 */
 		for (unsigned i = 0; i < PER_THD_NALLOCS; i++) {
-			void *p = mallocx(sz, MALLOCX_ARENA(arena_ind) |
-			    MALLOCX_TCACHE_NONE
-			    );
-			expect_ptr_not_null(p,
-			    "Unexpected mallocx() failure\n");
+			void *p = mallocx(
+			    sz, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE);
+			expect_ptr_not_null(
+			    p, "Unexpected mallocx() failure\n");
 		}
 
 		/* Let the main thread know we've finished this iteration. */
@@ -110,8 +111,15 @@ TEST_BEGIN(test_retained) {
 	atomic_store_u(&epoch, 0, ATOMIC_RELAXED);
 
 	unsigned nthreads = ncpus * 2;
-	if (LG_SIZEOF_PTR < 3 && nthreads > 16) {
-		nthreads = 16; /* 32-bit platform could run out of vaddr. */
+	if (nthreads > 16) {
+		/*
+		 * Limit number of threads we are creating for following
+		 * reasons.
+		 * 1. On 32-bit platforms could run out of vaddr.
+		 * 2. On boxes with a lot of CPUs we might have not enough
+		 *    memory to fit thd_t into VARIABLE_ARRAY.
+		 */
+		nthreads = 16;
 	}
 	VARIABLE_ARRAY(thd_t, threads, nthreads);
 	for (unsigned i = 0; i < nthreads; i++) {
@@ -135,18 +143,17 @@ TEST_BEGIN(test_retained) {
 		 */
 		do_refresh();
 
-		size_t allocated = (esz - guard_sz) * nthreads *
-		    PER_THD_NALLOCS;
+		size_t allocated = (esz - guard_sz) * nthreads
+		    * PER_THD_NALLOCS;
 		size_t active = do_get_active(arena_ind);
 		expect_zu_le(allocated, active, "Unexpected active memory");
 		size_t mapped = do_get_mapped(arena_ind);
 		expect_zu_le(active, mapped, "Unexpected mapped memory");
 
 		arena_t *arena = arena_get(tsdn_fetch(), arena_ind, false);
-		size_t usable = 0;
-		size_t fragmented = 0;
-		for (pszind_t pind = sz_psz2ind(HUGEPAGE); pind <
-		    arena->pa_shard.pac.exp_grow.next; pind++) {
+		size_t   usable = 0;
+		for (pszind_t pind = sz_psz2ind(HUGEPAGE);
+		     pind < arena->pa_shard.pac.exp_grow.next; pind++) {
 			size_t psz = sz_pind2sz(pind);
 			size_t psz_fragmented = psz % esz;
 			size_t psz_usable = psz - psz_fragmented;
@@ -156,9 +163,8 @@ TEST_BEGIN(test_retained) {
 			if (psz_usable > 0) {
 				expect_zu_lt(usable, allocated,
 				    "Excessive retained memory "
-				    "(%#zx[+%#zx] > %#zx)", usable, psz_usable,
-				    allocated);
-				fragmented += psz_fragmented;
+				    "(%#zx[+%#zx] > %#zx)",
+				    usable, psz_usable, allocated);
 				usable += psz_usable;
 			}
 		}
@@ -169,8 +175,8 @@ TEST_BEGIN(test_retained) {
 		 * (rather than retaining) during reset.
 		 */
 		do_arena_destroy(arena_ind);
-		expect_u_eq(do_arena_create(NULL), arena_ind,
-		    "Unexpected arena index");
+		expect_u_eq(
+		    do_arena_create(NULL), arena_ind, "Unexpected arena index");
 	}
 
 	for (unsigned i = 0; i < nthreads; i++) {
@@ -183,6 +189,5 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_retained);
+	return test(test_retained);
 }
diff --git a/test/unit/rtree.c b/test/unit/rtree.c
index 4101b72b..284c3eae 100644
--- a/test/unit/rtree.c
+++ b/test/unit/rtree.c
@@ -16,14 +16,15 @@ TEST_BEGIN(test_rtree_read_empty) {
 	    /* metadata_use_hooks */ true);
 	expect_ptr_not_null(base, "Unexpected base_new failure");
 
-	rtree_t *rtree = &test_rtree;
+	rtree_t    *rtree = &test_rtree;
 	rtree_ctx_t rtree_ctx;
 	rtree_ctx_data_init(&rtree_ctx);
-	expect_false(rtree_new(rtree, base, false),
-	    "Unexpected rtree_new() failure");
+	expect_false(
+	    rtree_new(rtree, base, false), "Unexpected rtree_new() failure");
 	rtree_contents_t contents;
-	expect_true(rtree_read_independent(tsdn, rtree, &rtree_ctx, PAGE,
-	    &contents), "rtree_read_independent() should fail on empty rtree.");
+	expect_true(
+	    rtree_read_independent(tsdn, rtree, &rtree_ctx, PAGE, &contents),
+	    "rtree_read_independent() should fail on empty rtree.");
 
 	base_delete(tsdn, base);
 }
@@ -45,9 +46,9 @@ TEST_BEGIN(test_rtree_extrema) {
 	edata_t *edata_a, *edata_b;
 	edata_a = alloc_edata();
 	edata_b = alloc_edata();
-	edata_init(edata_a, INVALID_ARENA_IND, NULL, SC_LARGE_MINCLASS,
-	    false, sz_size2index(SC_LARGE_MINCLASS), 0,
-	    extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD);
+	edata_init(edata_a, INVALID_ARENA_IND, NULL, SC_LARGE_MINCLASS, false,
+	    sz_size2index(SC_LARGE_MINCLASS), 0, extent_state_active, false,
+	    false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD);
 	edata_init(edata_b, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0,
 	    extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD);
 
@@ -57,11 +58,11 @@ TEST_BEGIN(test_rtree_extrema) {
 	    /* metadata_use_hooks */ true);
 	expect_ptr_not_null(base, "Unexpected base_new failure");
 
-	rtree_t *rtree = &test_rtree;
+	rtree_t    *rtree = &test_rtree;
 	rtree_ctx_t rtree_ctx;
 	rtree_ctx_data_init(&rtree_ctx);
-	expect_false(rtree_new(rtree, base, false),
-	    "Unexpected rtree_new() failure");
+	expect_false(
+	    rtree_new(rtree, base, false), "Unexpected rtree_new() failure");
 
 	rtree_contents_t contents_a;
 	contents_a.edata = edata_a;
@@ -73,13 +74,14 @@ TEST_BEGIN(test_rtree_extrema) {
 	    "Unexpected rtree_write() failure");
 	expect_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, contents_a),
 	    "Unexpected rtree_write() failure");
-	rtree_contents_t read_contents_a = rtree_read(tsdn, rtree, &rtree_ctx,
-	    PAGE);
+	rtree_contents_t read_contents_a = rtree_read(
+	    tsdn, rtree, &rtree_ctx, PAGE);
 	expect_true(contents_a.edata == read_contents_a.edata
-	    && contents_a.metadata.szind == read_contents_a.metadata.szind
-	    && contents_a.metadata.slab == read_contents_a.metadata.slab
-	    && contents_a.metadata.is_head == read_contents_a.metadata.is_head
-	    && contents_a.metadata.state == read_contents_a.metadata.state,
+	        && contents_a.metadata.szind == read_contents_a.metadata.szind
+	        && contents_a.metadata.slab == read_contents_a.metadata.slab
+	        && contents_a.metadata.is_head
+	            == read_contents_a.metadata.is_head
+	        && contents_a.metadata.state == read_contents_a.metadata.state,
 	    "rtree_read() should return previously set value");
 
 	rtree_contents_t contents_b;
@@ -88,15 +90,17 @@ TEST_BEGIN(test_rtree_extrema) {
 	contents_b.metadata.slab = edata_slab_get(edata_b);
 	contents_b.metadata.is_head = edata_is_head_get(edata_b);
 	contents_b.metadata.state = edata_state_get(edata_b);
-	expect_false(rtree_write(tsdn, rtree, &rtree_ctx, ~((uintptr_t)0),
-	    contents_b), "Unexpected rtree_write() failure");
-	rtree_contents_t read_contents_b = rtree_read(tsdn, rtree, &rtree_ctx,
-	    ~((uintptr_t)0));
+	expect_false(
+	    rtree_write(tsdn, rtree, &rtree_ctx, ~((uintptr_t)0), contents_b),
+	    "Unexpected rtree_write() failure");
+	rtree_contents_t read_contents_b = rtree_read(
+	    tsdn, rtree, &rtree_ctx, ~((uintptr_t)0));
 	assert_true(contents_b.edata == read_contents_b.edata
-	    && contents_b.metadata.szind == read_contents_b.metadata.szind
-	    && contents_b.metadata.slab == read_contents_b.metadata.slab
-	    && contents_b.metadata.is_head == read_contents_b.metadata.is_head
-	    && contents_b.metadata.state == read_contents_b.metadata.state,
+	        && contents_b.metadata.szind == read_contents_b.metadata.szind
+	        && contents_b.metadata.slab == read_contents_b.metadata.slab
+	        && contents_b.metadata.is_head
+	            == read_contents_b.metadata.is_head
+	        && contents_b.metadata.state == read_contents_b.metadata.state,
 	    "rtree_read() should return previously set value");
 
 	base_delete(tsdn, base);
@@ -109,19 +113,19 @@ TEST_BEGIN(test_rtree_bits) {
 	    /* metadata_use_hooks */ true);
 	expect_ptr_not_null(base, "Unexpected base_new failure");
 
-	uintptr_t keys[] = {PAGE, PAGE + 1,
-	    PAGE + (((uintptr_t)1) << LG_PAGE) - 1};
+	uintptr_t keys[] = {
+	    PAGE, PAGE + 1, PAGE + (((uintptr_t)1) << LG_PAGE) - 1};
 	edata_t *edata_c = alloc_edata();
 	edata_init(edata_c, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0,
 	    extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD);
 
-	rtree_t *rtree = &test_rtree;
+	rtree_t    *rtree = &test_rtree;
 	rtree_ctx_t rtree_ctx;
 	rtree_ctx_data_init(&rtree_ctx);
-	expect_false(rtree_new(rtree, base, false),
-	    "Unexpected rtree_new() failure");
+	expect_false(
+	    rtree_new(rtree, base, false), "Unexpected rtree_new() failure");
 
-	for (unsigned i = 0; i < sizeof(keys)/sizeof(uintptr_t); i++) {
+	for (unsigned i = 0; i < sizeof(keys) / sizeof(uintptr_t); i++) {
 		rtree_contents_t contents;
 		contents.edata = edata_c;
 		contents.metadata.szind = SC_NSIZES;
@@ -129,18 +133,22 @@ TEST_BEGIN(test_rtree_bits) {
 		contents.metadata.is_head = false;
 		contents.metadata.state = extent_state_active;
 
-		expect_false(rtree_write(tsdn, rtree, &rtree_ctx, keys[i],
-		    contents), "Unexpected rtree_write() failure");
-		for (unsigned j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) {
-			expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx,
-			    keys[j]).edata, edata_c,
+		expect_false(
+		    rtree_write(tsdn, rtree, &rtree_ctx, keys[i], contents),
+		    "Unexpected rtree_write() failure");
+		for (unsigned j = 0; j < sizeof(keys) / sizeof(uintptr_t);
+		     j++) {
+			expect_ptr_eq(
+			    rtree_read(tsdn, rtree, &rtree_ctx, keys[j]).edata,
+			    edata_c,
 			    "rtree_edata_read() should return previously set "
 			    "value and ignore insignificant key bits; i=%u, "
-			    "j=%u, set key=%#"FMTxPTR", get key=%#"FMTxPTR, i,
-			    j, keys[i], keys[j]);
+			    "j=%u, set key=%#" FMTxPTR ", get key=%#" FMTxPTR,
+			    i, j, keys[i], keys[j]);
 		}
 		expect_ptr_null(rtree_read(tsdn, rtree, &rtree_ctx,
-		    (((uintptr_t)2) << LG_PAGE)).edata,
+		                    (((uintptr_t)2) << LG_PAGE))
+		                    .edata,
 		    "Only leftmost rtree leaf should be set; i=%u", i);
 		rtree_clear(tsdn, rtree, &rtree_ctx, keys[i]);
 	}
@@ -159,8 +167,8 @@ TEST_BEGIN(test_rtree_random) {
 	    /* metadata_use_hooks */ true);
 	expect_ptr_not_null(base, "Unexpected base_new failure");
 
-	uintptr_t keys[NSET];
-	rtree_t *rtree = &test_rtree;
+	uintptr_t   keys[NSET];
+	rtree_t    *rtree = &test_rtree;
 	rtree_ctx_t rtree_ctx;
 	rtree_ctx_data_init(&rtree_ctx);
 
@@ -168,15 +176,15 @@ TEST_BEGIN(test_rtree_random) {
 	edata_init(edata_d, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0,
 	    extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD);
 
-	expect_false(rtree_new(rtree, base, false),
-	    "Unexpected rtree_new() failure");
+	expect_false(
+	    rtree_new(rtree, base, false), "Unexpected rtree_new() failure");
 
 	for (unsigned i = 0; i < NSET; i++) {
 		keys[i] = (uintptr_t)gen_rand64(sfmt);
-		rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree,
-		    &rtree_ctx, keys[i], false, true);
-		expect_ptr_not_null(elm,
-		    "Unexpected rtree_leaf_elm_lookup() failure");
+		rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(
+		    tsdn, rtree, &rtree_ctx, keys[i], false, true);
+		expect_ptr_not_null(
+		    elm, "Unexpected rtree_leaf_elm_lookup() failure");
 		rtree_contents_t contents;
 		contents.edata = edata_d;
 		contents.metadata.szind = SC_NSIZES;
@@ -184,26 +192,27 @@ TEST_BEGIN(test_rtree_random) {
 		contents.metadata.is_head = false;
 		contents.metadata.state = edata_state_get(edata_d);
 		rtree_leaf_elm_write(tsdn, rtree, elm, contents);
-		expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx,
-		    keys[i]).edata, edata_d,
+		expect_ptr_eq(
+		    rtree_read(tsdn, rtree, &rtree_ctx, keys[i]).edata, edata_d,
 		    "rtree_edata_read() should return previously set value");
 	}
 	for (unsigned i = 0; i < NSET; i++) {
-		expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx,
-		    keys[i]).edata, edata_d,
+		expect_ptr_eq(
+		    rtree_read(tsdn, rtree, &rtree_ctx, keys[i]).edata, edata_d,
 		    "rtree_edata_read() should return previously set value, "
-		    "i=%u", i);
+		    "i=%u",
+		    i);
 	}
 
 	for (unsigned i = 0; i < NSET; i++) {
 		rtree_clear(tsdn, rtree, &rtree_ctx, keys[i]);
-		expect_ptr_null(rtree_read(tsdn, rtree, &rtree_ctx,
-		    keys[i]).edata,
-		   "rtree_edata_read() should return previously set value");
+		expect_ptr_null(
+		    rtree_read(tsdn, rtree, &rtree_ctx, keys[i]).edata,
+		    "rtree_edata_read() should return previously set value");
 	}
 	for (unsigned i = 0; i < NSET; i++) {
-		expect_ptr_null(rtree_read(tsdn, rtree, &rtree_ctx,
-		    keys[i]).edata,
+		expect_ptr_null(
+		    rtree_read(tsdn, rtree, &rtree_ctx, keys[i]).edata,
 		    "rtree_edata_read() should return previously set value");
 	}
 
@@ -215,8 +224,8 @@ TEST_BEGIN(test_rtree_random) {
 TEST_END
 
 static void
-test_rtree_range_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t start,
-    uintptr_t end) {
+test_rtree_range_write(
+    tsdn_t *tsdn, rtree_t *rtree, uintptr_t start, uintptr_t end) {
 	rtree_ctx_t rtree_ctx;
 	rtree_ctx_data_init(&rtree_ctx);
 
@@ -230,15 +239,17 @@ test_rtree_range_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t start,
 	contents.metadata.is_head = false;
 	contents.metadata.state = extent_state_active;
 
-	expect_false(rtree_write(tsdn, rtree, &rtree_ctx, start,
-	    contents), "Unexpected rtree_write() failure");
-	expect_false(rtree_write(tsdn, rtree, &rtree_ctx, end,
-	    contents), "Unexpected rtree_write() failure");
+	expect_false(rtree_write(tsdn, rtree, &rtree_ctx, start, contents),
+	    "Unexpected rtree_write() failure");
+	expect_false(rtree_write(tsdn, rtree, &rtree_ctx, end, contents),
+	    "Unexpected rtree_write() failure");
 
 	rtree_write_range(tsdn, rtree, &rtree_ctx, start, end, contents);
 	for (uintptr_t i = 0; i < ((end - start) >> LG_PAGE); i++) {
-		expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx,
-		    start + (i << LG_PAGE)).edata, edata_e,
+		expect_ptr_eq(
+		    rtree_read(tsdn, rtree, &rtree_ctx, start + (i << LG_PAGE))
+		        .edata,
+		    edata_e,
 		    "rtree_edata_read() should return previously set value");
 	}
 	rtree_clear_range(tsdn, rtree, &rtree_ctx, start, end);
@@ -247,8 +258,9 @@ test_rtree_range_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t start,
 		elm = rtree_leaf_elm_lookup(tsdn, rtree, &rtree_ctx,
 		    start + (i << LG_PAGE), false, false);
 		expect_ptr_not_null(elm, "Should have been initialized.");
-		expect_ptr_null(rtree_leaf_elm_read(tsdn, rtree, elm,
-		    false).edata, "Should have been cleared.");
+		expect_ptr_null(
+		    rtree_leaf_elm_read(tsdn, rtree, elm, false).edata,
+		    "Should have been cleared.");
 	}
 }
 
@@ -259,8 +271,8 @@ TEST_BEGIN(test_rtree_range) {
 	expect_ptr_not_null(base, "Unexpected base_new failure");
 
 	rtree_t *rtree = &test_rtree;
-	expect_false(rtree_new(rtree, base, false),
-	    "Unexpected rtree_new() failure");
+	expect_false(
+	    rtree_new(rtree, base, false), "Unexpected rtree_new() failure");
 
 	/* Not crossing rtree node boundary first. */
 	uintptr_t start = ZU(1) << rtree_leaf_maskbits();
@@ -280,10 +292,6 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_rtree_read_empty,
-	    test_rtree_extrema,
-	    test_rtree_bits,
-	    test_rtree_random,
-	    test_rtree_range);
+	return test(test_rtree_read_empty, test_rtree_extrema, test_rtree_bits,
+	    test_rtree_random, test_rtree_range);
 }
diff --git a/test/unit/safety_check.c b/test/unit/safety_check.c
index 84726675..558797c0 100644
--- a/test/unit/safety_check.c
+++ b/test/unit/safety_check.c
@@ -8,7 +8,8 @@
  */
 
 bool fake_abort_called;
-void fake_abort(const char *message) {
+void
+fake_abort(const char *message) {
 	(void)message;
 	fake_abort_called = true;
 }
@@ -26,7 +27,7 @@ TEST_BEGIN(test_malloc_free_overflow) {
 
 	safety_check_set_abort(&fake_abort);
 	/* Buffer overflow! */
-	char* ptr = malloc(128);
+	char *ptr = malloc(128);
 	buffer_overflow_write(ptr, 128);
 	free(ptr);
 	safety_check_set_abort(NULL);
@@ -42,7 +43,7 @@ TEST_BEGIN(test_mallocx_dallocx_overflow) {
 
 	safety_check_set_abort(&fake_abort);
 	/* Buffer overflow! */
-	char* ptr = mallocx(128, 0);
+	char *ptr = mallocx(128, 0);
 	buffer_overflow_write(ptr, 128);
 	dallocx(ptr, 0);
 	safety_check_set_abort(NULL);
@@ -58,7 +59,7 @@ TEST_BEGIN(test_malloc_sdallocx_overflow) {
 
 	safety_check_set_abort(&fake_abort);
 	/* Buffer overflow! */
-	char* ptr = malloc(128);
+	char *ptr = malloc(128);
 	buffer_overflow_write(ptr, 128);
 	sdallocx(ptr, 128, 0);
 	safety_check_set_abort(NULL);
@@ -74,7 +75,7 @@ TEST_BEGIN(test_realloc_overflow) {
 
 	safety_check_set_abort(&fake_abort);
 	/* Buffer overflow! */
-	char* ptr = malloc(128);
+	char *ptr = malloc(128);
 	buffer_overflow_write(ptr, 128);
 	ptr = realloc(ptr, 129);
 	safety_check_set_abort(NULL);
@@ -91,7 +92,7 @@ TEST_BEGIN(test_rallocx_overflow) {
 
 	safety_check_set_abort(&fake_abort);
 	/* Buffer overflow! */
-	char* ptr = malloc(128);
+	char *ptr = malloc(128);
 	buffer_overflow_write(ptr, 128);
 	ptr = rallocx(ptr, 129, 0);
 	safety_check_set_abort(NULL);
@@ -108,7 +109,7 @@ TEST_BEGIN(test_xallocx_overflow) {
 
 	safety_check_set_abort(&fake_abort);
 	/* Buffer overflow! */
-	char* ptr = malloc(128);
+	char *ptr = malloc(128);
 	buffer_overflow_write(ptr, 128);
 	size_t result = xallocx(ptr, 129, 0, 0);
 	expect_zu_eq(result, 128, "");
@@ -120,7 +121,7 @@ TEST_BEGIN(test_xallocx_overflow) {
 TEST_END
 
 TEST_BEGIN(test_realloc_no_overflow) {
-	char* ptr = malloc(128);
+	char *ptr = malloc(128);
 	ptr = realloc(ptr, 256);
 	ptr[128] = 0;
 	ptr[255] = 0;
@@ -135,7 +136,7 @@ TEST_BEGIN(test_realloc_no_overflow) {
 TEST_END
 
 TEST_BEGIN(test_rallocx_no_overflow) {
-	char* ptr = malloc(128);
+	char *ptr = malloc(128);
 	ptr = rallocx(ptr, 256, 0);
 	ptr[128] = 0;
 	ptr[255] = 0;
@@ -151,13 +152,8 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_malloc_free_overflow,
-	    test_mallocx_dallocx_overflow,
-	    test_malloc_sdallocx_overflow,
-	    test_realloc_overflow,
-	    test_rallocx_overflow,
-	    test_xallocx_overflow,
-	    test_realloc_no_overflow,
-	    test_rallocx_no_overflow);
+	return test(test_malloc_free_overflow, test_mallocx_dallocx_overflow,
+	    test_malloc_sdallocx_overflow, test_realloc_overflow,
+	    test_rallocx_overflow, test_xallocx_overflow,
+	    test_realloc_no_overflow, test_rallocx_no_overflow);
 }
diff --git a/test/unit/san.c b/test/unit/san.c
index 5b98f52e..2c7f1ec5 100644
--- a/test/unit/san.c
+++ b/test/unit/san.c
@@ -6,8 +6,8 @@
 
 static void
 verify_extent_guarded(tsdn_t *tsdn, void *ptr) {
-	expect_true(extent_is_guarded(tsdn, ptr),
-	    "All extents should be guarded.");
+	expect_true(
+	    extent_is_guarded(tsdn, ptr), "All extents should be guarded.");
 }
 
 #define MAX_SMALL_ALLOCATIONS 4096
@@ -21,13 +21,13 @@ void *small_alloc[MAX_SMALL_ALLOCATIONS];
 TEST_BEGIN(test_guarded_small) {
 	test_skip_if(opt_prof);
 
-	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	tsdn_t  *tsdn = tsd_tsdn(tsd_fetch());
 	unsigned npages = 16, pages_found = 0, ends_found = 0;
 	VARIABLE_ARRAY(uintptr_t, pages, npages);
 
 	/* Allocate to get sanitized pointers. */
-	size_t slab_sz = PAGE;
-	size_t sz = slab_sz / 8;
+	size_t   slab_sz = PAGE;
+	size_t   sz = slab_sz / 8;
 	unsigned n_alloc = 0;
 	while (n_alloc < MAX_SMALL_ALLOCATIONS) {
 		void *ptr = malloc(sz);
@@ -54,8 +54,9 @@ TEST_BEGIN(test_guarded_small) {
 	/* Verify the pages are not continuous, i.e. separated by guards. */
 	for (unsigned i = 0; i < npages - 1; i++) {
 		for (unsigned j = i + 1; j < npages; j++) {
-			uintptr_t ptr_diff = pages[i] > pages[j] ?
-			    pages[i] - pages[j] : pages[j] - pages[i];
+			uintptr_t ptr_diff = pages[i] > pages[j]
+			    ? pages[i] - pages[j]
+			    : pages[j] - pages[i];
 			expect_zu_ge((size_t)ptr_diff, slab_sz + PAGE,
 			    "There should be at least one pages between "
 			    "guarded slabs");
@@ -69,7 +70,7 @@ TEST_BEGIN(test_guarded_small) {
 TEST_END
 
 TEST_BEGIN(test_guarded_large) {
-	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	tsdn_t  *tsdn = tsd_tsdn(tsd_fetch());
 	unsigned nlarge = 32;
 	VARIABLE_ARRAY(uintptr_t, large, nlarge);
 
@@ -85,8 +86,9 @@ TEST_BEGIN(test_guarded_large) {
 	/* Verify the pages are not continuous, i.e. separated by guards. */
 	for (unsigned i = 0; i < nlarge; i++) {
 		for (unsigned j = i + 1; j < nlarge; j++) {
-			uintptr_t ptr_diff = large[i] > large[j] ?
-			    large[i] - large[j] : large[j] - large[i];
+			uintptr_t ptr_diff = large[i] > large[j]
+			    ? large[i] - large[j]
+			    : large[j] - large[i];
 			expect_zu_ge((size_t)ptr_diff, large_sz + 2 * PAGE,
 			    "There should be at least two pages between "
 			    " guarded large allocations");
@@ -102,15 +104,13 @@ TEST_END
 static void
 verify_pdirty(unsigned arena_ind, uint64_t expected) {
 	uint64_t pdirty = get_arena_pdirty(arena_ind);
-	expect_u64_eq(pdirty, expected / PAGE,
-	    "Unexpected dirty page amount.");
+	expect_u64_eq(pdirty, expected / PAGE, "Unexpected dirty page amount.");
 }
 
 static void
 verify_pmuzzy(unsigned arena_ind, uint64_t expected) {
 	uint64_t pmuzzy = get_arena_pmuzzy(arena_ind);
-	expect_u64_eq(pmuzzy, expected / PAGE,
-	    "Unexpected muzzy page amount.");
+	expect_u64_eq(pmuzzy, expected / PAGE, "Unexpected muzzy page amount.");
 }
 
 TEST_BEGIN(test_guarded_decay) {
@@ -140,7 +140,7 @@ TEST_BEGIN(test_guarded_decay) {
 	verify_pmuzzy(arena_ind, 0);
 
 	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
-	int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE;
+	int     flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE;
 
 	/* Should reuse dirty extents for the two mallocx. */
 	void *p1 = do_mallocx(sz1, flags);
@@ -200,8 +200,5 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_guarded_small,
-	    test_guarded_large,
-	    test_guarded_decay);
+	return test(test_guarded_small, test_guarded_large, test_guarded_decay);
 }
diff --git a/test/unit/san_bump.c b/test/unit/san_bump.c
index cafa37fe..54d8583d 100644
--- a/test/unit/san_bump.c
+++ b/test/unit/san_bump.c
@@ -4,6 +4,50 @@
 #include "jemalloc/internal/arena_structs.h"
 #include "jemalloc/internal/san_bump.h"
 
+static extent_hooks_t *san_bump_default_hooks;
+static extent_hooks_t  san_bump_hooks;
+static bool            fail_retained_alloc;
+static unsigned        retained_alloc_fail_calls;
+
+static void *
+san_bump_fail_alloc_hook(extent_hooks_t *UNUSED extent_hooks, void *new_addr,
+    size_t size, size_t alignment, bool *zero, bool *commit,
+    unsigned arena_ind) {
+	if (fail_retained_alloc && new_addr == NULL
+	    && size >= SBA_RETAINED_ALLOC_SIZE) {
+		retained_alloc_fail_calls++;
+		return NULL;
+	}
+	return san_bump_default_hooks->alloc(san_bump_default_hooks, new_addr,
+	    size, alignment, zero, commit, arena_ind);
+}
+
+static void
+install_san_bump_fail_alloc_hooks(unsigned arena_ind) {
+	size_t          hooks_mib[3];
+	size_t          hooks_miblen = sizeof(hooks_mib) / sizeof(size_t);
+	size_t          old_size = sizeof(extent_hooks_t *);
+	size_t          new_size = sizeof(extent_hooks_t *);
+	extent_hooks_t *new_hooks;
+	extent_hooks_t *old_hooks;
+
+	expect_d_eq(
+	    mallctlnametomib("arena.0.extent_hooks", hooks_mib, &hooks_miblen),
+	    0, "Unexpected mallctlnametomib() failure");
+	hooks_mib[1] = (size_t)arena_ind;
+	expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks,
+	                &old_size, NULL, 0),
+	    0, "Unexpected extent_hooks error");
+
+	san_bump_default_hooks = old_hooks;
+	san_bump_hooks = *old_hooks;
+	san_bump_hooks.alloc = san_bump_fail_alloc_hook;
+	new_hooks = &san_bump_hooks;
+	expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, NULL, NULL,
+	                (void *)&new_hooks, new_size),
+	    0, "Unexpected extent_hooks install failure");
+}
+
 TEST_BEGIN(test_san_bump_alloc) {
 	test_skip_if(!maps_coalesce || !opt_retain);
 
@@ -16,12 +60,12 @@ TEST_BEGIN(test_san_bump_alloc) {
 	assert_u_ne(arena_ind, UINT_MAX, "Failed to create an arena");
 
 	arena_t *arena = arena_get(tsdn, arena_ind, false);
-	pac_t *pac = &arena->pa_shard.pac;
+	pac_t   *pac = &arena->pa_shard.pac;
 
-	size_t alloc_size = PAGE * 16;
-	size_t alloc_n = alloc_size / sizeof(unsigned);
-	edata_t* edata = san_bump_alloc(tsdn, &sba, pac, pac_ehooks_get(pac),
-	    alloc_size, /* zero */ false);
+	size_t   alloc_size = PAGE * 16;
+	size_t   alloc_n = alloc_size / sizeof(unsigned);
+	edata_t *edata = san_bump_alloc(
+	    tsdn, &sba, pac, pac_ehooks_get(pac), alloc_size, /* zero */ false);
 
 	expect_ptr_not_null(edata, "Failed to allocate edata");
 	expect_u_eq(edata_arena_ind_get(edata), arena_ind,
@@ -39,10 +83,10 @@ TEST_BEGIN(test_san_bump_alloc) {
 		((unsigned *)ptr)[i] = 1;
 	}
 
-	size_t alloc_size2 = PAGE * 28;
-	size_t alloc_n2 = alloc_size / sizeof(unsigned);
-	edata_t *edata2 = san_bump_alloc(tsdn, &sba, pac, pac_ehooks_get(pac),
-	    alloc_size2, /* zero */ true);
+	size_t   alloc_size2 = PAGE * 28;
+	size_t   alloc_n2 = alloc_size / sizeof(unsigned);
+	edata_t *edata2 = san_bump_alloc(
+	    tsdn, &sba, pac, pac_ehooks_get(pac), alloc_size2, /* zero */ true);
 
 	expect_ptr_not_null(edata2, "Failed to allocate edata");
 	expect_u_eq(edata_arena_ind_get(edata2), arena_ind,
@@ -57,11 +101,11 @@ TEST_BEGIN(test_san_bump_alloc) {
 	expect_ptr_not_null(ptr, "Edata was assigned an invalid address");
 
 	uintptr_t ptrdiff = ptr2 > ptr ? (uintptr_t)ptr2 - (uintptr_t)ptr
-	    : (uintptr_t)ptr - (uintptr_t)ptr2;
-	size_t between_allocs = (size_t)ptrdiff - alloc_size;
+	                               : (uintptr_t)ptr - (uintptr_t)ptr2;
+	size_t    between_allocs = (size_t)ptrdiff - alloc_size;
 
-	expect_zu_ge(between_allocs, PAGE,
-	    "Guard page between allocs is missing");
+	expect_zu_ge(
+	    between_allocs, PAGE, "Guard page between allocs is missing");
 
 	for (unsigned i = 0; i < alloc_n2; ++i) {
 		expect_u_eq(((unsigned *)ptr2)[i], 0, "Memory is not zeroed");
@@ -69,6 +113,48 @@ TEST_BEGIN(test_san_bump_alloc) {
 }
 TEST_END
 
+TEST_BEGIN(test_failed_grow_preserves_curr_reg) {
+	test_skip_if(!maps_coalesce || !opt_retain);
+
+	tsdn_t *tsdn = tsdn_fetch();
+
+	san_bump_alloc_t sba;
+	san_bump_alloc_init(&sba);
+
+	unsigned arena_ind = do_arena_create(0, 0);
+	assert_u_ne(arena_ind, UINT_MAX, "Failed to create an arena");
+	install_san_bump_fail_alloc_hooks(arena_ind);
+
+	arena_t *arena = arena_get(tsdn, arena_ind, false);
+	pac_t   *pac = &arena->pa_shard.pac;
+
+	size_t   small_alloc_size = PAGE * 16;
+	edata_t *edata = san_bump_alloc(tsdn, &sba, pac, pac_ehooks_get(pac),
+	    small_alloc_size, /* zero */ false);
+	expect_ptr_not_null(edata, "Initial san_bump allocation failed");
+	expect_ptr_not_null(sba.curr_reg,
+	    "Expected retained region remainder after initial allocation");
+
+	fail_retained_alloc = true;
+	retained_alloc_fail_calls = 0;
+
+	edata_t *failed = san_bump_alloc(tsdn, &sba, pac, pac_ehooks_get(pac),
+	    SBA_RETAINED_ALLOC_SIZE, /* zero */ false);
+	expect_ptr_null(failed, "Expected retained grow allocation failure");
+	expect_u_eq(retained_alloc_fail_calls, 1,
+	    "Expected exactly one failed retained allocation attempt");
+
+	edata_t *reused = san_bump_alloc(tsdn, &sba, pac, pac_ehooks_get(pac),
+	    small_alloc_size, /* zero */ false);
+	expect_ptr_not_null(
+	    reused, "Expected allocator to reuse preexisting current region");
+	expect_u_eq(retained_alloc_fail_calls, 1,
+	    "Reuse path should not attempt another retained grow allocation");
+
+	fail_retained_alloc = false;
+}
+TEST_END
+
 TEST_BEGIN(test_large_alloc_size) {
 	test_skip_if(!maps_coalesce || !opt_retain);
 
@@ -81,11 +167,11 @@ TEST_BEGIN(test_large_alloc_size) {
 	assert_u_ne(arena_ind, UINT_MAX, "Failed to create an arena");
 
 	arena_t *arena = arena_get(tsdn, arena_ind, false);
-	pac_t *pac = &arena->pa_shard.pac;
+	pac_t   *pac = &arena->pa_shard.pac;
 
-	size_t alloc_size = SBA_RETAINED_ALLOC_SIZE * 2;
-	edata_t* edata = san_bump_alloc(tsdn, &sba, pac, pac_ehooks_get(pac),
-	    alloc_size, /* zero */ false);
+	size_t   alloc_size = SBA_RETAINED_ALLOC_SIZE * 2;
+	edata_t *edata = san_bump_alloc(
+	    tsdn, &sba, pac, pac_ehooks_get(pac), alloc_size, /* zero */ false);
 	expect_u_eq(edata_arena_ind_get(edata), arena_ind,
 	    "Edata was assigned an incorrect arena id");
 	expect_zu_eq(edata_size_get(edata), alloc_size,
@@ -105,7 +191,6 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_san_bump_alloc,
+	return test(test_san_bump_alloc, test_failed_grow_preserves_curr_reg,
 	    test_large_alloc_size);
 }
diff --git a/test/unit/sc.c b/test/unit/sc.c
index d207481c..725ede0e 100644
--- a/test/unit/sc.c
+++ b/test/unit/sc.c
@@ -4,7 +4,7 @@ TEST_BEGIN(test_update_slab_size) {
 	sc_data_t data;
 	memset(&data, 0, sizeof(data));
 	sc_data_init(&data);
-	sc_t *tiny = &data.sc[0];
+	sc_t  *tiny = &data.sc[0];
 	size_t tiny_size = (ZU(1) << tiny->lg_base)
 	    + (ZU(tiny->ndelta) << tiny->lg_delta);
 	size_t pgs_too_big = (tiny_size * BITMAP_MAXBITS + PAGE - 1) / PAGE + 1;
@@ -13,14 +13,14 @@ TEST_BEGIN(test_update_slab_size) {
 
 	sc_data_update_slab_size(&data, 1, 10 * PAGE, 1);
 	for (int i = 0; i < data.nbins; i++) {
-		sc_t *sc = &data.sc[i];
+		sc_t  *sc = &data.sc[i];
 		size_t reg_size = (ZU(1) << sc->lg_base)
 		    + (ZU(sc->ndelta) << sc->lg_delta);
 		if (reg_size <= PAGE) {
 			expect_d_eq(sc->pgs, 1, "Ignored valid page size hint");
 		} else {
-			expect_d_gt(sc->pgs, 1,
-			    "Allowed invalid page size hint");
+			expect_d_gt(
+			    sc->pgs, 1, "Allowed invalid page size hint");
 		}
 	}
 }
@@ -28,6 +28,5 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_update_slab_size);
+	return test(test_update_slab_size);
 }
diff --git a/test/unit/sec.c b/test/unit/sec.c
index f3ec403d..2a6a00ce 100644
--- a/test/unit/sec.c
+++ b/test/unit/sec.c
@@ -2,633 +2,493 @@
 
 #include "jemalloc/internal/sec.h"
 
-typedef struct pai_test_allocator_s pai_test_allocator_t;
-struct pai_test_allocator_s {
-	pai_t pai;
-	bool alloc_fail;
-	size_t alloc_count;
-	size_t alloc_batch_count;
-	size_t dalloc_count;
-	size_t dalloc_batch_count;
+typedef struct test_data_s test_data_t;
+struct test_data_s {
 	/*
-	 * We use a simple bump allocator as the implementation.  This isn't
-	 * *really* correct, since we may allow expansion into a subsequent
-	 * allocation, but it's not like the SEC is really examining the
-	 * pointers it gets back; this is mostly just helpful for debugging.
+	 * Must be the first member -- we convert back and forth between the
+	 * test_data_t and the sec_t;
 	 */
-	uintptr_t next_ptr;
-	size_t expand_count;
-	bool expand_return_value;
-	size_t shrink_count;
-	bool shrink_return_value;
+	sec_t   sec;
+	base_t *base;
 };
 
 static void
-test_sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t max_alloc,
-    size_t max_bytes) {
-	sec_opts_t opts;
-	opts.nshards = 1;
-	opts.max_alloc = max_alloc;
-	opts.max_bytes = max_bytes;
-	/*
-	 * Just choose reasonable defaults for these; most tests don't care so
-	 * long as they're something reasonable.
-	 */
-	opts.bytes_after_flush = max_bytes / 2;
-	opts.batch_fill_extra = 4;
-
-	/*
-	 * We end up leaking this base, but that's fine; this test is
-	 * short-running, and SECs are arena-scoped in reality.
-	 */
-	base_t *base = base_new(TSDN_NULL, /* ind */ 123,
+test_data_init(tsdn_t *tsdn, test_data_t *tdata, const sec_opts_t *opts) {
+	tdata->base = base_new(TSDN_NULL, /* ind */ 123,
 	    &ehooks_default_extent_hooks, /* metadata_use_hooks */ true);
 
-	bool err = sec_init(TSDN_NULL, sec, base, fallback, &opts);
+	bool err = sec_init(tsdn, &tdata->sec, tdata->base, opts);
 	assert_false(err, "Unexpected initialization failure");
-	assert_u_ge(sec->npsizes, 0, "Zero size classes allowed for caching");
-}
-
-static inline edata_t *
-pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size,
-    size_t alignment, bool zero, bool guarded, bool frequent_reuse,
-    bool *deferred_work_generated) {
-	assert(!guarded);
-	pai_test_allocator_t *ta = (pai_test_allocator_t *)self;
-	if (ta->alloc_fail) {
-		return NULL;
+	if (tdata->sec.opts.nshards > 0) {
+		assert_u_ge(tdata->sec.npsizes, 0,
+		    "Zero size classes allowed for caching");
 	}
-	edata_t *edata = malloc(sizeof(edata_t));
-	assert_ptr_not_null(edata, "");
-	ta->next_ptr += alignment - 1;
-	edata_init(edata, /* arena_ind */ 0,
-	    (void *)(ta->next_ptr & ~(alignment - 1)), size,
-	    /* slab */ false,
-	    /* szind */ 0, /* sn */ 1, extent_state_active, /* zero */ zero,
-	    /* comitted */ true, /* ranged */ false, EXTENT_NOT_HEAD);
-	ta->next_ptr += size;
-	ta->alloc_count++;
-	return edata;
-}
-
-static inline size_t
-pai_test_allocator_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size,
-    size_t nallocs, edata_list_active_t *results,
-    bool *deferred_work_generated) {
-	pai_test_allocator_t *ta = (pai_test_allocator_t *)self;
-	if (ta->alloc_fail) {
-		return 0;
-	}
-	for (size_t i = 0; i < nallocs; i++) {
-		edata_t *edata = malloc(sizeof(edata_t));
-		assert_ptr_not_null(edata, "");
-		edata_init(edata, /* arena_ind */ 0,
-		    (void *)ta->next_ptr, size,
-		    /* slab */ false, /* szind */ 0, /* sn */ 1,
-		    extent_state_active, /* zero */ false, /* comitted */ true,
-		    /* ranged */ false, EXTENT_NOT_HEAD);
-		ta->next_ptr += size;
-		ta->alloc_batch_count++;
-		edata_list_active_append(results, edata);
-	}
-	return nallocs;
-}
-
-static bool
-pai_test_allocator_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    size_t old_size, size_t new_size, bool zero,
-    bool *deferred_work_generated) {
-	pai_test_allocator_t *ta = (pai_test_allocator_t *)self;
-	ta->expand_count++;
-	return ta->expand_return_value;
-}
-
-static bool
-pai_test_allocator_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    size_t old_size, size_t new_size, bool *deferred_work_generated) {
-	pai_test_allocator_t *ta = (pai_test_allocator_t *)self;
-	ta->shrink_count++;
-	return ta->shrink_return_value;
 }
 
 static void
-pai_test_allocator_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    bool *deferred_work_generated) {
-	pai_test_allocator_t *ta = (pai_test_allocator_t *)self;
-	ta->dalloc_count++;
-	free(edata);
+destroy_test_data(tsdn_t *tsdn, test_data_t *tdata) {
+	/* There is no destroy sec to delete the bins ?! */
+	base_delete(tsdn, tdata->base);
 }
 
-static void
-pai_test_allocator_dalloc_batch(tsdn_t *tsdn, pai_t *self,
-    edata_list_active_t *list, bool *deferred_work_generated) {
-	pai_test_allocator_t *ta = (pai_test_allocator_t *)self;
-
-	edata_t *edata;
-	while ((edata = edata_list_active_first(list)) != NULL) {
-		edata_list_active_remove(list, edata);
-		ta->dalloc_batch_count++;
-		free(edata);
-	}
-}
-
-static inline void
-pai_test_allocator_init(pai_test_allocator_t *ta) {
-	ta->alloc_fail = false;
-	ta->alloc_count = 0;
-	ta->alloc_batch_count = 0;
-	ta->dalloc_count = 0;
-	ta->dalloc_batch_count = 0;
-	/* Just don't start the edata at 0. */
-	ta->next_ptr = 10 * PAGE;
-	ta->expand_count = 0;
-	ta->expand_return_value = false;
-	ta->shrink_count = 0;
-	ta->shrink_return_value = false;
-	ta->pai.alloc = &pai_test_allocator_alloc;
-	ta->pai.alloc_batch = &pai_test_allocator_alloc_batch;
-	ta->pai.expand = &pai_test_allocator_expand;
-	ta->pai.shrink = &pai_test_allocator_shrink;
-	ta->pai.dalloc = &pai_test_allocator_dalloc;
-	ta->pai.dalloc_batch = &pai_test_allocator_dalloc_batch;
-}
-
-TEST_BEGIN(test_reuse) {
-	pai_test_allocator_t ta;
-	pai_test_allocator_init(&ta);
-	sec_t sec;
-	/*
-	 * We can't use the "real" tsd, since we malloc within the test
-	 * allocator hooks; we'd get lock inversion crashes.  Eventually, we
-	 * should have a way to mock tsds, but for now just don't do any
-	 * lock-order checking.
-	 */
-	tsdn_t *tsdn = TSDN_NULL;
-	/*
-	 * 11 allocs apiece of 1-PAGE and 2-PAGE objects means that we should be
-	 * able to get to 33 pages in the cache before triggering a flush.  We
-	 * set the flush liimt to twice this amount, to avoid accidentally
-	 * triggering a flush caused by the batch-allocation down the cache fill
-	 * pathway disrupting ordering.
-	 */
-	enum { NALLOCS = 11 };
-	edata_t *one_page[NALLOCS];
-	edata_t *two_page[NALLOCS];
-	bool deferred_work_generated = false;
-	test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 2 * PAGE,
-	    /* max_bytes */ 2 * (NALLOCS * PAGE + NALLOCS * 2 * PAGE));
-	for (int i = 0; i < NALLOCS; i++) {
-		one_page[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
-		    /* zero */ false, /* guarded */ false, /* frequent_reuse */
-		    false, &deferred_work_generated);
-		expect_ptr_not_null(one_page[i], "Unexpected alloc failure");
-		two_page[i] = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE,
-		    /* zero */ false, /* guarded */ false, /* frequent_reuse */
-		    false, &deferred_work_generated);
-		expect_ptr_not_null(one_page[i], "Unexpected alloc failure");
-	}
-	expect_zu_eq(0, ta.alloc_count, "Should be using batch allocs");
-	size_t max_allocs = ta.alloc_count + ta.alloc_batch_count;
-	expect_zu_le(2 * NALLOCS, max_allocs,
-	    "Incorrect number of allocations");
-	expect_zu_eq(0, ta.dalloc_count,
-	    "Incorrect number of allocations");
-	/*
-	 * Free in a different order than we allocated, to make sure free-list
-	 * separation works correctly.
-	 */
-	for (int i = NALLOCS - 1; i >= 0; i--) {
-		pai_dalloc(tsdn, &sec.pai, one_page[i],
-		    &deferred_work_generated);
-	}
-	for (int i = NALLOCS - 1; i >= 0; i--) {
-		pai_dalloc(tsdn, &sec.pai, two_page[i],
-		    &deferred_work_generated);
-	}
-	expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count,
-	    "Incorrect number of allocations");
-	expect_zu_eq(0, ta.dalloc_count,
-	    "Incorrect number of allocations");
-	/*
-	 * Check that the n'th most recent deallocated extent is returned for
-	 * the n'th alloc request of a given size.
-	 */
-	for (int i = 0; i < NALLOCS; i++) {
-		edata_t *alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
-		    /* zero */ false, /* guarded */ false, /* frequent_reuse */
-		    false, &deferred_work_generated);
-		edata_t *alloc2 = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE,
-		    /* zero */ false, /* guarded */ false, /* frequent_reuse */
-		    false, &deferred_work_generated);
-		expect_ptr_eq(one_page[i], alloc1,
-		    "Got unexpected allocation");
-		expect_ptr_eq(two_page[i], alloc2,
-		    "Got unexpected allocation");
-	}
-	expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count,
-	    "Incorrect number of allocations");
-	expect_zu_eq(0, ta.dalloc_count,
-	    "Incorrect number of allocations");
-}
-TEST_END
-
-
-TEST_BEGIN(test_auto_flush) {
-	pai_test_allocator_t ta;
-	pai_test_allocator_init(&ta);
-	sec_t sec;
-	/* See the note above -- we can't use the real tsd. */
-	tsdn_t *tsdn = TSDN_NULL;
-	/*
-	 * 10-allocs apiece of 1-PAGE and 2-PAGE objects means that we should be
-	 * able to get to 30 pages in the cache before triggering a flush.  The
-	 * choice of NALLOCS here is chosen to match the batch allocation
-	 * default (4 extra + 1 == 5; so 10 allocations leaves the cache exactly
-	 * empty, even in the presence of batch allocation on fill).
-	 * Eventually, once our allocation batching strategies become smarter,
-	 * this should change.
-	 */
-	enum { NALLOCS = 10 };
-	edata_t *extra_alloc;
-	edata_t *allocs[NALLOCS];
-	bool deferred_work_generated = false;
-	test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE,
-	    /* max_bytes */ NALLOCS * PAGE);
-	for (int i = 0; i < NALLOCS; i++) {
-		allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
-		    /* zero */ false, /* guarded */ false, /* frequent_reuse */
-		    false, &deferred_work_generated);
-		expect_ptr_not_null(allocs[i], "Unexpected alloc failure");
-	}
-	extra_alloc = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false,
-	    /* guarded */ false, /* frequent_reuse */ false,
-	    &deferred_work_generated);
-	expect_ptr_not_null(extra_alloc, "Unexpected alloc failure");
-	size_t max_allocs = ta.alloc_count + ta.alloc_batch_count;
-	expect_zu_le(NALLOCS + 1, max_allocs,
-	    "Incorrect number of allocations");
-	expect_zu_eq(0, ta.dalloc_count,
-	    "Incorrect number of allocations");
-	/* Free until the SEC is full, but should not have flushed yet. */
-	for (int i = 0; i < NALLOCS; i++) {
-		pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated);
-	}
-	expect_zu_le(NALLOCS + 1, max_allocs,
-	    "Incorrect number of allocations");
-	expect_zu_eq(0, ta.dalloc_count,
-	    "Incorrect number of allocations");
-	/*
-	 * Free the extra allocation; this should trigger a flush.  The internal
-	 * flushing logic is allowed to get complicated; for now, we rely on our
-	 * whitebox knowledge of the fact that the SEC flushes bins in their
-	 * entirety when it decides to do so, and it has only one bin active
-	 * right now.
-	 */
-	pai_dalloc(tsdn, &sec.pai, extra_alloc, &deferred_work_generated);
-	expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count,
-	    "Incorrect number of allocations");
-	expect_zu_eq(0, ta.dalloc_count,
-	    "Incorrect number of (non-batch) deallocations");
-	expect_zu_eq(NALLOCS + 1, ta.dalloc_batch_count,
-	    "Incorrect number of batch deallocations");
-}
-TEST_END
-
-/*
- * A disable and a flush are *almost* equivalent; the only difference is what
- * happens afterwards; disabling disallows all future caching as well.
- */
-static void
-do_disable_flush_test(bool is_disable) {
-	pai_test_allocator_t ta;
-	pai_test_allocator_init(&ta);
-	sec_t sec;
-	/* See the note above -- we can't use the real tsd. */
-	tsdn_t *tsdn = TSDN_NULL;
-
-	enum { NALLOCS = 11 };
-	edata_t *allocs[NALLOCS];
-	bool deferred_work_generated = false;
-	test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE,
-	    /* max_bytes */ NALLOCS * PAGE);
-	for (int i = 0; i < NALLOCS; i++) {
-		allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
-		    /* zero */ false, /* guarded */ false, /* frequent_reuse */
-		    false, &deferred_work_generated);
-		expect_ptr_not_null(allocs[i], "Unexpected alloc failure");
-	}
-	/* Free all but the last aloc. */
-	for (int i = 0; i < NALLOCS - 1; i++) {
-		pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated);
-	}
-	size_t max_allocs = ta.alloc_count + ta.alloc_batch_count;
-
-	expect_zu_le(NALLOCS, max_allocs, "Incorrect number of allocations");
-	expect_zu_eq(0, ta.dalloc_count,
-	    "Incorrect number of allocations");
-
-	if (is_disable) {
-		sec_disable(tsdn, &sec);
-	} else {
-		sec_flush(tsdn, &sec);
-	}
-
-	expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count,
-	    "Incorrect number of allocations");
-	expect_zu_eq(0, ta.dalloc_count,
-	    "Incorrect number of (non-batch) deallocations");
-	expect_zu_le(NALLOCS - 1, ta.dalloc_batch_count,
-	    "Incorrect number of batch deallocations");
-	size_t old_dalloc_batch_count = ta.dalloc_batch_count;
-
-	/*
-	 * If we free into a disabled SEC, it should forward to the fallback.
-	 * Otherwise, the SEC should accept the allocation.
-	 */
-	pai_dalloc(tsdn, &sec.pai, allocs[NALLOCS - 1],
-	    &deferred_work_generated);
-
-	expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count,
-	    "Incorrect number of allocations");
-	expect_zu_eq(is_disable ? 1 : 0, ta.dalloc_count,
-	    "Incorrect number of (non-batch) deallocations");
-	expect_zu_eq(old_dalloc_batch_count, ta.dalloc_batch_count,
-	    "Incorrect number of batch deallocations");
-}
-
-TEST_BEGIN(test_disable) {
-	do_disable_flush_test(/* is_disable */ true);
-}
-TEST_END
-
-TEST_BEGIN(test_flush) {
-	do_disable_flush_test(/* is_disable */ false);
-}
-TEST_END
-
-TEST_BEGIN(test_max_alloc_respected) {
-	pai_test_allocator_t ta;
-	pai_test_allocator_init(&ta);
-	sec_t sec;
-	/* See the note above -- we can't use the real tsd. */
-	tsdn_t *tsdn = TSDN_NULL;
-
-	size_t max_alloc = 2 * PAGE;
-	size_t attempted_alloc = 3 * PAGE;
-
-	bool deferred_work_generated = false;
-
-	test_sec_init(&sec, &ta.pai, /* nshards */ 1, max_alloc,
-	    /* max_bytes */ 1000 * PAGE);
-
-	for (size_t i = 0; i < 100; i++) {
-		expect_zu_eq(i, ta.alloc_count,
-		    "Incorrect number of allocations");
-		expect_zu_eq(i, ta.dalloc_count,
-		    "Incorrect number of deallocations");
-		edata_t *edata = pai_alloc(tsdn, &sec.pai, attempted_alloc,
-		    PAGE, /* zero */ false, /* guarded */ false,
-		    /* frequent_reuse */ false, &deferred_work_generated);
-		expect_ptr_not_null(edata, "Unexpected alloc failure");
-		expect_zu_eq(i + 1, ta.alloc_count,
-		    "Incorrect number of allocations");
-		expect_zu_eq(i, ta.dalloc_count,
-		    "Incorrect number of deallocations");
-		pai_dalloc(tsdn, &sec.pai, edata, &deferred_work_generated);
-	}
-}
-TEST_END
-
-TEST_BEGIN(test_expand_shrink_delegate) {
-	/*
-	 * Expand and shrink shouldn't affect sec state; they should just
-	 * delegate to the fallback PAI.
-	 */
-	pai_test_allocator_t ta;
-	pai_test_allocator_init(&ta);
-	sec_t sec;
-	/* See the note above -- we can't use the real tsd. */
-	tsdn_t *tsdn = TSDN_NULL;
-
-	bool deferred_work_generated = false;
-
-	test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 10 * PAGE,
-	    /* max_bytes */ 1000 * PAGE);
-	edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
-	    /* zero */ false, /* guarded */ false, /* frequent_reuse */ false,
-	    &deferred_work_generated);
-	expect_ptr_not_null(edata, "Unexpected alloc failure");
-
-	bool err = pai_expand(tsdn, &sec.pai, edata, PAGE, 4 * PAGE,
-	    /* zero */ false, &deferred_work_generated);
-	expect_false(err, "Unexpected expand failure");
-	expect_zu_eq(1, ta.expand_count, "");
-	ta.expand_return_value = true;
-	err = pai_expand(tsdn, &sec.pai, edata, 4 * PAGE, 3 * PAGE,
-	    /* zero */ false, &deferred_work_generated);
-	expect_true(err, "Unexpected expand success");
-	expect_zu_eq(2, ta.expand_count, "");
-
-	err = pai_shrink(tsdn, &sec.pai, edata, 4 * PAGE, 2 * PAGE,
-	    &deferred_work_generated);
-	expect_false(err, "Unexpected shrink failure");
-	expect_zu_eq(1, ta.shrink_count, "");
-	ta.shrink_return_value = true;
-	err = pai_shrink(tsdn, &sec.pai, edata, 2 * PAGE, PAGE,
-	    &deferred_work_generated);
-	expect_true(err, "Unexpected shrink success");
-	expect_zu_eq(2, ta.shrink_count, "");
-}
-TEST_END
-
-TEST_BEGIN(test_nshards_0) {
-	pai_test_allocator_t ta;
-	pai_test_allocator_init(&ta);
-	sec_t sec;
-	/* See the note above -- we can't use the real tsd. */
-	tsdn_t *tsdn = TSDN_NULL;
-	base_t *base = base_new(TSDN_NULL, /* ind */ 123,
-	    &ehooks_default_extent_hooks, /* metadata_use_hooks */ true);
-
-	sec_opts_t opts = SEC_OPTS_DEFAULT;
+TEST_BEGIN(test_max_nshards_option_zero) {
+	test_data_t tdata;
+	sec_opts_t  opts;
 	opts.nshards = 0;
-	sec_init(TSDN_NULL, &sec, base, &ta.pai, &opts);
+	opts.max_alloc = PAGE;
+	opts.max_bytes = 512 * PAGE;
 
-	bool deferred_work_generated = false;
-	edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
-	    /* zero */ false, /* guarded */ false, /* frequent_reuse */ false,
-	    &deferred_work_generated);
-	pai_dalloc(tsdn, &sec.pai, edata, &deferred_work_generated);
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	test_data_init(tsdn, &tdata, &opts);
 
-	/* Both operations should have gone directly to the fallback. */
-	expect_zu_eq(1, ta.alloc_count, "");
-	expect_zu_eq(1, ta.dalloc_count, "");
+	edata_t *edata = sec_alloc(tsdn, &tdata.sec, PAGE);
+	expect_ptr_null(edata, "SEC should be disabled when nshards==0");
+	destroy_test_data(tsdn, &tdata);
 }
 TEST_END
 
+TEST_BEGIN(test_max_alloc_option_too_small) {
+	test_data_t tdata;
+	sec_opts_t  opts;
+	opts.nshards = 1;
+	opts.max_alloc = 2 * PAGE;
+	opts.max_bytes = 512 * PAGE;
+
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	test_data_init(tsdn, &tdata, &opts);
+
+	edata_t *edata = sec_alloc(tsdn, &tdata.sec, 3 * PAGE);
+	expect_ptr_null(edata, "max_alloc is 2*PAGE, should not alloc 3*PAGE");
+	destroy_test_data(tsdn, &tdata);
+}
+TEST_END
+
+TEST_BEGIN(test_sec_fill) {
+	test_data_t tdata;
+	sec_opts_t  opts;
+	opts.nshards = 1;
+	opts.max_alloc = 2 * PAGE;
+	opts.max_bytes = 4 * PAGE;
+	opts.batch_fill_extra = 2;
+
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	test_data_init(tsdn, &tdata, &opts);
+
+	/* Fill the cache with two extents */
+	sec_stats_t         stats = {0};
+	edata_list_active_t allocs;
+	edata_list_active_init(&allocs);
+	edata_t edata1, edata2;
+	edata_size_set(&edata1, PAGE);
+	edata_size_set(&edata2, PAGE);
+	edata_list_active_append(&allocs, &edata1);
+	edata_list_active_append(&allocs, &edata2);
+	sec_fill(tsdn, &tdata.sec, PAGE, &allocs, 2);
+	sec_stats_merge(tsdn, &tdata.sec, &stats);
+	expect_zu_eq(stats.bytes, 2 * PAGE, "SEC should have what we filled");
+	expect_true(edata_list_active_empty(&allocs),
+	    "extents should be consumed by sec");
+
+	/* Try to overfill and confirm that max_bytes is respected. */
+	stats.bytes = 0;
+	edata_t edata5, edata4, edata3;
+	edata_size_set(&edata3, PAGE);
+	edata_size_set(&edata4, PAGE);
+	edata_size_set(&edata5, PAGE);
+	edata_list_active_append(&allocs, &edata3);
+	edata_list_active_append(&allocs, &edata4);
+	edata_list_active_append(&allocs, &edata5);
+	sec_fill(tsdn, &tdata.sec, PAGE, &allocs, 3);
+	sec_stats_merge(tsdn, &tdata.sec, &stats);
+	expect_zu_eq(
+	    stats.bytes, opts.max_bytes, "SEC can't have more than max_bytes");
+	expect_false(edata_list_active_empty(&allocs), "Not all should fit");
+	expect_zu_eq(stats.total.noverfills, 1, "Expected one overfill");
+	destroy_test_data(tsdn, &tdata);
+}
+TEST_END
+
+TEST_BEGIN(test_sec_alloc) {
+	test_data_t tdata;
+	sec_opts_t  opts;
+	opts.nshards = 1;
+	opts.max_alloc = 2 * PAGE;
+	opts.max_bytes = 4 * PAGE;
+	opts.batch_fill_extra = 1;
+
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	test_data_init(tsdn, &tdata, &opts);
+
+	/* Alloc from empty cache returns NULL */
+	edata_t *edata = sec_alloc(tsdn, &tdata.sec, PAGE);
+	expect_ptr_null(edata, "SEC is empty");
+
+	/* Place two extents into the sec */
+	edata_list_active_t allocs;
+	edata_list_active_init(&allocs);
+	edata_t edata1, edata2;
+	edata_size_set(&edata1, PAGE);
+	edata_list_active_append(&allocs, &edata1);
+	sec_dalloc(tsdn, &tdata.sec, &allocs);
+	expect_true(edata_list_active_empty(&allocs), "");
+	edata_size_set(&edata2, PAGE);
+	edata_list_active_append(&allocs, &edata2);
+	sec_dalloc(tsdn, &tdata.sec, &allocs);
+	expect_true(edata_list_active_empty(&allocs), "");
+
+	sec_stats_t stats = {0};
+	sec_stats_merge(tsdn, &tdata.sec, &stats);
+	expect_zu_eq(stats.bytes, 2 * PAGE,
+	    "After fill bytes should reflect what is in the cache");
+	stats.bytes = 0;
+
+	/* Most recently cached extent should be used on alloc */
+	edata = sec_alloc(tsdn, &tdata.sec, PAGE);
+	expect_ptr_eq(edata, &edata2, "edata2 is most recently used");
+	sec_stats_merge(tsdn, &tdata.sec, &stats);
+	expect_zu_eq(stats.bytes, PAGE, "One more item left in the cache");
+	stats.bytes = 0;
+
+	/* Alloc can still get extents from cache */
+	edata = sec_alloc(tsdn, &tdata.sec, PAGE);
+	expect_ptr_eq(edata, &edata1, "SEC is not empty");
+	sec_stats_merge(tsdn, &tdata.sec, &stats);
+	expect_zu_eq(stats.bytes, 0, "No more items after last one is popped");
+
+	/* And cache is empty again */
+	edata = sec_alloc(tsdn, &tdata.sec, PAGE);
+	expect_ptr_null(edata, "SEC is empty");
+	destroy_test_data(tsdn, &tdata);
+}
+TEST_END
+
+TEST_BEGIN(test_sec_dalloc) {
+	test_data_t tdata;
+	sec_opts_t  opts;
+	opts.nshards = 1;
+	opts.max_alloc = PAGE;
+	opts.max_bytes = 2 * PAGE;
+
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	test_data_init(tsdn, &tdata, &opts);
+
+	/* Return one extent into the cache */
+	edata_list_active_t allocs;
+	edata_list_active_init(&allocs);
+	edata_t edata1;
+	edata_size_set(&edata1, PAGE);
+	edata_list_active_append(&allocs, &edata1);
+
+	/* SEC is empty, we return one pointer to it */
+	sec_dalloc(tsdn, &tdata.sec, &allocs);
+	expect_true(
+	    edata_list_active_empty(&allocs), "extents should be consumed");
+
+	/* Return one more extent, so that we are at the limit */
+	edata_t edata2;
+	edata_size_set(&edata2, PAGE);
+	edata_list_active_append(&allocs, &edata2);
+	/* Sec can take one more as well and we will be exactly at max_bytes */
+	sec_dalloc(tsdn, &tdata.sec, &allocs);
+	expect_true(
+	    edata_list_active_empty(&allocs), "extents should be consumed");
+
+	sec_stats_t stats = {0};
+	sec_stats_merge(tsdn, &tdata.sec, &stats);
+	expect_zu_eq(stats.bytes, opts.max_bytes, "Size should match deallocs");
+	stats.bytes = 0;
+
+	/*
+	 * We are at max_bytes.  Now, we dalloc one more pointer and we go above
+	 * the limit.  This will force flush to 3/4 of max_bytes and given that
+	 * we have max of 2 pages, we will have to flush two. We will not flush
+	 * the one given in the input as it is the most recently used.
+	 */
+	edata_t edata3;
+	edata_size_set(&edata3, PAGE);
+	edata_list_active_append(&allocs, &edata3);
+	sec_dalloc(tsdn, &tdata.sec, &allocs);
+	expect_false(
+	    edata_list_active_empty(&allocs), "extents should NOT be consumed");
+	expect_ptr_ne(
+	    edata_list_active_first(&allocs), &edata3, "edata3 is MRU");
+	expect_ptr_ne(
+	    edata_list_active_last(&allocs), &edata3, "edata3 is MRU");
+	sec_stats_merge(tsdn, &tdata.sec, &stats);
+	expect_zu_eq(PAGE, stats.bytes, "Should have flushed");
+	destroy_test_data(tsdn, &tdata);
+}
+TEST_END
+
+TEST_BEGIN(test_max_bytes_too_low) {
+	test_data_t tdata;
+	sec_opts_t  opts;
+	opts.nshards = 1;
+	opts.max_alloc = 4 * PAGE;
+	opts.max_bytes = 2 * PAGE;
+
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	test_data_init(tsdn, &tdata, &opts);
+
+	/* Return one extent into the cache. Item is too big */
+	edata_list_active_t allocs;
+	edata_list_active_init(&allocs);
+	edata_t edata1;
+	edata_size_set(&edata1, 3 * PAGE);
+	edata_list_active_append(&allocs, &edata1);
+
+	/* SEC is empty, we return one pointer to it */
+	sec_dalloc(tsdn, &tdata.sec, &allocs);
+	expect_false(
+	    edata_list_active_empty(&allocs), "extents should not be consumed");
+	destroy_test_data(tsdn, &tdata);
+}
+TEST_END
+
+TEST_BEGIN(test_sec_flush) {
+	test_data_t tdata;
+	sec_opts_t  opts;
+	opts.nshards = 1;
+	opts.max_alloc = 4 * PAGE;
+	opts.max_bytes = 1024 * PAGE;
+
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	test_data_init(tsdn, &tdata, &opts);
+
+	/* We put in 10 one-page extents, and 10 four-page extents */
+	edata_list_active_t allocs1;
+	edata_list_active_t allocs4;
+	edata_list_active_init(&allocs1);
+	edata_list_active_init(&allocs4);
+	enum { NALLOCS = 10 };
+	edata_t edata1[NALLOCS];
+	edata_t edata4[NALLOCS];
+	for (int i = 0; i < NALLOCS; i++) {
+		edata_size_set(&edata1[i], PAGE);
+		edata_size_set(&edata4[i], 4 * PAGE);
+
+		edata_list_active_append(&allocs1, &edata1[i]);
+		sec_dalloc(tsdn, &tdata.sec, &allocs1);
+		edata_list_active_append(&allocs4, &edata4[i]);
+		sec_dalloc(tsdn, &tdata.sec, &allocs4);
+	}
+
+	sec_stats_t stats = {0};
+	sec_stats_merge(tsdn, &tdata.sec, &stats);
+	expect_zu_eq(
+	    stats.bytes, 10 * 5 * PAGE, "SEC should have what we filled");
+	stats.bytes = 0;
+
+	expect_true(edata_list_active_empty(&allocs1), "");
+	sec_flush(tsdn, &tdata.sec, &allocs1);
+	expect_false(edata_list_active_empty(&allocs1), "");
+
+	sec_stats_merge(tsdn, &tdata.sec, &stats);
+	expect_zu_eq(stats.bytes, 0, "SEC should be empty");
+	stats.bytes = 0;
+	destroy_test_data(tsdn, &tdata);
+}
+TEST_END
+
+TEST_BEGIN(test_sec_stats) {
+	test_data_t tdata;
+	sec_opts_t  opts;
+	opts.nshards = 1;
+	opts.max_alloc = PAGE;
+	opts.max_bytes = 2 * PAGE;
+
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	test_data_init(tsdn, &tdata, &opts);
+
+	edata_list_active_t allocs;
+	edata_list_active_init(&allocs);
+	edata_t edata1;
+	edata_size_set(&edata1, PAGE);
+	edata_list_active_append(&allocs, &edata1);
+
+	/* SEC is empty alloc fails. nmisses==1 */
+	edata_t *edata = sec_alloc(tsdn, &tdata.sec, PAGE);
+	expect_ptr_null(edata, "SEC should be empty");
+
+	/* SEC is empty, we return one pointer to it. ndalloc_noflush=1 */
+	sec_dalloc(tsdn, &tdata.sec, &allocs);
+	expect_true(
+	    edata_list_active_empty(&allocs), "extents should be consumed");
+
+	edata_t edata2;
+	edata_size_set(&edata2, PAGE);
+	edata_list_active_append(&allocs, &edata2);
+	/* Sec can take one more, so ndalloc_noflush=2 */
+	sec_dalloc(tsdn, &tdata.sec, &allocs);
+	expect_true(
+	    edata_list_active_empty(&allocs), "extents should be consumed");
+
+	sec_stats_t stats;
+	memset(&stats, 0, sizeof(sec_stats_t));
+	sec_stats_merge(tsdn, &tdata.sec, &stats);
+	expect_zu_eq(stats.bytes, opts.max_bytes, "Size should match deallocs");
+	expect_zu_eq(stats.total.ndalloc_noflush, 2, "");
+	expect_zu_eq(stats.total.nmisses, 1, "");
+
+	memset(&stats, 0, sizeof(sec_stats_t));
+
+	/*
+	 * We are at max_bytes.  Now, we dalloc one more pointer and we go above
+	 * the limit.  This will force flush, so ndalloc_flush = 1.
+	 */
+	edata_t edata3;
+	edata_size_set(&edata3, PAGE);
+	edata_list_active_append(&allocs, &edata3);
+	sec_dalloc(tsdn, &tdata.sec, &allocs);
+	expect_false(
+	    edata_list_active_empty(&allocs), "extents should NOT be consumed");
+	sec_stats_merge(tsdn, &tdata.sec, &stats);
+	expect_zu_eq(PAGE, stats.bytes, "Should have flushed");
+	expect_zu_eq(stats.total.ndalloc_flush, 1, "");
+	memset(&stats, 0, sizeof(sec_stats_t));
+	destroy_test_data(tsdn, &tdata);
+}
+TEST_END
+
+#define NOPS_PER_THREAD 100
+#define NPREFILL 32
+
 static void
-expect_stats_pages(tsdn_t *tsdn, sec_t *sec, size_t npages) {
-	sec_stats_t stats;
+edata_init_test(edata_t *edata) {
+	memset(edata, 0, sizeof(*edata));
+}
+
+typedef struct {
+	sec_t              *sec;
+	uint8_t             preferred_shard;
+	size_t              nallocs;
+	size_t              nallocs_fail;
+	size_t              ndallocs;
+	size_t              ndallocs_fail;
+	edata_list_active_t fill_list;
+	size_t              fill_list_sz;
+	edata_t            *edata[NOPS_PER_THREAD];
+} trylock_test_arg_t;
+
+static void *
+thd_trylock_test(void *varg) {
+	trylock_test_arg_t *arg = (trylock_test_arg_t *)varg;
+	tsd_t              *tsd = tsd_fetch();
+	tsdn_t             *tsdn = tsd_tsdn(tsd);
+
+	/* Set the preferred shard for this thread */
+	uint8_t *shard_idx = tsd_sec_shardp_get(tsd);
+	*shard_idx = arg->preferred_shard;
+
+	/* Fill the shard with some extents */
+	sec_fill(tsdn, arg->sec, PAGE, &arg->fill_list, arg->fill_list_sz);
+	expect_true(edata_list_active_empty(&arg->fill_list), "");
+
+	for (unsigned i = 0; i < NOPS_PER_THREAD; i++) {
+		/* Try to allocate from SEC */
+		arg->edata[i] = sec_alloc(tsdn, arg->sec, PAGE);
+		if (arg->edata[i] != NULL) {
+			expect_zu_eq(edata_size_get(arg->edata[i]), PAGE, "");
+		}
+	}
+
+	for (unsigned i = 0; i < NOPS_PER_THREAD; i++) {
+		if (arg->edata[i] != NULL) {
+			edata_list_active_t list;
+			edata_list_active_init(&list);
+			arg->nallocs++;
+			edata_list_active_append(&list, arg->edata[i]);
+			expect_zu_eq(edata_size_get(arg->edata[i]), PAGE, "");
+			sec_dalloc(tsdn, arg->sec, &list);
+			if (edata_list_active_empty(&list)) {
+				arg->ndallocs++;
+			} else {
+				arg->ndallocs_fail++;
+			}
+		} else {
+			arg->nallocs_fail++;
+		}
+	}
+
+	return NULL;
+}
+
+TEST_BEGIN(test_sec_multishard) {
+	test_data_t tdata;
+	sec_opts_t  opts;
+	enum { NSHARDS = 2 };
+	enum { NTHREADS = NSHARDS * 16 };
+	opts.nshards = NSHARDS;
+	opts.max_alloc = 2 * PAGE;
+	opts.max_bytes = 64 * NTHREADS * PAGE;
+
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	test_data_init(tsdn, &tdata, &opts);
+
+	/* Create threads with different preferred shards */
+	thd_t              thds[NTHREADS];
+	trylock_test_arg_t args[NTHREADS];
+
+	edata_t all_edatas[NPREFILL * NTHREADS];
+
+	for (unsigned i = 0; i < NTHREADS; i++) {
+		edata_list_active_init(&args[i].fill_list);
+		for (unsigned j = 0; j < NPREFILL; ++j) {
+			size_t ind = i * NPREFILL + j;
+			edata_init_test(&all_edatas[ind]);
+			edata_size_set(&all_edatas[ind], PAGE);
+			edata_list_active_append(
+			    &args[i].fill_list, &all_edatas[ind]);
+		}
+		args[i].fill_list_sz = NPREFILL;
+		args[i].sec = &tdata.sec;
+		args[i].preferred_shard = i % opts.nshards;
+		args[i].nallocs = 0;
+		args[i].nallocs_fail = 0;
+		args[i].ndallocs = 0;
+		args[i].ndallocs_fail = 0;
+		memset(
+		    &args[i].edata[0], 0, NOPS_PER_THREAD * sizeof(edata_t *));
+		thd_create(&thds[i], thd_trylock_test, &args[i]);
+	}
+
+	for (unsigned i = 0; i < NTHREADS; i++) {
+		thd_join(thds[i], NULL);
+	}
+
+	/* Wait for all threads to complete */
+	size_t total_allocs = 0;
+	size_t total_dallocs = 0;
+	size_t total_allocs_fail = 0;
+	for (unsigned i = 0; i < NTHREADS; i++) {
+		total_allocs += args[i].nallocs;
+		total_dallocs += args[i].ndallocs;
+		total_allocs_fail += args[i].nallocs_fail;
+	}
+
+	/* We must have at least some hits */
+	expect_zu_gt(total_allocs, 0, "");
 	/*
-	 * Check that the stats merging accumulates rather than overwrites by
-	 * putting some (made up) data there to begin with.
+	 * We must have at least some successful dallocs by design (max_bytes is
+	 * big enough).
 	 */
-	stats.bytes = 123;
-	sec_stats_merge(tsdn, sec, &stats);
-	assert_zu_le(npages * PAGE + 123, stats.bytes, "");
-}
+	expect_zu_gt(total_dallocs, 0, "");
 
-TEST_BEGIN(test_stats_simple) {
-	pai_test_allocator_t ta;
-	pai_test_allocator_init(&ta);
-	sec_t sec;
+	/* Get final stats to verify that hits and misses are accurate */
+	sec_stats_t stats = {0};
+	memset(&stats, 0, sizeof(sec_stats_t));
+	sec_stats_merge(tsdn, &tdata.sec, &stats);
+	expect_zu_eq(stats.total.nhits, total_allocs, "");
+	expect_zu_eq(stats.total.nmisses, total_allocs_fail, "");
 
-	/* See the note above -- we can't use the real tsd. */
-	tsdn_t *tsdn = TSDN_NULL;
-
-	enum {
-		NITERS = 100,
-		FLUSH_PAGES = 20,
-	};
-
-	bool deferred_work_generated = false;
-
-	test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE,
-	    /* max_bytes */ FLUSH_PAGES * PAGE);
-
-	edata_t *allocs[FLUSH_PAGES];
-	for (size_t i = 0; i < FLUSH_PAGES; i++) {
-		allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
-		    /* zero */ false, /* guarded */ false, /* frequent_reuse */
-		    false, &deferred_work_generated);
-		expect_stats_pages(tsdn, &sec, 0);
-	}
-
-	/* Increase and decrease, without flushing. */
-	for (size_t i = 0; i < NITERS; i++) {
-		for (size_t j = 0; j < FLUSH_PAGES / 2; j++) {
-			pai_dalloc(tsdn, &sec.pai, allocs[j],
-			    &deferred_work_generated);
-			expect_stats_pages(tsdn, &sec, j + 1);
-		}
-		for (size_t j = 0; j < FLUSH_PAGES / 2; j++) {
-			allocs[j] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
-			    /* zero */ false, /* guarded */ false,
-			    /* frequent_reuse */ false,
-			    &deferred_work_generated);
-			expect_stats_pages(tsdn, &sec, FLUSH_PAGES / 2 - j - 1);
-		}
-	}
-}
-TEST_END
-
-TEST_BEGIN(test_stats_auto_flush) {
-	pai_test_allocator_t ta;
-	pai_test_allocator_init(&ta);
-	sec_t sec;
-
-	/* See the note above -- we can't use the real tsd. */
-	tsdn_t *tsdn = TSDN_NULL;
-
-	enum {
-		FLUSH_PAGES = 10,
-	};
-
-	test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE,
-	    /* max_bytes */ FLUSH_PAGES * PAGE);
-
-	edata_t *extra_alloc0;
-	edata_t *extra_alloc1;
-	edata_t *allocs[2 * FLUSH_PAGES];
-
-	bool deferred_work_generated = false;
-
-	extra_alloc0 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false,
-	    /* guarded */ false, /* frequent_reuse */ false,
-	    &deferred_work_generated);
-	extra_alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false,
-	    /* guarded */ false, /* frequent_reuse */ false,
-	    &deferred_work_generated);
-
-	for (size_t i = 0; i < 2 * FLUSH_PAGES; i++) {
-		allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
-		    /* zero */ false, /* guarded */ false, /* frequent_reuse */
-		    false, &deferred_work_generated);
-	}
-
-	for (size_t i = 0; i < FLUSH_PAGES; i++) {
-		pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated);
-	}
-	pai_dalloc(tsdn, &sec.pai, extra_alloc0, &deferred_work_generated);
-
-	/* Flush the remaining pages; stats should still work. */
-	for (size_t i = 0; i < FLUSH_PAGES; i++) {
-		pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES + i],
-		    &deferred_work_generated);
-	}
-
-	pai_dalloc(tsdn, &sec.pai, extra_alloc1, &deferred_work_generated);
-
-	expect_stats_pages(tsdn, &sec, ta.alloc_count + ta.alloc_batch_count
-	    - ta.dalloc_count - ta.dalloc_batch_count);
-}
-TEST_END
-
-TEST_BEGIN(test_stats_manual_flush) {
-	pai_test_allocator_t ta;
-	pai_test_allocator_init(&ta);
-	sec_t sec;
-
-	/* See the note above -- we can't use the real tsd. */
-	tsdn_t *tsdn = TSDN_NULL;
-
-	enum {
-		FLUSH_PAGES = 10,
-	};
-
-	test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE,
-	    /* max_bytes */ FLUSH_PAGES * PAGE);
-
-	bool deferred_work_generated = false;
-	edata_t *allocs[FLUSH_PAGES];
-	for (size_t i = 0; i < FLUSH_PAGES; i++) {
-		allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
-		    /* zero */ false, /* guarded */ false, /* frequent_reuse */
-		    false, &deferred_work_generated);
-		expect_stats_pages(tsdn, &sec, 0);
-	}
-
-	/* Dalloc the first half of the allocations. */
-	for (size_t i = 0; i < FLUSH_PAGES / 2; i++) {
-		pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated);
-		expect_stats_pages(tsdn, &sec, i + 1);
-	}
-
-	sec_flush(tsdn, &sec);
-	expect_stats_pages(tsdn, &sec, 0);
-
-	/* Flush the remaining pages. */
-	for (size_t i = 0; i < FLUSH_PAGES / 2; i++) {
-		pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES / 2 + i],
-		    &deferred_work_generated);
-		expect_stats_pages(tsdn, &sec, i + 1);
-	}
-	sec_disable(tsdn, &sec);
-	expect_stats_pages(tsdn, &sec, 0);
+	destroy_test_data(tsdn, &tdata);
 }
 TEST_END
 
 int
 main(void) {
-	return test(
-	    test_reuse,
-	    test_auto_flush,
-	    test_disable,
-	    test_flush,
-	    test_max_alloc_respected,
-	    test_expand_shrink_delegate,
-	    test_nshards_0,
-	    test_stats_simple,
-	    test_stats_auto_flush,
-	    test_stats_manual_flush);
+	return test(test_max_nshards_option_zero,
+	    test_max_alloc_option_too_small, test_sec_fill, test_sec_alloc,
+	    test_sec_dalloc, test_max_bytes_too_low, test_sec_flush,
+	    test_sec_stats, test_sec_multishard);
 }
diff --git a/test/unit/seq.c b/test/unit/seq.c
index 06ed6834..ca6c74b1 100644
--- a/test/unit/seq.c
+++ b/test/unit/seq.c
@@ -24,7 +24,7 @@ expect_data(data_t *data) {
 
 seq_define(data_t, data)
 
-typedef struct thd_data_s thd_data_t;
+    typedef struct thd_data_s thd_data_t;
 struct thd_data_s {
 	seq_data_t data;
 };
@@ -32,8 +32,8 @@ struct thd_data_s {
 static void *
 seq_reader_thd(void *arg) {
 	thd_data_t *thd_data = (thd_data_t *)arg;
-	int iter = 0;
-	data_t local_data;
+	int         iter = 0;
+	data_t      local_data;
 	while (iter < 1000 * 1000 - 1) {
 		bool success = seq_try_load_data(&local_data, &thd_data->data);
 		if (success) {
@@ -49,7 +49,7 @@ seq_reader_thd(void *arg) {
 static void *
 seq_writer_thd(void *arg) {
 	thd_data_t *thd_data = (thd_data_t *)arg;
-	data_t local_data;
+	data_t      local_data;
 	memset(&local_data, 0, sizeof(local_data));
 	for (int i = 0; i < 1000 * 1000; i++) {
 		set_data(&local_data, i);
@@ -74,7 +74,7 @@ TEST_BEGIN(test_seq_threaded) {
 TEST_END
 
 TEST_BEGIN(test_seq_simple) {
-	data_t data;
+	data_t     data;
 	seq_data_t seq;
 	memset(&seq, 0, sizeof(seq));
 	for (int i = 0; i < 1000 * 1000; i++) {
@@ -88,8 +88,7 @@ TEST_BEGIN(test_seq_simple) {
 }
 TEST_END
 
-int main(void) {
-	return test_no_reentrancy(
-	    test_seq_simple,
-	    test_seq_threaded);
+int
+main(void) {
+	return test_no_reentrancy(test_seq_simple, test_seq_threaded);
 }
diff --git a/test/unit/size_check.c b/test/unit/size_check.c
index accdc405..a31578bf 100644
--- a/test/unit/size_check.c
+++ b/test/unit/size_check.c
@@ -3,7 +3,8 @@
 #include "jemalloc/internal/safety_check.h"
 
 bool fake_abort_called;
-void fake_abort(const char *message) {
+void
+fake_abort(const char *message) {
 	(void)message;
 	fake_abort_called = true;
 }
@@ -14,7 +15,7 @@ void fake_abort(const char *message) {
 #define LARGE_SIZE1 SC_LARGE_MINCLASS
 #define LARGE_SIZE2 (LARGE_SIZE1 * 2)
 
-void *
+static void *
 test_invalid_size_pre(size_t sz) {
 	safety_check_set_abort(&fake_abort);
 
@@ -25,7 +26,7 @@ test_invalid_size_pre(size_t sz) {
 	return ptr;
 }
 
-void
+static void
 test_invalid_size_post(void) {
 	expect_true(fake_abort_called, "Safety check didn't fire");
 	safety_check_set_abort(NULL);
@@ -72,8 +73,7 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_invalid_size_sdallocx,
+	return test(test_invalid_size_sdallocx,
 	    test_invalid_size_sdallocx_nonzero_flag,
 	    test_invalid_size_sdallocx_noflags);
 }
diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c
index c70eb592..5379047c 100644
--- a/test/unit/size_classes.c
+++ b/test/unit/size_classes.c
@@ -3,12 +3,13 @@
 static size_t
 get_max_size_class(void) {
 	unsigned nlextents;
-	size_t mib[4];
-	size_t sz, miblen, max_size_class;
+	size_t   mib[4];
+	size_t   sz, miblen, max_size_class;
 
 	sz = sizeof(unsigned);
-	expect_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL,
-	    0), 0, "Unexpected mallctl() error");
+	expect_d_eq(
+	    mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, 0), 0,
+	    "Unexpected mallctl() error");
 
 	miblen = sizeof(mib) / sizeof(size_t);
 	expect_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0,
@@ -16,96 +17,137 @@ get_max_size_class(void) {
 	mib[2] = nlextents - 1;
 
 	sz = sizeof(size_t);
-	expect_d_eq(mallctlbymib(mib, miblen, (void *)&max_size_class, &sz,
-	    NULL, 0), 0, "Unexpected mallctlbymib() error");
+	expect_d_eq(
+	    mallctlbymib(mib, miblen, (void *)&max_size_class, &sz, NULL, 0), 0,
+	    "Unexpected mallctlbymib() error");
 
 	return max_size_class;
 }
 
 TEST_BEGIN(test_size_classes) {
-	size_t size_class, max_size_class;
-	szind_t index, max_index;
+	size_t  size_class, max_size_class;
+	szind_t index, gen_index, max_index;
 
-	max_size_class = get_max_size_class();
+	max_size_class = sz_large_size_classes_disabled()
+	    ? SC_SMALL_MAXCLASS
+	    : get_max_size_class();
 	max_index = sz_size2index(max_size_class);
 
-	for (index = 0, size_class = sz_index2size(index); index < max_index ||
-	    size_class < max_size_class; index++, size_class =
-	    sz_index2size(index)) {
+	for (index = 0, size_class = sz_index2size(index);
+	     index < max_index || size_class < max_size_class;
+	     index++, size_class = sz_index2size(index)) {
+		gen_index = sz_size2index(size_class);
 		expect_true(index < max_index,
 		    "Loop conditionals should be equivalent; index=%u, "
-		    "size_class=%zu (%#zx)", index, size_class, size_class);
+		    "size_class=%zu (%#zx)",
+		    index, size_class, size_class);
 		expect_true(size_class < max_size_class,
 		    "Loop conditionals should be equivalent; index=%u, "
-		    "size_class=%zu (%#zx)", index, size_class, size_class);
+		    "size_class=%zu (%#zx)",
+		    index, size_class, size_class);
 
-		expect_u_eq(index, sz_size2index(size_class),
+		expect_u_eq(index, gen_index,
 		    "sz_size2index() does not reverse sz_index2size(): index=%u"
 		    " --> size_class=%zu --> index=%u --> size_class=%zu",
-		    index, size_class, sz_size2index(size_class),
-		    sz_index2size(sz_size2index(size_class)));
-		expect_zu_eq(size_class,
-		    sz_index2size(sz_size2index(size_class)),
+		    index, size_class, gen_index, sz_index2size(gen_index));
+
+		expect_zu_eq(size_class, sz_index2size(gen_index),
 		    "sz_index2size() does not reverse sz_size2index(): index=%u"
 		    " --> size_class=%zu --> index=%u --> size_class=%zu",
-		    index, size_class, sz_size2index(size_class),
-		    sz_index2size(sz_size2index(size_class)));
+		    index, size_class, gen_index, sz_index2size(gen_index));
 
-		expect_u_eq(index+1, sz_size2index(size_class+1),
+		expect_u_eq(index + 1, sz_size2index(size_class + 1),
 		    "Next size_class does not round up properly");
 
-		expect_zu_eq(size_class, (index > 0) ?
-		    sz_s2u(sz_index2size(index-1)+1) : sz_s2u(1),
+		expect_zu_eq(size_class,
+		    (index > 0) ? sz_s2u(sz_index2size(index - 1) + 1)
+		                : sz_s2u(1),
 		    "sz_s2u() does not round up to size class");
-		expect_zu_eq(size_class, sz_s2u(size_class-1),
+		expect_zu_eq(size_class, sz_s2u(size_class - 1),
 		    "sz_s2u() does not round up to size class");
 		expect_zu_eq(size_class, sz_s2u(size_class),
 		    "sz_s2u() does not compute same size class");
-		expect_zu_eq(sz_s2u(size_class+1), sz_index2size(index+1),
+		expect_zu_eq(sz_s2u(size_class + 1), sz_index2size(index + 1),
 		    "sz_s2u() does not round up to next size class");
 	}
 
 	expect_u_eq(index, sz_size2index(sz_index2size(index)),
 	    "sz_size2index() does not reverse sz_index2size()");
-	expect_zu_eq(max_size_class, sz_index2size(
-	    sz_size2index(max_size_class)),
+	expect_zu_eq(max_size_class,
+	    sz_index2size(sz_size2index(max_size_class)),
 	    "sz_index2size() does not reverse sz_size2index()");
 
-	expect_zu_eq(size_class, sz_s2u(sz_index2size(index-1)+1),
+	expect_zu_eq(size_class, sz_s2u(sz_index2size(index - 1) + 1),
 	    "sz_s2u() does not round up to size class");
-	expect_zu_eq(size_class, sz_s2u(size_class-1),
+	expect_zu_eq(size_class, sz_s2u(size_class - 1),
 	    "sz_s2u() does not round up to size class");
 	expect_zu_eq(size_class, sz_s2u(size_class),
 	    "sz_s2u() does not compute same size class");
 }
 TEST_END
 
+TEST_BEGIN(test_grow_slow_size_classes) {
+	test_skip_if(!sz_large_size_classes_disabled());
+
+	size_t size = SC_LARGE_MINCLASS;
+	size_t target_usize = SC_LARGE_MINCLASS;
+	size_t max_size = get_max_size_class();
+	size_t increase[3] = {PAGE - 1, 1, 1};
+	while (size <= max_size) {
+		size_t usize = sz_s2u(size);
+		expect_zu_eq(usize, target_usize,
+		    "sz_s2u() does not generate usize as expected.");
+		size += increase[0];
+		usize = sz_s2u(size);
+		target_usize += PAGE;
+		expect_zu_eq(usize, target_usize,
+		    "sz_s2u() does not generate usize as expected.");
+		size += increase[1];
+		usize = sz_s2u(size);
+		expect_zu_eq(usize, target_usize,
+		    "sz_s2u() does not generate usize as expected.");
+		size += increase[2];
+		usize = sz_s2u(size);
+		target_usize += PAGE;
+		expect_zu_eq(usize, target_usize,
+		    "sz_s2u() does not generate usize as expected.");
+		if (target_usize << 1 < target_usize) {
+			break;
+		}
+		target_usize = target_usize << 1;
+		size = target_usize;
+	}
+}
+TEST_END
+
 TEST_BEGIN(test_psize_classes) {
-	size_t size_class, max_psz;
+	size_t   size_class, max_psz;
 	pszind_t pind, max_pind;
 
 	max_psz = get_max_size_class() + PAGE;
 	max_pind = sz_psz2ind(max_psz);
 
 	for (pind = 0, size_class = sz_pind2sz(pind);
-	    pind < max_pind || size_class < max_psz;
-	    pind++, size_class = sz_pind2sz(pind)) {
+	     pind < max_pind || size_class < max_psz;
+	     pind++, size_class = sz_pind2sz(pind)) {
 		expect_true(pind < max_pind,
 		    "Loop conditionals should be equivalent; pind=%u, "
-		    "size_class=%zu (%#zx)", pind, size_class, size_class);
+		    "size_class=%zu (%#zx)",
+		    pind, size_class, size_class);
 		expect_true(size_class < max_psz,
 		    "Loop conditionals should be equivalent; pind=%u, "
-		    "size_class=%zu (%#zx)", pind, size_class, size_class);
+		    "size_class=%zu (%#zx)",
+		    pind, size_class, size_class);
 
 		expect_u_eq(pind, sz_psz2ind(size_class),
 		    "sz_psz2ind() does not reverse sz_pind2sz(): pind=%u -->"
-		    " size_class=%zu --> pind=%u --> size_class=%zu", pind,
-		    size_class, sz_psz2ind(size_class),
+		    " size_class=%zu --> pind=%u --> size_class=%zu",
+		    pind, size_class, sz_psz2ind(size_class),
 		    sz_pind2sz(sz_psz2ind(size_class)));
 		expect_zu_eq(size_class, sz_pind2sz(sz_psz2ind(size_class)),
 		    "sz_pind2sz() does not reverse sz_psz2ind(): pind=%u -->"
-		    " size_class=%zu --> pind=%u --> size_class=%zu", pind,
-		    size_class, sz_psz2ind(size_class),
+		    " size_class=%zu --> pind=%u --> size_class=%zu",
+		    pind, size_class, sz_psz2ind(size_class),
 		    sz_pind2sz(sz_psz2ind(size_class)));
 
 		if (size_class == SC_LARGE_MAXCLASS) {
@@ -116,14 +158,15 @@ TEST_BEGIN(test_psize_classes) {
 			    "Next size_class does not round up properly");
 		}
 
-		expect_zu_eq(size_class, (pind > 0) ?
-		    sz_psz2u(sz_pind2sz(pind-1)+1) : sz_psz2u(1),
+		expect_zu_eq(size_class,
+		    (pind > 0) ? sz_psz2u(sz_pind2sz(pind - 1) + 1)
+		               : sz_psz2u(1),
 		    "sz_psz2u() does not round up to size class");
-		expect_zu_eq(size_class, sz_psz2u(size_class-1),
+		expect_zu_eq(size_class, sz_psz2u(size_class - 1),
 		    "sz_psz2u() does not round up to size class");
 		expect_zu_eq(size_class, sz_psz2u(size_class),
 		    "sz_psz2u() does not compute same size class");
-		expect_zu_eq(sz_psz2u(size_class+1), sz_pind2sz(pind+1),
+		expect_zu_eq(sz_psz2u(size_class + 1), sz_pind2sz(pind + 1),
 		    "sz_psz2u() does not round up to next size class");
 	}
 
@@ -132,9 +175,9 @@ TEST_BEGIN(test_psize_classes) {
 	expect_zu_eq(max_psz, sz_pind2sz(sz_psz2ind(max_psz)),
 	    "sz_pind2sz() does not reverse sz_psz2ind()");
 
-	expect_zu_eq(size_class, sz_psz2u(sz_pind2sz(pind-1)+1),
+	expect_zu_eq(size_class, sz_psz2u(sz_pind2sz(pind - 1) + 1),
 	    "sz_psz2u() does not round up to size class");
-	expect_zu_eq(size_class, sz_psz2u(size_class-1),
+	expect_zu_eq(size_class, sz_psz2u(size_class - 1),
 	    "sz_psz2u() does not round up to size class");
 	expect_zu_eq(size_class, sz_psz2u(size_class),
 	    "sz_psz2u() does not compute same size class");
@@ -147,31 +190,31 @@ TEST_BEGIN(test_overflow) {
 	max_size_class = get_max_size_class();
 	max_psz = max_size_class + PAGE;
 
-	expect_u_eq(sz_size2index(max_size_class+1), SC_NSIZES,
+	expect_u_eq(sz_size2index(max_size_class + 1), SC_NSIZES,
 	    "sz_size2index() should return NSIZES on overflow");
-	expect_u_eq(sz_size2index(ZU(PTRDIFF_MAX)+1), SC_NSIZES,
+	expect_u_eq(sz_size2index(ZU(PTRDIFF_MAX) + 1), SC_NSIZES,
 	    "sz_size2index() should return NSIZES on overflow");
 	expect_u_eq(sz_size2index(SIZE_T_MAX), SC_NSIZES,
 	    "sz_size2index() should return NSIZES on overflow");
 
-	expect_zu_eq(sz_s2u(max_size_class+1), 0,
+	expect_zu_eq(sz_s2u(max_size_class + 1), 0,
 	    "sz_s2u() should return 0 for unsupported size");
-	expect_zu_eq(sz_s2u(ZU(PTRDIFF_MAX)+1), 0,
+	expect_zu_eq(sz_s2u(ZU(PTRDIFF_MAX) + 1), 0,
 	    "sz_s2u() should return 0 for unsupported size");
-	expect_zu_eq(sz_s2u(SIZE_T_MAX), 0,
-	    "sz_s2u() should return 0 on overflow");
+	expect_zu_eq(
+	    sz_s2u(SIZE_T_MAX), 0, "sz_s2u() should return 0 on overflow");
 
-	expect_u_eq(sz_psz2ind(max_size_class+1), SC_NPSIZES,
+	expect_u_eq(sz_psz2ind(max_size_class + 1), SC_NPSIZES,
 	    "sz_psz2ind() should return NPSIZES on overflow");
-	expect_u_eq(sz_psz2ind(ZU(PTRDIFF_MAX)+1), SC_NPSIZES,
+	expect_u_eq(sz_psz2ind(ZU(PTRDIFF_MAX) + 1), SC_NPSIZES,
 	    "sz_psz2ind() should return NPSIZES on overflow");
 	expect_u_eq(sz_psz2ind(SIZE_T_MAX), SC_NPSIZES,
 	    "sz_psz2ind() should return NPSIZES on overflow");
 
-	expect_zu_eq(sz_psz2u(max_size_class+1), max_psz,
+	expect_zu_eq(sz_psz2u(max_size_class + 1), max_psz,
 	    "sz_psz2u() should return (LARGE_MAXCLASS + PAGE) for unsupported"
 	    " size");
-	expect_zu_eq(sz_psz2u(ZU(PTRDIFF_MAX)+1), max_psz,
+	expect_zu_eq(sz_psz2u(ZU(PTRDIFF_MAX) + 1), max_psz,
 	    "sz_psz2u() should return (LARGE_MAXCLASS + PAGE) for unsupported "
 	    "size");
 	expect_zu_eq(sz_psz2u(SIZE_T_MAX), max_psz,
@@ -181,8 +224,6 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_size_classes,
-	    test_psize_classes,
-	    test_overflow);
+	return test(test_size_classes, test_grow_slow_size_classes,
+	    test_psize_classes, test_overflow);
 }
diff --git a/test/unit/size_classes.sh b/test/unit/size_classes.sh
new file mode 100644
index 00000000..54363554
--- /dev/null
+++ b/test/unit/size_classes.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+export MALLOC_CONF="disable_large_size_classes:true"
diff --git a/test/unit/slab.c b/test/unit/slab.c
index 70fc5c7d..d98663e8 100644
--- a/test/unit/slab.c
+++ b/test/unit/slab.c
@@ -2,27 +2,26 @@
 
 #define INVALID_ARENA_IND ((1U << MALLOCX_ARENA_BITS) - 1)
 
-TEST_BEGIN(test_arena_slab_regind) {
+TEST_BEGIN(test_bin_slab_regind) {
 	szind_t binind;
 
 	for (binind = 0; binind < SC_NBINS; binind++) {
-		size_t regind;
-		edata_t slab;
+		size_t            regind;
+		edata_t           slab;
 		const bin_info_t *bin_info = &bin_infos[binind];
 		edata_init(&slab, INVALID_ARENA_IND,
 		    mallocx(bin_info->slab_size, MALLOCX_LG_ALIGN(LG_PAGE)),
-		    bin_info->slab_size, true,
-		    binind, 0, extent_state_active, false, true, EXTENT_PAI_PAC,
-		    EXTENT_NOT_HEAD);
-		expect_ptr_not_null(edata_addr_get(&slab),
-		    "Unexpected malloc() failure");
-		arena_dalloc_bin_locked_info_t dalloc_info;
-		arena_dalloc_bin_locked_begin(&dalloc_info, binind);
+		    bin_info->slab_size, true, binind, 0, extent_state_active,
+		    false, true, EXTENT_PAI_PAC, EXTENT_NOT_HEAD);
+		expect_ptr_not_null(
+		    edata_addr_get(&slab), "Unexpected malloc() failure");
+		bin_dalloc_locked_info_t dalloc_info;
+		bin_dalloc_locked_begin(&dalloc_info, binind);
 		for (regind = 0; regind < bin_info->nregs; regind++) {
-			void *reg = (void *)((uintptr_t)edata_addr_get(&slab) +
-			    (bin_info->reg_size * regind));
-			expect_zu_eq(arena_slab_regind(&dalloc_info, binind,
-			    &slab, reg),
+			void *reg = (void *)((uintptr_t)edata_addr_get(&slab)
+			    + (bin_info->reg_size * regind));
+			expect_zu_eq(
+			    bin_slab_regind(&dalloc_info, binind, &slab, reg),
 			    regind,
 			    "Incorrect region index computed for size %zu",
 			    bin_info->reg_size);
@@ -34,6 +33,5 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_arena_slab_regind);
+	return test(test_bin_slab_regind);
 }
diff --git a/test/unit/smoothstep.c b/test/unit/smoothstep.c
index 588c9f44..3686ca74 100644
--- a/test/unit/smoothstep.c
+++ b/test/unit/smoothstep.c
@@ -1,9 +1,8 @@
 #include "test/jemalloc_test.h"
 
 static const uint64_t smoothstep_tab[] = {
-#define STEP(step, h, x, y)			\
-	h,
-	SMOOTHSTEP
+#define STEP(step, h, x, y) h,
+    SMOOTHSTEP
 #undef STEP
 };
 
@@ -23,14 +22,14 @@ TEST_BEGIN(test_smoothstep_integral) {
 		sum += smoothstep_tab[i];
 	}
 
-	max = (KQU(1) << (SMOOTHSTEP_BFP-1)) * (SMOOTHSTEP_NSTEPS+1);
+	max = (KQU(1) << (SMOOTHSTEP_BFP - 1)) * (SMOOTHSTEP_NSTEPS + 1);
 	min = max - SMOOTHSTEP_NSTEPS;
 
-	expect_u64_ge(sum, min,
-	    "Integral too small, even accounting for truncation");
+	expect_u64_ge(
+	    sum, min, "Integral too small, even accounting for truncation");
 	expect_u64_le(sum, max, "Integral exceeds 1/2");
 	if (false) {
-		malloc_printf("%"FMTu64" ulps under 1/2 (limit %d)\n",
+		malloc_printf("%" FMTu64 " ulps under 1/2 (limit %d)\n",
 		    max - sum, SMOOTHSTEP_NSTEPS);
 	}
 }
@@ -52,7 +51,7 @@ TEST_BEGIN(test_smoothstep_monotonic) {
 		expect_u64_ge(h, prev_h, "Piecewise non-monotonic, i=%u", i);
 		prev_h = h;
 	}
-	expect_u64_eq(smoothstep_tab[SMOOTHSTEP_NSTEPS-1],
+	expect_u64_eq(smoothstep_tab[SMOOTHSTEP_NSTEPS - 1],
 	    (KQU(1) << SMOOTHSTEP_BFP), "Last step must equal 1");
 }
 TEST_END
@@ -74,19 +73,21 @@ TEST_BEGIN(test_smoothstep_slope) {
 		uint64_t delta = h - prev_h;
 		expect_u64_ge(delta, prev_delta,
 		    "Slope must monotonically increase in 0.0 <= x <= 0.5, "
-		    "i=%u", i);
+		    "i=%u",
+		    i);
 		prev_h = h;
 		prev_delta = delta;
 	}
 
 	prev_h = KQU(1) << SMOOTHSTEP_BFP;
 	prev_delta = 0;
-	for (i = SMOOTHSTEP_NSTEPS-1; i >= SMOOTHSTEP_NSTEPS / 2; i--) {
+	for (i = SMOOTHSTEP_NSTEPS - 1; i >= SMOOTHSTEP_NSTEPS / 2; i--) {
 		uint64_t h = smoothstep_tab[i];
 		uint64_t delta = prev_h - h;
 		expect_u64_ge(delta, prev_delta,
 		    "Slope must monotonically decrease in 0.5 <= x <= 1.0, "
-		    "i=%u", i);
+		    "i=%u",
+		    i);
 		prev_h = h;
 		prev_delta = delta;
 	}
@@ -95,8 +96,6 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_smoothstep_integral,
-	    test_smoothstep_monotonic,
+	return test(test_smoothstep_integral, test_smoothstep_monotonic,
 	    test_smoothstep_slope);
 }
diff --git a/test/unit/spin.c b/test/unit/spin.c
index b965f742..6dbd0dd1 100644
--- a/test/unit/spin.c
+++ b/test/unit/spin.c
@@ -13,6 +13,5 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_spin);
+	return test(test_spin);
 }
diff --git a/test/unit/stats.c b/test/unit/stats.c
index bbdbd180..d2719db2 100644
--- a/test/unit/stats.c
+++ b/test/unit/stats.c
@@ -1,15 +1,19 @@
 #include "test/jemalloc_test.h"
 
+#include "jemalloc/internal/arena_structs.h"
+
 #define STRINGIFY_HELPER(x) #x
 #define STRINGIFY(x) STRINGIFY_HELPER(x)
 
 TEST_BEGIN(test_stats_summary) {
-	size_t sz, allocated, active, resident, mapped;
+	size_t sz, allocated, active, resident, mapped, metadata,
+	    metadata_edata, metadata_rtree;
 	int expected = config_stats ? 0 : ENOENT;
 
 	sz = sizeof(size_t);
-	expect_d_eq(mallctl("stats.allocated", (void *)&allocated, &sz, NULL,
-	    0), expected, "Unexpected mallctl() result");
+	expect_d_eq(
+	    mallctl("stats.allocated", (void *)&allocated, &sz, NULL, 0),
+	    expected, "Unexpected mallctl() result");
 	expect_d_eq(mallctl("stats.active", (void *)&active, &sz, NULL, 0),
 	    expected, "Unexpected mallctl() result");
 	expect_d_eq(mallctl("stats.resident", (void *)&resident, &sz, NULL, 0),
@@ -17,24 +21,36 @@ TEST_BEGIN(test_stats_summary) {
 	expect_d_eq(mallctl("stats.mapped", (void *)&mapped, &sz, NULL, 0),
 	    expected, "Unexpected mallctl() result");
 
+	expect_d_eq(mallctl("stats.metadata", (void *)&metadata, &sz, NULL, 0),
+	    expected, "Unexpected mallctl() result");
+	expect_d_eq(mallctl("stats.metadata_edata", (void *)&metadata_edata,
+	                &sz, NULL, 0),
+	    expected, "Unexpected mallctl() result");
+	expect_d_eq(mallctl("stats.metadata_rtree", (void *)&metadata_rtree,
+	                &sz, NULL, 0),
+	    expected, "Unexpected mallctl() result");
+
 	if (config_stats) {
 		expect_zu_le(allocated, active,
 		    "allocated should be no larger than active");
-		expect_zu_lt(active, resident,
-		    "active should be less than resident");
-		expect_zu_lt(active, mapped,
-		    "active should be less than mapped");
+		expect_zu_lt(
+		    active, resident, "active should be less than resident");
+		expect_zu_lt(
+		    active, mapped, "active should be less than mapped");
+		expect_zu_le(metadata_edata + metadata_rtree, metadata,
+		    "the sum of metadata_edata and metadata_rtree "
+		    "should be no larger than metadata");
 	}
 }
 TEST_END
 
 TEST_BEGIN(test_stats_large) {
-	void *p;
+	void    *p;
 	uint64_t epoch;
-	size_t allocated;
+	size_t   allocated;
 	uint64_t nmalloc, ndalloc, nrequests;
-	size_t sz;
-	int expected = config_stats ? 0 : ENOENT;
+	size_t   sz;
+	int      expected = config_stats ? 0 : ENOENT;
 
 	p = mallocx(SC_SMALL_MAXCLASS + 1, MALLOCX_ARENA(0));
 	expect_ptr_not_null(p, "Unexpected mallocx() failure");
@@ -44,20 +60,22 @@ TEST_BEGIN(test_stats_large) {
 
 	sz = sizeof(size_t);
 	expect_d_eq(mallctl("stats.arenas.0.large.allocated",
-	    (void *)&allocated, &sz, NULL, 0), expected,
-	    "Unexpected mallctl() result");
+	                (void *)&allocated, &sz, NULL, 0),
+	    expected, "Unexpected mallctl() result");
 	sz = sizeof(uint64_t);
 	expect_d_eq(mallctl("stats.arenas.0.large.nmalloc", (void *)&nmalloc,
-	    &sz, NULL, 0), expected, "Unexpected mallctl() result");
+	                &sz, NULL, 0),
+	    expected, "Unexpected mallctl() result");
 	expect_d_eq(mallctl("stats.arenas.0.large.ndalloc", (void *)&ndalloc,
-	    &sz, NULL, 0), expected, "Unexpected mallctl() result");
+	                &sz, NULL, 0),
+	    expected, "Unexpected mallctl() result");
 	expect_d_eq(mallctl("stats.arenas.0.large.nrequests",
-	    (void *)&nrequests, &sz, NULL, 0), expected,
-	    "Unexpected mallctl() result");
+	                (void *)&nrequests, &sz, NULL, 0),
+	    expected, "Unexpected mallctl() result");
 
 	if (config_stats) {
-		expect_zu_gt(allocated, 0,
-		    "allocated should be greater than zero");
+		expect_zu_gt(
+		    allocated, 0, "allocated should be greater than zero");
 		expect_u64_ge(nmalloc, ndalloc,
 		    "nmalloc should be at least as large as ndalloc");
 		expect_u64_le(nmalloc, nrequests,
@@ -69,18 +87,17 @@ TEST_BEGIN(test_stats_large) {
 TEST_END
 
 TEST_BEGIN(test_stats_arenas_summary) {
-	void *little, *large;
+	void    *little, *large;
 	uint64_t epoch;
-	size_t sz;
-	int expected = config_stats ? 0 : ENOENT;
-	size_t mapped;
+	size_t   sz;
+	int      expected = config_stats ? 0 : ENOENT;
+	size_t   mapped;
 	uint64_t dirty_npurge, dirty_nmadvise, dirty_purged;
 	uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged;
 
 	little = mallocx(SC_SMALL_MAXCLASS, MALLOCX_ARENA(0));
 	expect_ptr_not_null(little, "Unexpected mallocx() failure");
-	large = mallocx((1U << SC_LG_LARGE_MINCLASS),
-	    MALLOCX_ARENA(0));
+	large = mallocx((1U << SC_LG_LARGE_MINCLASS), MALLOCX_ARENA(0));
 	expect_ptr_not_null(large, "Unexpected mallocx() failure");
 
 	dallocx(little, 0);
@@ -95,28 +112,29 @@ TEST_BEGIN(test_stats_arenas_summary) {
 	    0, "Unexpected mallctl() failure");
 
 	sz = sizeof(size_t);
-	expect_d_eq(mallctl("stats.arenas.0.mapped", (void *)&mapped, &sz, NULL,
-	    0), expected, "Unexepected mallctl() result");
+	expect_d_eq(
+	    mallctl("stats.arenas.0.mapped", (void *)&mapped, &sz, NULL, 0),
+	    expected, "Unexepected mallctl() result");
 
 	sz = sizeof(uint64_t);
 	expect_d_eq(mallctl("stats.arenas.0.dirty_npurge",
-	    (void *)&dirty_npurge, &sz, NULL, 0), expected,
-	    "Unexepected mallctl() result");
+	                (void *)&dirty_npurge, &sz, NULL, 0),
+	    expected, "Unexepected mallctl() result");
 	expect_d_eq(mallctl("stats.arenas.0.dirty_nmadvise",
-	    (void *)&dirty_nmadvise, &sz, NULL, 0), expected,
-	    "Unexepected mallctl() result");
+	                (void *)&dirty_nmadvise, &sz, NULL, 0),
+	    expected, "Unexepected mallctl() result");
 	expect_d_eq(mallctl("stats.arenas.0.dirty_purged",
-	    (void *)&dirty_purged, &sz, NULL, 0), expected,
-	    "Unexepected mallctl() result");
+	                (void *)&dirty_purged, &sz, NULL, 0),
+	    expected, "Unexepected mallctl() result");
 	expect_d_eq(mallctl("stats.arenas.0.muzzy_npurge",
-	    (void *)&muzzy_npurge, &sz, NULL, 0), expected,
-	    "Unexepected mallctl() result");
+	                (void *)&muzzy_npurge, &sz, NULL, 0),
+	    expected, "Unexepected mallctl() result");
 	expect_d_eq(mallctl("stats.arenas.0.muzzy_nmadvise",
-	    (void *)&muzzy_nmadvise, &sz, NULL, 0), expected,
-	    "Unexepected mallctl() result");
+	                (void *)&muzzy_nmadvise, &sz, NULL, 0),
+	    expected, "Unexepected mallctl() result");
 	expect_d_eq(mallctl("stats.arenas.0.muzzy_purged",
-	    (void *)&muzzy_purged, &sz, NULL, 0), expected,
-	    "Unexepected mallctl() result");
+	                (void *)&muzzy_purged, &sz, NULL, 0),
+	    expected, "Unexepected mallctl() result");
 
 	if (config_stats) {
 		if (!is_background_thread_enabled() && !opt_hpa) {
@@ -145,10 +163,10 @@ no_lazy_lock(void) {
 }
 
 TEST_BEGIN(test_stats_arenas_small) {
-	void *p;
-	size_t sz, allocated;
+	void    *p;
+	size_t   sz, allocated;
 	uint64_t epoch, nmalloc, ndalloc, nrequests;
-	int expected = config_stats ? 0 : ENOENT;
+	int      expected = config_stats ? 0 : ENOENT;
 
 	no_lazy_lock(); /* Lazy locking would dodge tcache testing. */
 
@@ -163,26 +181,28 @@ TEST_BEGIN(test_stats_arenas_small) {
 
 	sz = sizeof(size_t);
 	expect_d_eq(mallctl("stats.arenas.0.small.allocated",
-	    (void *)&allocated, &sz, NULL, 0), expected,
-	    "Unexpected mallctl() result");
+	                (void *)&allocated, &sz, NULL, 0),
+	    expected, "Unexpected mallctl() result");
 	sz = sizeof(uint64_t);
 	expect_d_eq(mallctl("stats.arenas.0.small.nmalloc", (void *)&nmalloc,
-	    &sz, NULL, 0), expected, "Unexpected mallctl() result");
+	                &sz, NULL, 0),
+	    expected, "Unexpected mallctl() result");
 	expect_d_eq(mallctl("stats.arenas.0.small.ndalloc", (void *)&ndalloc,
-	    &sz, NULL, 0), expected, "Unexpected mallctl() result");
+	                &sz, NULL, 0),
+	    expected, "Unexpected mallctl() result");
 	expect_d_eq(mallctl("stats.arenas.0.small.nrequests",
-	    (void *)&nrequests, &sz, NULL, 0), expected,
-	    "Unexpected mallctl() result");
+	                (void *)&nrequests, &sz, NULL, 0),
+	    expected, "Unexpected mallctl() result");
 
 	if (config_stats) {
-		expect_zu_gt(allocated, 0,
-		    "allocated should be greater than zero");
-		expect_u64_gt(nmalloc, 0,
-		    "nmalloc should be no greater than zero");
+		expect_zu_gt(
+		    allocated, 0, "allocated should be greater than zero");
+		expect_u64_gt(
+		    nmalloc, 0, "nmalloc should be no greater than zero");
 		expect_u64_ge(nmalloc, ndalloc,
 		    "nmalloc should be at least as large as ndalloc");
-		expect_u64_gt(nrequests, 0,
-		    "nrequests should be greater than zero");
+		expect_u64_gt(
+		    nrequests, 0, "nrequests should be greater than zero");
 	}
 
 	dallocx(p, 0);
@@ -190,32 +210,41 @@ TEST_BEGIN(test_stats_arenas_small) {
 TEST_END
 
 TEST_BEGIN(test_stats_arenas_large) {
-	void *p;
-	size_t sz, allocated;
+	void    *p;
+	size_t   sz, allocated, allocated_before;
 	uint64_t epoch, nmalloc, ndalloc;
-	int expected = config_stats ? 0 : ENOENT;
+	size_t   malloc_size = (1U << (SC_LG_LARGE_MINCLASS + 1)) + 1;
+	int      expected = config_stats ? 0 : ENOENT;
 
-	p = mallocx((1U << SC_LG_LARGE_MINCLASS), MALLOCX_ARENA(0));
+	sz = sizeof(size_t);
+	expect_d_eq(mallctl("stats.arenas.0.large.allocated",
+	                (void *)&allocated_before, &sz, NULL, 0),
+	    expected, "Unexpected mallctl() result");
+
+	p = mallocx(malloc_size, MALLOCX_ARENA(0));
 	expect_ptr_not_null(p, "Unexpected mallocx() failure");
 
 	expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)),
 	    0, "Unexpected mallctl() failure");
 
-	sz = sizeof(size_t);
 	expect_d_eq(mallctl("stats.arenas.0.large.allocated",
-	    (void *)&allocated, &sz, NULL, 0), expected,
-	    "Unexpected mallctl() result");
+	                (void *)&allocated, &sz, NULL, 0),
+	    expected, "Unexpected mallctl() result");
 	sz = sizeof(uint64_t);
 	expect_d_eq(mallctl("stats.arenas.0.large.nmalloc", (void *)&nmalloc,
-	    &sz, NULL, 0), expected, "Unexpected mallctl() result");
+	                &sz, NULL, 0),
+	    expected, "Unexpected mallctl() result");
 	expect_d_eq(mallctl("stats.arenas.0.large.ndalloc", (void *)&ndalloc,
-	    &sz, NULL, 0), expected, "Unexpected mallctl() result");
+	                &sz, NULL, 0),
+	    expected, "Unexpected mallctl() result");
 
 	if (config_stats) {
-		expect_zu_gt(allocated, 0,
+		expect_zu_ge(allocated_before, 0,
 		    "allocated should be greater than zero");
-		expect_u64_gt(nmalloc, 0,
-		    "nmalloc should be greater than zero");
+		expect_zu_ge(allocated - allocated_before, sz_s2u(malloc_size),
+		    "the diff between allocated should be greater than the allocation made");
+		expect_u64_gt(
+		    nmalloc, 0, "nmalloc should be greater than zero");
 		expect_u64_ge(nmalloc, ndalloc,
 		    "nmalloc should be at least as large as ndalloc");
 	}
@@ -230,11 +259,11 @@ gen_mallctl_str(char *cmd, char *name, unsigned arena_ind) {
 }
 
 TEST_BEGIN(test_stats_arenas_bins) {
-	void *p;
-	size_t sz, curslabs, curregs, nonfull_slabs;
+	void    *p;
+	size_t   sz, curslabs, curregs, nonfull_slabs;
 	uint64_t epoch, nmalloc, ndalloc, nrequests, nfills, nflushes;
 	uint64_t nslabs, nreslabs;
-	int expected = config_stats ? 0 : ENOENT;
+	int      expected = config_stats ? 0 : ENOENT;
 
 	/* Make sure allocation below isn't satisfied by tcache. */
 	expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0),
@@ -246,8 +275,8 @@ TEST_BEGIN(test_stats_arenas_bins) {
 	    0, "Arena creation failure");
 	sz = sizeof(arena_ind);
 	expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz,
-	    (void *)&arena_ind, sizeof(arena_ind)), 0,
-	    "Unexpected mallctl() failure");
+	                (void *)&arena_ind, sizeof(arena_ind)),
+	    0, "Unexpected mallctl() failure");
 
 	p = malloc(bin_infos[0].reg_size);
 	expect_ptr_not_null(p, "Unexpected malloc() failure");
@@ -297,26 +326,25 @@ TEST_BEGIN(test_stats_arenas_bins) {
 	    expected, "Unexpected mallctl() result");
 
 	if (config_stats) {
-		expect_u64_gt(nmalloc, 0,
-		    "nmalloc should be greater than zero");
+		expect_u64_gt(
+		    nmalloc, 0, "nmalloc should be greater than zero");
 		expect_u64_ge(nmalloc, ndalloc,
 		    "nmalloc should be at least as large as ndalloc");
-		expect_u64_gt(nrequests, 0,
-		    "nrequests should be greater than zero");
-		expect_zu_gt(curregs, 0,
-		    "allocated should be greater than zero");
+		expect_u64_gt(
+		    nrequests, 0, "nrequests should be greater than zero");
+		expect_zu_gt(
+		    curregs, 0, "allocated should be greater than zero");
 		if (opt_tcache) {
 			expect_u64_gt(nfills, 0,
 			    "At least one fill should have occurred");
 			expect_u64_gt(nflushes, 0,
 			    "At least one flush should have occurred");
 		}
-		expect_u64_gt(nslabs, 0,
-		    "At least one slab should have been allocated");
+		expect_u64_gt(
+		    nslabs, 0, "At least one slab should have been allocated");
 		expect_zu_gt(curslabs, 0,
 		    "At least one slab should be currently allocated");
-		expect_zu_eq(nonfull_slabs, 0,
-		    "slabs_nonfull should be empty");
+		expect_zu_eq(nonfull_slabs, 0, "slabs_nonfull should be empty");
 	}
 
 	dallocx(p, 0);
@@ -324,14 +352,15 @@ TEST_BEGIN(test_stats_arenas_bins) {
 TEST_END
 
 TEST_BEGIN(test_stats_arenas_lextents) {
-	void *p;
+	void    *p;
 	uint64_t epoch, nmalloc, ndalloc;
-	size_t curlextents, sz, hsize;
-	int expected = config_stats ? 0 : ENOENT;
+	size_t   curlextents, sz, hsize;
+	int      expected = config_stats ? 0 : ENOENT;
 
 	sz = sizeof(size_t);
-	expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&hsize, &sz, NULL,
-	    0), 0, "Unexpected mallctl() failure");
+	expect_d_eq(
+	    mallctl("arenas.lextent.0.size", (void *)&hsize, &sz, NULL, 0), 0,
+	    "Unexpected mallctl() failure");
 
 	p = mallocx(hsize, MALLOCX_ARENA(0));
 	expect_ptr_not_null(p, "Unexpected mallocx() failure");
@@ -341,19 +370,19 @@ TEST_BEGIN(test_stats_arenas_lextents) {
 
 	sz = sizeof(uint64_t);
 	expect_d_eq(mallctl("stats.arenas.0.lextents.0.nmalloc",
-	    (void *)&nmalloc, &sz, NULL, 0), expected,
-	    "Unexpected mallctl() result");
+	                (void *)&nmalloc, &sz, NULL, 0),
+	    expected, "Unexpected mallctl() result");
 	expect_d_eq(mallctl("stats.arenas.0.lextents.0.ndalloc",
-	    (void *)&ndalloc, &sz, NULL, 0), expected,
-	    "Unexpected mallctl() result");
+	                (void *)&ndalloc, &sz, NULL, 0),
+	    expected, "Unexpected mallctl() result");
 	sz = sizeof(size_t);
 	expect_d_eq(mallctl("stats.arenas.0.lextents.0.curlextents",
-	    (void *)&curlextents, &sz, NULL, 0), expected,
-	    "Unexpected mallctl() result");
+	                (void *)&curlextents, &sz, NULL, 0),
+	    expected, "Unexpected mallctl() result");
 
 	if (config_stats) {
-		expect_u64_gt(nmalloc, 0,
-		    "nmalloc should be greater than zero");
+		expect_u64_gt(
+		    nmalloc, 0, "nmalloc should be greater than zero");
 		expect_u64_ge(nmalloc, ndalloc,
 		    "nmalloc should be at least as large as ndalloc");
 		expect_u64_gt(curlextents, 0,
@@ -367,35 +396,37 @@ TEST_END
 static void
 test_tcache_bytes_for_usize(size_t usize) {
 	uint64_t epoch;
-	size_t tcache_bytes, tcache_stashed_bytes;
-	size_t sz = sizeof(tcache_bytes);
+	size_t   tcache_bytes, tcache_stashed_bytes;
+	size_t   sz = sizeof(tcache_bytes);
 
 	void *ptr = mallocx(usize, 0);
 
 	expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)),
 	    0, "Unexpected mallctl() failure");
-	assert_d_eq(mallctl(
-	    "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".tcache_bytes",
-	    &tcache_bytes, &sz, NULL, 0), 0, "Unexpected mallctl failure");
-	assert_d_eq(mallctl(
-	    "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL)
-	    ".tcache_stashed_bytes", &tcache_stashed_bytes, &sz, NULL, 0), 0,
-	    "Unexpected mallctl failure");
+	assert_d_eq(mallctl("stats.arenas." STRINGIFY(
+	                        MALLCTL_ARENAS_ALL) ".tcache_bytes",
+	                &tcache_bytes, &sz, NULL, 0),
+	    0, "Unexpected mallctl failure");
+	assert_d_eq(mallctl("stats.arenas." STRINGIFY(
+	                        MALLCTL_ARENAS_ALL) ".tcache_stashed_bytes",
+	                &tcache_stashed_bytes, &sz, NULL, 0),
+	    0, "Unexpected mallctl failure");
 	size_t tcache_bytes_before = tcache_bytes + tcache_stashed_bytes;
 	dallocx(ptr, 0);
 
 	expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)),
 	    0, "Unexpected mallctl() failure");
-	assert_d_eq(mallctl(
-	    "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".tcache_bytes",
-	    &tcache_bytes, &sz, NULL, 0), 0, "Unexpected mallctl failure");
-	assert_d_eq(mallctl(
-	    "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL)
-	    ".tcache_stashed_bytes", &tcache_stashed_bytes, &sz, NULL, 0), 0,
-	    "Unexpected mallctl failure");
+	assert_d_eq(mallctl("stats.arenas." STRINGIFY(
+	                        MALLCTL_ARENAS_ALL) ".tcache_bytes",
+	                &tcache_bytes, &sz, NULL, 0),
+	    0, "Unexpected mallctl failure");
+	assert_d_eq(mallctl("stats.arenas." STRINGIFY(
+	                        MALLCTL_ARENAS_ALL) ".tcache_stashed_bytes",
+	                &tcache_stashed_bytes, &sz, NULL, 0),
+	    0, "Unexpected mallctl failure");
 	size_t tcache_bytes_after = tcache_bytes + tcache_stashed_bytes;
-	assert_zu_eq(tcache_bytes_after - tcache_bytes_before,
-	    usize, "Incorrectly attributed a free");
+	assert_zu_eq(tcache_bytes_after - tcache_bytes_before, usize,
+	    "Incorrectly attributed a free");
 }
 
 TEST_BEGIN(test_stats_tcache_bytes_small) {
@@ -416,16 +447,113 @@ TEST_BEGIN(test_stats_tcache_bytes_large) {
 }
 TEST_END
 
+TEST_BEGIN(test_approximate_stats_active) {
+	/*
+	 * Test 1: create a manual arena that we exclusively control and use it
+	 * to verify the values returned by pa_shard_nactive() is accurate.
+	 * This also helps verify the correctness of approximate_stats.active
+	 * since it simply sums the pa_shard_nactive() of all arenas.
+	 */
+	tsdn_t  *tsdn = tsdn_fetch();
+	unsigned arena_ind;
+	size_t   sz = sizeof(unsigned);
+	expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0),
+	    0, "Arena creation failed");
+
+	arena_t *arena = arena_get(tsdn, arena_ind, false);
+	expect_ptr_not_null(arena, "Failed to get arena");
+
+	size_t nactive_initial = pa_shard_nactive(&arena->pa_shard);
+
+	/*
+	 * Allocate a small size from this arena.  Use MALLOCX_TCACHE_NONE
+	 * to bypass tcache and ensure the allocation goes directly to the
+	 * arena's pa_shard.
+	 */
+	size_t small_alloc_size = 128;
+	void  *p_small = mallocx(
+            small_alloc_size, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE);
+	expect_ptr_not_null(p_small, "Unexpected mallocx() failure for small");
+
+	size_t nactive_after_small = pa_shard_nactive(&arena->pa_shard);
+	/*
+	 * For small allocations, jemalloc allocates a slab.  The slab size can
+	 * be looked up via bin_infos[szind].slab_size.  The assertion allows
+	 * for extra overhead from profiling, HPA, or sanitizer guard pages.
+	 */
+	size_t small_usize = nallocx(
+	    small_alloc_size, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE);
+	szind_t small_szind = sz_size2index(small_usize);
+	size_t  expected_small_pages = bin_infos[small_szind].slab_size / PAGE;
+	expect_zu_ge(nactive_after_small - nactive_initial,
+	    expected_small_pages,
+	    "nactive increase should be at least the slab size in pages");
+
+	/*
+	 * Allocate a large size from this arena.
+	 */
+	size_t large_alloc_size = SC_LARGE_MINCLASS;
+	void  *p_large = mallocx(
+            large_alloc_size, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE);
+	expect_ptr_not_null(p_large, "Unexpected mallocx() failure for large");
+
+	size_t nactive_after_large = pa_shard_nactive(&arena->pa_shard);
+	/*
+	 * For large allocations, the increase in pa_shard_nactive should be at
+	 * least the allocation size in pages with sz_large_pad considered.
+	 * The assertion allows for extra overhead from profiling, HPA, or
+	 * sanitizer guard pages.
+	 */
+	size_t large_usize = nallocx(
+	    large_alloc_size, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE);
+	size_t expected_large_pages = (large_usize + sz_large_pad) / PAGE;
+	expect_zu_ge(nactive_after_large - nactive_after_small,
+	    expected_large_pages,
+	    "nactive increase should be at least the large allocation size in pages");
+
+	/*
+	 * Deallocate both allocations and verify nactive returns to the
+	 * original value.
+	 */
+	dallocx(p_small, MALLOCX_TCACHE_NONE);
+	dallocx(p_large, MALLOCX_TCACHE_NONE);
+
+	size_t nactive_final = pa_shard_nactive(&arena->pa_shard);
+	expect_zu_ge(nactive_final - nactive_after_large,
+	    expected_small_pages + expected_large_pages,
+	    "nactive should return to original value after deallocation");
+
+	/*
+	 * Test 2: allocate a large allocation in the auto arena and confirm
+	 * that approximate_stats.active increases.  Since there may be other
+	 * allocs/dallocs going on, cannot make more accurate assertions like
+	 * Test 1.
+	 */
+	size_t approximate_active_before = 0;
+	size_t approximate_active_after = 0;
+	sz = sizeof(size_t);
+	expect_d_eq(mallctl("approximate_stats.active",
+	                (void *)&approximate_active_before, &sz, NULL, 0),
+	    0, "Unexpected mallctl() result");
+
+	void *p0 = mallocx(4 * SC_SMALL_MAXCLASS, MALLOCX_TCACHE_NONE);
+	expect_ptr_not_null(p0, "Unexpected mallocx() failure");
+
+	expect_d_eq(mallctl("approximate_stats.active",
+	                (void *)&approximate_active_after, &sz, NULL, 0),
+	    0, "Unexpected mallctl() result");
+	expect_zu_gt(approximate_active_after, approximate_active_before,
+	    "approximate_stats.active should increase after the allocation");
+
+	free(p0);
+}
+TEST_END
+
 int
 main(void) {
-	return test_no_reentrancy(
-	    test_stats_summary,
-	    test_stats_large,
-	    test_stats_arenas_summary,
-	    test_stats_arenas_small,
-	    test_stats_arenas_large,
-	    test_stats_arenas_bins,
-	    test_stats_arenas_lextents,
-	    test_stats_tcache_bytes_small,
-	    test_stats_tcache_bytes_large);
+	return test_no_reentrancy(test_stats_summary, test_stats_large,
+	    test_stats_arenas_summary, test_stats_arenas_small,
+	    test_stats_arenas_large, test_stats_arenas_bins,
+	    test_stats_arenas_lextents, test_stats_tcache_bytes_small,
+	    test_stats_tcache_bytes_large, test_approximate_stats_active);
 }
diff --git a/test/unit/stats_print.c b/test/unit/stats_print.c
index 3b317753..e611369c 100644
--- a/test/unit/stats_print.c
+++ b/test/unit/stats_print.c
@@ -21,22 +21,22 @@ typedef enum {
 
 typedef struct parser_s parser_t;
 typedef struct {
-	parser_t	*parser;
-	token_type_t	token_type;
-	size_t		pos;
-	size_t		len;
-	size_t		line;
-	size_t		col;
+	parser_t    *parser;
+	token_type_t token_type;
+	size_t       pos;
+	size_t       len;
+	size_t       line;
+	size_t       col;
 } token_t;
 
 struct parser_s {
-	bool verbose;
-	char	*buf; /* '\0'-terminated. */
-	size_t	len; /* Number of characters preceding '\0' in buf. */
-	size_t	pos;
-	size_t	line;
-	size_t	col;
-	token_t	token;
+	bool    verbose;
+	char   *buf; /* '\0'-terminated. */
+	size_t  len; /* Number of characters preceding '\0' in buf. */
+	size_t  pos;
+	size_t  line;
+	size_t  col;
+	token_t token;
 };
 
 static void
@@ -63,12 +63,12 @@ token_error(token_t *token) {
 		    token->line, token->col);
 		break;
 	default:
-		malloc_printf("%zu:%zu: Unexpected token: ", token->line,
-		    token->col);
+		malloc_printf(
+		    "%zu:%zu: Unexpected token: ", token->line, token->col);
 		break;
 	}
-	UNUSED ssize_t err = malloc_write_fd(STDERR_FILENO,
-	    &token->parser->buf[token->pos], token->len);
+	UNUSED ssize_t err = malloc_write_fd(
+	    STDERR_FILENO, &token->parser->buf[token->pos], token->len);
 	malloc_printf("\n");
 }
 
@@ -92,9 +92,9 @@ parser_fini(parser_t *parser) {
 static bool
 parser_append(parser_t *parser, const char *str) {
 	size_t len = strlen(str);
-	char *buf = (parser->buf == NULL) ? mallocx(len + 1,
-	    MALLOCX_TCACHE_NONE) : rallocx(parser->buf, parser->len + len + 1,
-	    MALLOCX_TCACHE_NONE);
+	char  *buf = (parser->buf == NULL)
+	     ? mallocx(len + 1, MALLOCX_TCACHE_NONE)
+	     : rallocx(parser->buf, parser->len + len + 1, MALLOCX_TCACHE_NONE);
 	if (buf == NULL) {
 		return true;
 	}
@@ -109,9 +109,19 @@ parser_tokenize(parser_t *parser) {
 	enum {
 		STATE_START,
 		STATE_EOI,
-		STATE_N, STATE_NU, STATE_NUL, STATE_NULL,
-		STATE_F, STATE_FA, STATE_FAL, STATE_FALS, STATE_FALSE,
-		STATE_T, STATE_TR, STATE_TRU, STATE_TRUE,
+		STATE_N,
+		STATE_NU,
+		STATE_NUL,
+		STATE_NULL,
+		STATE_F,
+		STATE_FA,
+		STATE_FAL,
+		STATE_FALS,
+		STATE_FALSE,
+		STATE_T,
+		STATE_TR,
+		STATE_TRU,
+		STATE_TRUE,
 		STATE_LBRACKET,
 		STATE_RBRACKET,
 		STATE_LBRACE,
@@ -120,7 +130,10 @@ parser_tokenize(parser_t *parser) {
 		STATE_COMMA,
 		STATE_CHARS,
 		STATE_CHAR_ESCAPE,
-		STATE_CHAR_U, STATE_CHAR_UD, STATE_CHAR_UDD, STATE_CHAR_UDDD,
+		STATE_CHAR_U,
+		STATE_CHAR_UD,
+		STATE_CHAR_UDD,
+		STATE_CHAR_UDDD,
 		STATE_STRING,
 		STATE_MINUS,
 		STATE_LEADING_ZERO,
@@ -132,12 +145,12 @@ parser_tokenize(parser_t *parser) {
 		STATE_EXP_DIGITS,
 		STATE_ACCEPT
 	} state = STATE_START;
-	size_t token_pos JEMALLOC_CC_SILENCE_INIT(0);
+	size_t token_pos  JEMALLOC_CC_SILENCE_INIT(0);
 	size_t token_line JEMALLOC_CC_SILENCE_INIT(1);
-	size_t token_col JEMALLOC_CC_SILENCE_INIT(0);
+	size_t token_col  JEMALLOC_CC_SILENCE_INIT(0);
 
-	expect_zu_le(parser->pos, parser->len,
-	    "Position is past end of buffer");
+	expect_zu_le(
+	    parser->pos, parser->len, "Position is past end of buffer");
 
 	while (state != STATE_ACCEPT) {
 		char c = parser->buf[parser->pos];
@@ -148,7 +161,11 @@ parser_tokenize(parser_t *parser) {
 			token_line = parser->line;
 			token_col = parser->col;
 			switch (c) {
-			case ' ': case '\b': case '\n': case '\r': case '\t':
+			case ' ':
+			case '\b':
+			case '\n':
+			case '\r':
+			case '\t':
 				break;
 			case '\0':
 				state = STATE_EOI;
@@ -189,21 +206,29 @@ parser_tokenize(parser_t *parser) {
 			case '0':
 				state = STATE_LEADING_ZERO;
 				break;
-			case '1': case '2': case '3': case '4':
-			case '5': case '6': case '7': case '8': case '9':
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
 				state = STATE_DIGITS;
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
-				    - token_pos, token_line, token_col);
+				    TOKEN_TYPE_ERROR, token_pos,
+				    parser->pos + 1 - token_pos, token_line,
+				    token_col);
 				return true;
 			}
 			break;
 		case STATE_EOI:
-			token_init(&parser->token, parser,
-			    TOKEN_TYPE_EOI, token_pos, parser->pos -
-			    token_pos, token_line, token_col);
+			token_init(&parser->token, parser, TOKEN_TYPE_EOI,
+			    token_pos, parser->pos - token_pos, token_line,
+			    token_col);
 			state = STATE_ACCEPT;
 			break;
 		case STATE_N:
@@ -213,8 +238,9 @@ parser_tokenize(parser_t *parser) {
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
-				    - token_pos, token_line, token_col);
+				    TOKEN_TYPE_ERROR, token_pos,
+				    parser->pos + 1 - token_pos, token_line,
+				    token_col);
 				return true;
 			}
 			break;
@@ -225,8 +251,9 @@ parser_tokenize(parser_t *parser) {
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
-				    - token_pos, token_line, token_col);
+				    TOKEN_TYPE_ERROR, token_pos,
+				    parser->pos + 1 - token_pos, token_line,
+				    token_col);
 				return true;
 			}
 			break;
@@ -237,22 +264,32 @@ parser_tokenize(parser_t *parser) {
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
-				    - token_pos, token_line, token_col);
+				    TOKEN_TYPE_ERROR, token_pos,
+				    parser->pos + 1 - token_pos, token_line,
+				    token_col);
 				return true;
 			}
 			break;
 		case STATE_NULL:
 			switch (c) {
-			case ' ': case '\b': case '\n': case '\r': case '\t':
+			case ' ':
+			case '\b':
+			case '\n':
+			case '\r':
+			case '\t':
 			case '\0':
-			case '[': case ']': case '{': case '}': case ':':
+			case '[':
+			case ']':
+			case '{':
+			case '}':
+			case ':':
 			case ',':
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
-				    - token_pos, token_line, token_col);
+				    TOKEN_TYPE_ERROR, token_pos,
+				    parser->pos + 1 - token_pos, token_line,
+				    token_col);
 				return true;
 			}
 			token_init(&parser->token, parser, TOKEN_TYPE_NULL,
@@ -267,8 +304,9 @@ parser_tokenize(parser_t *parser) {
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
-				    - token_pos, token_line, token_col);
+				    TOKEN_TYPE_ERROR, token_pos,
+				    parser->pos + 1 - token_pos, token_line,
+				    token_col);
 				return true;
 			}
 			break;
@@ -279,8 +317,9 @@ parser_tokenize(parser_t *parser) {
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
-				    - token_pos, token_line, token_col);
+				    TOKEN_TYPE_ERROR, token_pos,
+				    parser->pos + 1 - token_pos, token_line,
+				    token_col);
 				return true;
 			}
 			break;
@@ -291,8 +330,9 @@ parser_tokenize(parser_t *parser) {
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
-				    - token_pos, token_line, token_col);
+				    TOKEN_TYPE_ERROR, token_pos,
+				    parser->pos + 1 - token_pos, token_line,
+				    token_col);
 				return true;
 			}
 			break;
@@ -303,27 +343,37 @@ parser_tokenize(parser_t *parser) {
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
-				    - token_pos, token_line, token_col);
+				    TOKEN_TYPE_ERROR, token_pos,
+				    parser->pos + 1 - token_pos, token_line,
+				    token_col);
 				return true;
 			}
 			break;
 		case STATE_FALSE:
 			switch (c) {
-			case ' ': case '\b': case '\n': case '\r': case '\t':
+			case ' ':
+			case '\b':
+			case '\n':
+			case '\r':
+			case '\t':
 			case '\0':
-			case '[': case ']': case '{': case '}': case ':':
+			case '[':
+			case ']':
+			case '{':
+			case '}':
+			case ':':
 			case ',':
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
-				    - token_pos, token_line, token_col);
+				    TOKEN_TYPE_ERROR, token_pos,
+				    parser->pos + 1 - token_pos, token_line,
+				    token_col);
 				return true;
 			}
-			token_init(&parser->token, parser,
-			    TOKEN_TYPE_FALSE, token_pos, parser->pos -
-			    token_pos, token_line, token_col);
+			token_init(&parser->token, parser, TOKEN_TYPE_FALSE,
+			    token_pos, parser->pos - token_pos, token_line,
+			    token_col);
 			state = STATE_ACCEPT;
 			break;
 		case STATE_T:
@@ -333,8 +383,9 @@ parser_tokenize(parser_t *parser) {
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
-				    - token_pos, token_line, token_col);
+				    TOKEN_TYPE_ERROR, token_pos,
+				    parser->pos + 1 - token_pos, token_line,
+				    token_col);
 				return true;
 			}
 			break;
@@ -345,8 +396,9 @@ parser_tokenize(parser_t *parser) {
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
-				    - token_pos, token_line, token_col);
+				    TOKEN_TYPE_ERROR, token_pos,
+				    parser->pos + 1 - token_pos, token_line,
+				    token_col);
 				return true;
 			}
 			break;
@@ -357,22 +409,32 @@ parser_tokenize(parser_t *parser) {
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
-				    - token_pos, token_line, token_col);
+				    TOKEN_TYPE_ERROR, token_pos,
+				    parser->pos + 1 - token_pos, token_line,
+				    token_col);
 				return true;
 			}
 			break;
 		case STATE_TRUE:
 			switch (c) {
-			case ' ': case '\b': case '\n': case '\r': case '\t':
+			case ' ':
+			case '\b':
+			case '\n':
+			case '\r':
+			case '\t':
 			case '\0':
-			case '[': case ']': case '{': case '}': case ':':
+			case '[':
+			case ']':
+			case '{':
+			case '}':
+			case ':':
 			case ',':
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
-				    - token_pos, token_line, token_col);
+				    TOKEN_TYPE_ERROR, token_pos,
+				    parser->pos + 1 - token_pos, token_line,
+				    token_col);
 				return true;
 			}
 			token_init(&parser->token, parser, TOKEN_TYPE_TRUE,
@@ -424,16 +486,42 @@ parser_tokenize(parser_t *parser) {
 			case '"':
 				state = STATE_STRING;
 				break;
-			case 0x00: case 0x01: case 0x02: case 0x03: case 0x04:
-			case 0x05: case 0x06: case 0x07: case 0x08: case 0x09:
-			case 0x0a: case 0x0b: case 0x0c: case 0x0d: case 0x0e:
-			case 0x0f: case 0x10: case 0x11: case 0x12: case 0x13:
-			case 0x14: case 0x15: case 0x16: case 0x17: case 0x18:
-			case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d:
-			case 0x1e: case 0x1f:
+			case 0x00:
+			case 0x01:
+			case 0x02:
+			case 0x03:
+			case 0x04:
+			case 0x05:
+			case 0x06:
+			case 0x07:
+			case 0x08:
+			case 0x09:
+			case 0x0a:
+			case 0x0b:
+			case 0x0c:
+			case 0x0d:
+			case 0x0e:
+			case 0x0f:
+			case 0x10:
+			case 0x11:
+			case 0x12:
+			case 0x13:
+			case 0x14:
+			case 0x15:
+			case 0x16:
+			case 0x17:
+			case 0x18:
+			case 0x19:
+			case 0x1a:
+			case 0x1b:
+			case 0x1c:
+			case 0x1d:
+			case 0x1e:
+			case 0x1f:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
-				    - token_pos, token_line, token_col);
+				    TOKEN_TYPE_ERROR, token_pos,
+				    parser->pos + 1 - token_pos, token_line,
+				    token_col);
 				return true;
 			default:
 				break;
@@ -441,8 +529,13 @@ parser_tokenize(parser_t *parser) {
 			break;
 		case STATE_CHAR_ESCAPE:
 			switch (c) {
-			case '"': case '\\': case '/': case 'b': case 'n':
-			case 'r': case 't':
+			case '"':
+			case '\\':
+			case '/':
+			case 'b':
+			case 'n':
+			case 'r':
+			case 't':
 				state = STATE_CHARS;
 				break;
 			case 'u':
@@ -450,76 +543,145 @@ parser_tokenize(parser_t *parser) {
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
-				    - token_pos, token_line, token_col);
+				    TOKEN_TYPE_ERROR, token_pos,
+				    parser->pos + 1 - token_pos, token_line,
+				    token_col);
 				return true;
 			}
 			break;
 		case STATE_CHAR_U:
 			switch (c) {
-			case '0': case '1': case '2': case '3': case '4':
-			case '5': case '6': case '7': case '8': case '9':
-			case 'a': case 'b': case 'c': case 'd': case 'e':
+			case '0':
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
+			case 'a':
+			case 'b':
+			case 'c':
+			case 'd':
+			case 'e':
 			case 'f':
-			case 'A': case 'B': case 'C': case 'D': case 'E':
+			case 'A':
+			case 'B':
+			case 'C':
+			case 'D':
+			case 'E':
 			case 'F':
 				state = STATE_CHAR_UD;
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
-				    - token_pos, token_line, token_col);
+				    TOKEN_TYPE_ERROR, token_pos,
+				    parser->pos + 1 - token_pos, token_line,
+				    token_col);
 				return true;
 			}
 			break;
 		case STATE_CHAR_UD:
 			switch (c) {
-			case '0': case '1': case '2': case '3': case '4':
-			case '5': case '6': case '7': case '8': case '9':
-			case 'a': case 'b': case 'c': case 'd': case 'e':
+			case '0':
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
+			case 'a':
+			case 'b':
+			case 'c':
+			case 'd':
+			case 'e':
 			case 'f':
-			case 'A': case 'B': case 'C': case 'D': case 'E':
+			case 'A':
+			case 'B':
+			case 'C':
+			case 'D':
+			case 'E':
 			case 'F':
 				state = STATE_CHAR_UDD;
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
-				    - token_pos, token_line, token_col);
+				    TOKEN_TYPE_ERROR, token_pos,
+				    parser->pos + 1 - token_pos, token_line,
+				    token_col);
 				return true;
 			}
 			break;
 		case STATE_CHAR_UDD:
 			switch (c) {
-			case '0': case '1': case '2': case '3': case '4':
-			case '5': case '6': case '7': case '8': case '9':
-			case 'a': case 'b': case 'c': case 'd': case 'e':
+			case '0':
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
+			case 'a':
+			case 'b':
+			case 'c':
+			case 'd':
+			case 'e':
 			case 'f':
-			case 'A': case 'B': case 'C': case 'D': case 'E':
+			case 'A':
+			case 'B':
+			case 'C':
+			case 'D':
+			case 'E':
 			case 'F':
 				state = STATE_CHAR_UDDD;
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
-				    - token_pos, token_line, token_col);
+				    TOKEN_TYPE_ERROR, token_pos,
+				    parser->pos + 1 - token_pos, token_line,
+				    token_col);
 				return true;
 			}
 			break;
 		case STATE_CHAR_UDDD:
 			switch (c) {
-			case '0': case '1': case '2': case '3': case '4':
-			case '5': case '6': case '7': case '8': case '9':
-			case 'a': case 'b': case 'c': case 'd': case 'e':
+			case '0':
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
+			case 'a':
+			case 'b':
+			case 'c':
+			case 'd':
+			case 'e':
 			case 'f':
-			case 'A': case 'B': case 'C': case 'D': case 'E':
+			case 'A':
+			case 'B':
+			case 'C':
+			case 'D':
+			case 'E':
 			case 'F':
 				state = STATE_CHARS;
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
-				    - token_pos, token_line, token_col);
+				    TOKEN_TYPE_ERROR, token_pos,
+				    parser->pos + 1 - token_pos, token_line,
+				    token_col);
 				return true;
 			}
 			break;
@@ -534,14 +696,22 @@ parser_tokenize(parser_t *parser) {
 			case '0':
 				state = STATE_LEADING_ZERO;
 				break;
-			case '1': case '2': case '3': case '4':
-			case '5': case '6': case '7': case '8': case '9':
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
 				state = STATE_DIGITS;
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
-				    - token_pos, token_line, token_col);
+				    TOKEN_TYPE_ERROR, token_pos,
+				    parser->pos + 1 - token_pos, token_line,
+				    token_col);
 				return true;
 			}
 			break;
@@ -552,95 +722,152 @@ parser_tokenize(parser_t *parser) {
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_NUMBER, token_pos, parser->pos -
-				    token_pos, token_line, token_col);
+				    TOKEN_TYPE_NUMBER, token_pos,
+				    parser->pos - token_pos, token_line,
+				    token_col);
 				state = STATE_ACCEPT;
 				break;
 			}
 			break;
 		case STATE_DIGITS:
 			switch (c) {
-			case '0': case '1': case '2': case '3': case '4':
-			case '5': case '6': case '7': case '8': case '9':
+			case '0':
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
 				break;
 			case '.':
 				state = STATE_DECIMAL;
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_NUMBER, token_pos, parser->pos -
-				    token_pos, token_line, token_col);
+				    TOKEN_TYPE_NUMBER, token_pos,
+				    parser->pos - token_pos, token_line,
+				    token_col);
 				state = STATE_ACCEPT;
 				break;
 			}
 			break;
 		case STATE_DECIMAL:
 			switch (c) {
-			case '0': case '1': case '2': case '3': case '4':
-			case '5': case '6': case '7': case '8': case '9':
+			case '0':
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
 				state = STATE_FRAC_DIGITS;
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
-				    - token_pos, token_line, token_col);
+				    TOKEN_TYPE_ERROR, token_pos,
+				    parser->pos + 1 - token_pos, token_line,
+				    token_col);
 				return true;
 			}
 			break;
 		case STATE_FRAC_DIGITS:
 			switch (c) {
-			case '0': case '1': case '2': case '3': case '4':
-			case '5': case '6': case '7': case '8': case '9':
+			case '0':
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
 				break;
-			case 'e': case 'E':
+			case 'e':
+			case 'E':
 				state = STATE_EXP;
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_NUMBER, token_pos, parser->pos -
-				    token_pos, token_line, token_col);
+				    TOKEN_TYPE_NUMBER, token_pos,
+				    parser->pos - token_pos, token_line,
+				    token_col);
 				state = STATE_ACCEPT;
 				break;
 			}
 			break;
 		case STATE_EXP:
 			switch (c) {
-			case '-': case '+':
+			case '-':
+			case '+':
 				state = STATE_EXP_SIGN;
 				break;
-			case '0': case '1': case '2': case '3': case '4':
-			case '5': case '6': case '7': case '8': case '9':
+			case '0':
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
 				state = STATE_EXP_DIGITS;
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
-				    - token_pos, token_line, token_col);
+				    TOKEN_TYPE_ERROR, token_pos,
+				    parser->pos + 1 - token_pos, token_line,
+				    token_col);
 				return true;
 			}
 			break;
 		case STATE_EXP_SIGN:
 			switch (c) {
-			case '0': case '1': case '2': case '3': case '4':
-			case '5': case '6': case '7': case '8': case '9':
+			case '0':
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
 				state = STATE_EXP_DIGITS;
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
-				    - token_pos, token_line, token_col);
+				    TOKEN_TYPE_ERROR, token_pos,
+				    parser->pos + 1 - token_pos, token_line,
+				    token_col);
 				return true;
 			}
 			break;
 		case STATE_EXP_DIGITS:
 			switch (c) {
-			case '0': case '1': case '2': case '3': case '4':
-			case '5': case '6': case '7': case '8': case '9':
+			case '0':
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
 				break;
 			default:
 				token_init(&parser->token, parser,
-				    TOKEN_TYPE_NUMBER, token_pos, parser->pos -
-				    token_pos, token_line, token_col);
+				    TOKEN_TYPE_NUMBER, token_pos,
+				    parser->pos - token_pos, token_line,
+				    token_col);
 				state = STATE_ACCEPT;
 				break;
 			}
@@ -662,8 +889,8 @@ parser_tokenize(parser_t *parser) {
 	return false;
 }
 
-static bool	parser_parse_array(parser_t *parser);
-static bool	parser_parse_object(parser_t *parser);
+static bool parser_parse_array(parser_t *parser);
+static bool parser_parse_object(parser_t *parser);
 
 static bool
 parser_parse_value(parser_t *parser) {
@@ -824,80 +1051,80 @@ label_error:
 }
 
 TEST_BEGIN(test_json_parser) {
-	size_t i;
+	size_t      i;
 	const char *invalid_inputs[] = {
-		/* Tokenizer error case tests. */
-		"{ \"string\": X }",
-		"{ \"string\": nXll }",
-		"{ \"string\": nuXl }",
-		"{ \"string\": nulX }",
-		"{ \"string\": nullX }",
-		"{ \"string\": fXlse }",
-		"{ \"string\": faXse }",
-		"{ \"string\": falXe }",
-		"{ \"string\": falsX }",
-		"{ \"string\": falseX }",
-		"{ \"string\": tXue }",
-		"{ \"string\": trXe }",
-		"{ \"string\": truX }",
-		"{ \"string\": trueX }",
-		"{ \"string\": \"\n\" }",
-		"{ \"string\": \"\\z\" }",
-		"{ \"string\": \"\\uX000\" }",
-		"{ \"string\": \"\\u0X00\" }",
-		"{ \"string\": \"\\u00X0\" }",
-		"{ \"string\": \"\\u000X\" }",
-		"{ \"string\": -X }",
-		"{ \"string\": 0.X }",
-		"{ \"string\": 0.0eX }",
-		"{ \"string\": 0.0e+X }",
+	    /* Tokenizer error case tests. */
+	    "{ \"string\": X }",
+	    "{ \"string\": nXll }",
+	    "{ \"string\": nuXl }",
+	    "{ \"string\": nulX }",
+	    "{ \"string\": nullX }",
+	    "{ \"string\": fXlse }",
+	    "{ \"string\": faXse }",
+	    "{ \"string\": falXe }",
+	    "{ \"string\": falsX }",
+	    "{ \"string\": falseX }",
+	    "{ \"string\": tXue }",
+	    "{ \"string\": trXe }",
+	    "{ \"string\": truX }",
+	    "{ \"string\": trueX }",
+	    "{ \"string\": \"\n\" }",
+	    "{ \"string\": \"\\z\" }",
+	    "{ \"string\": \"\\uX000\" }",
+	    "{ \"string\": \"\\u0X00\" }",
+	    "{ \"string\": \"\\u00X0\" }",
+	    "{ \"string\": \"\\u000X\" }",
+	    "{ \"string\": -X }",
+	    "{ \"string\": 0.X }",
+	    "{ \"string\": 0.0eX }",
+	    "{ \"string\": 0.0e+X }",
 
-		/* Parser error test cases. */
-		"{\"string\": }",
-		"{\"string\" }",
-		"{\"string\": [ 0 }",
-		"{\"string\": {\"a\":0, 1 } }",
-		"{\"string\": {\"a\":0: } }",
-		"{",
-		"{}{",
+	    /* Parser error test cases. */
+	    "{\"string\": }",
+	    "{\"string\" }",
+	    "{\"string\": [ 0 }",
+	    "{\"string\": {\"a\":0, 1 } }",
+	    "{\"string\": {\"a\":0: } }",
+	    "{",
+	    "{}{",
 	};
 	const char *valid_inputs[] = {
-		/* Token tests. */
-		"null",
-		"false",
-		"true",
-		"{}",
-		"{\"a\": 0}",
-		"[]",
-		"[0, 1]",
-		"0",
-		"1",
-		"10",
-		"-10",
-		"10.23",
-		"10.23e4",
-		"10.23e-4",
-		"10.23e+4",
-		"10.23E4",
-		"10.23E-4",
-		"10.23E+4",
-		"-10.23",
-		"-10.23e4",
-		"-10.23e-4",
-		"-10.23e+4",
-		"-10.23E4",
-		"-10.23E-4",
-		"-10.23E+4",
-		"\"value\"",
-		"\" \\\" \\/ \\b \\n \\r \\t \\u0abc \\u1DEF \"",
+	    /* Token tests. */
+	    "null",
+	    "false",
+	    "true",
+	    "{}",
+	    "{\"a\": 0}",
+	    "[]",
+	    "[0, 1]",
+	    "0",
+	    "1",
+	    "10",
+	    "-10",
+	    "10.23",
+	    "10.23e4",
+	    "10.23e-4",
+	    "10.23e+4",
+	    "10.23E4",
+	    "10.23E-4",
+	    "10.23E+4",
+	    "-10.23",
+	    "-10.23e4",
+	    "-10.23e-4",
+	    "-10.23e+4",
+	    "-10.23E4",
+	    "-10.23E-4",
+	    "-10.23E+4",
+	    "\"value\"",
+	    "\" \\\" \\/ \\b \\n \\r \\t \\u0abc \\u1DEF \"",
 
-		/* Parser test with various nesting. */
-		"{\"a\":null, \"b\":[1,[{\"c\":2},3]], \"d\":{\"e\":true}}",
+	    /* Parser test with various nesting. */
+	    "{\"a\":null, \"b\":[1,[{\"c\":2},3]], \"d\":{\"e\":true}}",
 	};
 
-	for (i = 0; i < sizeof(invalid_inputs)/sizeof(const char *); i++) {
+	for (i = 0; i < sizeof(invalid_inputs) / sizeof(const char *); i++) {
 		const char *input = invalid_inputs[i];
-		parser_t parser;
+		parser_t    parser;
 		parser_init(&parser, false);
 		expect_false(parser_append(&parser, input),
 		    "Unexpected input appending failure");
@@ -906,9 +1133,9 @@ TEST_BEGIN(test_json_parser) {
 		parser_fini(&parser);
 	}
 
-	for (i = 0; i < sizeof(valid_inputs)/sizeof(const char *); i++) {
+	for (i = 0; i < sizeof(valid_inputs) / sizeof(const char *); i++) {
 		const char *input = valid_inputs[i];
-		parser_t parser;
+		parser_t    parser;
 		parser_init(&parser, true);
 		expect_false(parser_append(&parser, input),
 		    "Unexpected input appending failure");
@@ -929,27 +1156,27 @@ write_cb(void *opaque, const char *str) {
 
 TEST_BEGIN(test_stats_print_json) {
 	const char *opts[] = {
-		"J",
-		"Jg",
-		"Jm",
-		"Jd",
-		"Jmd",
-		"Jgd",
-		"Jgm",
-		"Jgmd",
-		"Ja",
-		"Jb",
-		"Jl",
-		"Jx",
-		"Jbl",
-		"Jal",
-		"Jab",
-		"Jabl",
-		"Jax",
-		"Jbx",
-		"Jlx",
-		"Jablx",
-		"Jgmdablx",
+	    "J",
+	    "Jg",
+	    "Jm",
+	    "Jd",
+	    "Jmd",
+	    "Jgd",
+	    "Jgm",
+	    "Jgmd",
+	    "Ja",
+	    "Jb",
+	    "Jl",
+	    "Jx",
+	    "Jbl",
+	    "Jal",
+	    "Jab",
+	    "Jabl",
+	    "Jax",
+	    "Jbx",
+	    "Jlx",
+	    "Jablx",
+	    "Jgmdablx",
 	};
 	unsigned arena_ind, i;
 
@@ -962,23 +1189,27 @@ TEST_BEGIN(test_stats_print_json) {
 		case 1: {
 			size_t sz = sizeof(arena_ind);
 			expect_d_eq(mallctl("arenas.create", (void *)&arena_ind,
-			    &sz, NULL, 0), 0, "Unexpected mallctl failure");
+			                &sz, NULL, 0),
+			    0, "Unexpected mallctl failure");
 			break;
-		} case 2: {
+		}
+		case 2: {
 			size_t mib[3];
-			size_t miblen = sizeof(mib)/sizeof(size_t);
-			expect_d_eq(mallctlnametomib("arena.0.destroy",
-			    mib, &miblen), 0,
-			    "Unexpected mallctlnametomib failure");
+			size_t miblen = sizeof(mib) / sizeof(size_t);
+			expect_d_eq(
+			    mallctlnametomib("arena.0.destroy", mib, &miblen),
+			    0, "Unexpected mallctlnametomib failure");
 			mib[1] = arena_ind;
-			expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL,
-			    0), 0, "Unexpected mallctlbymib failure");
+			expect_d_eq(
+			    mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0,
+			    "Unexpected mallctlbymib failure");
 			break;
-		} default:
+		}
+		default:
 			not_reached();
 		}
 
-		for (j = 0; j < sizeof(opts)/sizeof(const char *); j++) {
+		for (j = 0; j < sizeof(opts) / sizeof(const char *); j++) {
 			parser_t parser;
 
 			parser_init(&parser, true);
@@ -993,7 +1224,5 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_json_parser,
-	    test_stats_print_json);
+	return test(test_json_parser, test_stats_print_json);
 }
diff --git a/test/unit/sz.c b/test/unit/sz.c
index 8ae04b92..fa2b8dc0 100644
--- a/test/unit/sz.c
+++ b/test/unit/sz.c
@@ -10,8 +10,8 @@ TEST_BEGIN(test_sz_psz2ind) {
 	for (size_t i = 0; i < SC_NGROUP; i++) {
 		for (size_t psz = i * PAGE + 1; psz <= (i + 1) * PAGE; psz++) {
 			pszind_t ind = sz_psz2ind(psz);
-			expect_zu_eq(ind, i, "Got %u as sz_psz2ind of %zu", ind,
-			    psz);
+			expect_zu_eq(
+			    ind, i, "Got %u as sz_psz2ind of %zu", ind, psz);
 		}
 	}
 
@@ -25,15 +25,14 @@ TEST_BEGIN(test_sz_psz2ind) {
 	 */
 	size_t base_psz = 1 << (SC_LG_NGROUP + LG_PAGE);
 	size_t base_ind = 0;
-	while (base_ind < SC_NSIZES &&
-	    reg_size_compute(data.sc[base_ind].lg_base,
-		data.sc[base_ind].lg_delta,
-		data.sc[base_ind].ndelta) < base_psz) {
+	while (base_ind < SC_NSIZES
+	    && reg_size_compute(data.sc[base_ind].lg_base,
+	           data.sc[base_ind].lg_delta, data.sc[base_ind].ndelta)
+	        < base_psz) {
 		base_ind++;
 	}
-	expect_zu_eq(
-	    reg_size_compute(data.sc[base_ind].lg_base,
-		data.sc[base_ind].lg_delta, data.sc[base_ind].ndelta),
+	expect_zu_eq(reg_size_compute(data.sc[base_ind].lg_base,
+	                 data.sc[base_ind].lg_delta, data.sc[base_ind].ndelta),
 	    base_psz, "Size class equal to %zu not found", base_psz);
 	/*
 	 * Test different sizes falling into groups after the 'base'. The
@@ -42,21 +41,21 @@ TEST_BEGIN(test_sz_psz2ind) {
 	base_ind -= SC_NGROUP;
 	for (size_t psz = base_psz; psz <= 64 * 1024 * 1024; psz += PAGE / 3) {
 		pszind_t ind = sz_psz2ind(psz);
-		sc_t gt_sc = data.sc[ind + base_ind];
+		sc_t     gt_sc = data.sc[ind + base_ind];
 		expect_zu_gt(psz,
-		    reg_size_compute(gt_sc.lg_base, gt_sc.lg_delta,
-			gt_sc.ndelta),
+		    reg_size_compute(
+		        gt_sc.lg_base, gt_sc.lg_delta, gt_sc.ndelta),
 		    "Got %u as sz_psz2ind of %zu", ind, psz);
 		sc_t le_sc = data.sc[ind + base_ind + 1];
 		expect_zu_le(psz,
-		    reg_size_compute(le_sc.lg_base, le_sc.lg_delta,
-			le_sc.ndelta),
+		    reg_size_compute(
+		        le_sc.lg_base, le_sc.lg_delta, le_sc.ndelta),
 		    "Got %u as sz_psz2ind of %zu", ind, psz);
 	}
 
 	pszind_t max_ind = sz_psz2ind(SC_LARGE_MAXCLASS + 1);
-	expect_lu_eq(max_ind, SC_NPSIZES,
-	    "Got %u as sz_psz2ind of %llu", max_ind, SC_LARGE_MAXCLASS);
+	expect_lu_eq(max_ind, SC_NPSIZES, "Got %u as sz_psz2ind of %llu",
+	    max_ind, SC_LARGE_MAXCLASS);
 }
 TEST_END
 
diff --git a/test/unit/tcache_init.c b/test/unit/tcache_init.c
new file mode 100644
index 00000000..11d4b654
--- /dev/null
+++ b/test/unit/tcache_init.c
@@ -0,0 +1,116 @@
+#include "test/jemalloc_test.h"
+
+static void *
+tcache_stack_alloc_fail(tsdn_t *tsdn, size_t size, size_t alignment) {
+	return NULL;
+}
+
+TEST_BEGIN(test_tcache_data_init_oom) {
+	bool orig_opt_abort = opt_abort;
+	void *(*orig_tcache_stack_alloc)(tsdn_t *, size_t, size_t) =
+	    tcache_stack_alloc;
+
+	opt_abort = false;
+	tcache_stack_alloc = tcache_stack_alloc_fail;
+
+	/*
+	 * Trigger init through tcache_enabled_set by enabling and
+	 * disabling the tcache.
+	 */
+	bool e0, e1;
+	size_t bool_sz = sizeof(bool);
+
+	/* Disable the tcache. */
+	e1 = false;
+	expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &bool_sz,
+	    (void *)&e1, bool_sz), 0, "Unexpected mallctl failure");
+
+	/* Try to enable the tcache.  Initialization should fail. */
+	e1 = true;
+	expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &bool_sz,
+	    (void *)&e1, bool_sz), 0, "Unexpected mallctl failure");
+
+	/* The tcache should be disabled. */
+	tsd_t *tsd = tsd_fetch();
+	expect_false(tsd_tcache_enabled_get(tsd),
+	    "tcache should be disabled after init failure");
+
+	/* Allocations should go to the arena. */
+	void *p = malloc(64);
+	expect_ptr_not_null(p, "malloc should succeed without tcache");
+	free(p);
+
+	/* Restore the original values */
+	tcache_stack_alloc = orig_tcache_stack_alloc;
+	opt_abort = orig_opt_abort;
+
+	/*
+	 * Try to enable the tcache again.  This time initialization
+	 * should succeed.
+	 */
+	e1 = true;
+	expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &bool_sz,
+	    (void *)&e1, bool_sz), 0, "Unexpected mallctl failure");
+}
+TEST_END
+
+TEST_BEGIN(test_tcache_reinit_oom) {
+	bool orig_opt_abort = opt_abort;
+	void *(*orig_tcache_stack_alloc)(tsdn_t *, size_t, size_t) =
+	    tcache_stack_alloc;
+
+	/* Read current tcache max. */
+	size_t old_tcache_max, sz;
+	sz = sizeof(old_tcache_max);
+	expect_d_eq(mallctl("thread.tcache.max", (void *)&old_tcache_max, &sz,
+	    NULL, 0), 0, "Unexpected mallctl failure");
+
+	opt_abort = false;
+	tcache_stack_alloc = tcache_stack_alloc_fail;
+
+	/*
+	 * Setting thread.tcache.max causes a reinitialization.  With
+	 * the thread_stack_alloc override reinitialization should
+	 * fail and disable tcache.
+	 */
+	size_t new_tcache_max = 1024;
+	new_tcache_max = sz_s2u(new_tcache_max);
+	expect_d_eq(mallctl("thread.tcache.max", NULL, NULL,
+	    (void *)&new_tcache_max, sizeof(new_tcache_max)), 0,
+	    "Unexpected mallctl failure");
+
+	/* Check that the tcache was disabled. */
+	tsd_t *tsd = tsd_fetch();
+	expect_false(tsd_tcache_enabled_get(tsd),
+	    "tcache should be disabled after reinit failure");
+
+	/* Allocations should go to the arena. */
+	void *p = malloc(64);
+	expect_ptr_not_null(p, "malloc should succeed without tcache");
+	free(p);
+
+	/* Restore the original values */
+	tcache_stack_alloc = orig_tcache_stack_alloc;
+	opt_abort = orig_opt_abort;
+
+	/*
+	 * Try to enable the tcache again.  This time initialization
+	 * should succeed.
+	 */
+	bool e0, e1;
+	size_t bool_sz = sizeof(bool);
+	e1 = true;
+	expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &bool_sz,
+	    (void *)&e1, bool_sz), 0, "Unexpected mallctl failure");
+
+	/* Restore the original tcache max. */
+	expect_d_eq(mallctl("thread.tcache.max", NULL, NULL,
+	    (void *)&old_tcache_max, sizeof(old_tcache_max)), 0,
+	    "Unexpected mallctl failure");
+}
+TEST_END
+
+int
+main(void) {
+	return test(test_tcache_data_init_oom, test_tcache_reinit_oom);
+}
diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c
index 1f657c85..ab54da39 100644
--- a/test/unit/tcache_max.c
+++ b/test/unit/tcache_max.c
@@ -3,12 +3,7 @@
 
 const char *malloc_conf = TEST_SAN_UAF_ALIGN_DISABLE;
 
-enum {
-	alloc_option_start = 0,
-	use_malloc = 0,
-	use_mallocx,
-	alloc_option_end
-};
+enum { alloc_option_start = 0, use_malloc = 0, use_mallocx, alloc_option_end };
 
 enum {
 	dalloc_option_start = 0,
@@ -18,11 +13,10 @@ enum {
 	dalloc_option_end
 };
 
-static unsigned alloc_option, dalloc_option;
-static size_t tcache_max;
+static bool global_test;
 
 static void *
-alloc_func(size_t sz) {
+alloc_func(size_t sz, unsigned alloc_option) {
 	void *ret;
 
 	switch (alloc_option) {
@@ -41,7 +35,7 @@ alloc_func(size_t sz) {
 }
 
 static void
-dalloc_func(void *ptr, size_t sz) {
+dalloc_func(void *ptr, size_t sz, unsigned dalloc_option) {
 	switch (dalloc_option) {
 	case use_free:
 		free(ptr);
@@ -58,97 +52,131 @@ dalloc_func(void *ptr, size_t sz) {
 }
 
 static size_t
-tcache_bytes_read(void) {
+tcache_bytes_read_global(void) {
 	uint64_t epoch;
 	assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)),
 	    0, "Unexpected mallctl() failure");
 
 	size_t tcache_bytes;
 	size_t sz = sizeof(tcache_bytes);
-	assert_d_eq(mallctl(
-	    "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".tcache_bytes",
-	    &tcache_bytes, &sz, NULL, 0), 0, "Unexpected mallctl failure");
+	assert_d_eq(mallctl("stats.arenas." STRINGIFY(
+	                        MALLCTL_ARENAS_ALL) ".tcache_bytes",
+	                &tcache_bytes, &sz, NULL, 0),
+	    0, "Unexpected mallctl failure");
 
 	return tcache_bytes;
 }
 
+static size_t
+tcache_bytes_read_local(void) {
+	size_t    tcache_bytes = 0;
+	tsd_t    *tsd = tsd_fetch();
+	tcache_t *tcache = tcache_get(tsd);
+	for (szind_t i = 0; i < tcache_nbins_get(tcache->tcache_slow); i++) {
+		cache_bin_t *cache_bin = &tcache->bins[i];
+		if (tcache_bin_disabled(i, cache_bin, tcache->tcache_slow)) {
+			continue;
+		}
+		cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin);
+		tcache_bytes += ncached * sz_index2size(i);
+	}
+	return tcache_bytes;
+}
 static void
 tcache_bytes_check_update(size_t *prev, ssize_t diff) {
-	size_t tcache_bytes = tcache_bytes_read();
+	size_t tcache_bytes = global_test ? tcache_bytes_read_global()
+	                                  : tcache_bytes_read_local();
 	expect_zu_eq(tcache_bytes, *prev + diff, "tcache bytes not expected");
-
 	*prev += diff;
 }
 
 static void
-test_tcache_bytes_alloc(size_t alloc_size) {
+test_tcache_bytes_alloc(size_t alloc_size, size_t tcache_max,
+    unsigned alloc_option, unsigned dalloc_option) {
 	expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), 0,
 	    "Unexpected tcache flush failure");
 
 	size_t usize = sz_s2u(alloc_size);
 	/* No change is expected if usize is outside of tcache_max range. */
-	bool cached = (usize <= tcache_max);
+	bool    cached = (usize <= tcache_max);
 	ssize_t diff = cached ? usize : 0;
 
-	void *ptr1 = alloc_func(alloc_size);
-	void *ptr2 = alloc_func(alloc_size);
+	void *ptr1 = alloc_func(alloc_size, alloc_option);
+	void *ptr2 = alloc_func(alloc_size, alloc_option);
 
-	size_t bytes = tcache_bytes_read();
-	dalloc_func(ptr2, alloc_size);
+	size_t bytes = global_test ? tcache_bytes_read_global()
+	                           : tcache_bytes_read_local();
+	dalloc_func(ptr2, alloc_size, dalloc_option);
 	/* Expect tcache_bytes increase after dalloc */
 	tcache_bytes_check_update(&bytes, diff);
 
-	dalloc_func(ptr1, alloc_size);
+	dalloc_func(ptr1, alloc_size, alloc_option);
 	/* Expect tcache_bytes increase again */
 	tcache_bytes_check_update(&bytes, diff);
 
-	void *ptr3 = alloc_func(alloc_size);
+	void *ptr3 = alloc_func(alloc_size, alloc_option);
 	if (cached) {
 		expect_ptr_eq(ptr1, ptr3, "Unexpected cached ptr");
 	}
 	/* Expect tcache_bytes decrease after alloc */
 	tcache_bytes_check_update(&bytes, -diff);
 
-	void *ptr4 = alloc_func(alloc_size);
+	void *ptr4 = alloc_func(alloc_size, alloc_option);
 	if (cached) {
 		expect_ptr_eq(ptr2, ptr4, "Unexpected cached ptr");
 	}
 	/* Expect tcache_bytes decrease again */
 	tcache_bytes_check_update(&bytes, -diff);
 
-	dalloc_func(ptr3, alloc_size);
+	dalloc_func(ptr3, alloc_size, dalloc_option);
 	tcache_bytes_check_update(&bytes, diff);
-	dalloc_func(ptr4, alloc_size);
+	dalloc_func(ptr4, alloc_size, dalloc_option);
 	tcache_bytes_check_update(&bytes, diff);
 }
 
 static void
-test_tcache_max_impl(void) {
-	size_t sz;
+test_tcache_max_impl(
+    size_t target_tcache_max, unsigned alloc_option, unsigned dalloc_option) {
+	size_t tcache_max, sz;
 	sz = sizeof(tcache_max);
-	assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max,
-	    &sz, NULL, 0), 0, "Unexpected mallctl() failure");
+	if (global_test) {
+		assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max,
+		                &sz, NULL, 0),
+		    0, "Unexpected mallctl() failure");
+		expect_zu_eq(tcache_max, target_tcache_max,
+		    "Global tcache_max not expected");
+	} else {
+		assert_d_eq(mallctl("thread.tcache.max", (void *)&tcache_max,
+		                &sz, NULL, .0),
+		    0, "Unexpected.mallctl().failure");
+		expect_zu_eq(tcache_max, target_tcache_max,
+		    "Current thread's tcache_max not expected");
+	}
+	test_tcache_bytes_alloc(1, tcache_max, alloc_option, dalloc_option);
+	test_tcache_bytes_alloc(
+	    tcache_max - 1, tcache_max, alloc_option, dalloc_option);
+	test_tcache_bytes_alloc(
+	    tcache_max, tcache_max, alloc_option, dalloc_option);
+	test_tcache_bytes_alloc(
+	    tcache_max + 1, tcache_max, alloc_option, dalloc_option);
 
-	/* opt.tcache_max set to 1024 in tcache_max.sh */
-	expect_zu_eq(tcache_max, 1024, "tcache_max not expected");
-
-	test_tcache_bytes_alloc(1);
-	test_tcache_bytes_alloc(tcache_max - 1);
-	test_tcache_bytes_alloc(tcache_max);
-	test_tcache_bytes_alloc(tcache_max + 1);
-
-	test_tcache_bytes_alloc(PAGE - 1);
-	test_tcache_bytes_alloc(PAGE);
-	test_tcache_bytes_alloc(PAGE + 1);
+	test_tcache_bytes_alloc(
+	    PAGE - 1, tcache_max, alloc_option, dalloc_option);
+	test_tcache_bytes_alloc(PAGE, tcache_max, alloc_option, dalloc_option);
+	test_tcache_bytes_alloc(
+	    PAGE + 1, tcache_max, alloc_option, dalloc_option);
 
 	size_t large;
 	sz = sizeof(large);
-	assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL,
-	    0), 0, "Unexpected mallctl() failure");
+	assert_d_eq(
+	    mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL, 0), 0,
+	    "Unexpected mallctl() failure");
 
-	test_tcache_bytes_alloc(large - 1);
-	test_tcache_bytes_alloc(large);
-	test_tcache_bytes_alloc(large + 1);
+	test_tcache_bytes_alloc(
+	    large - 1, tcache_max, alloc_option, dalloc_option);
+	test_tcache_bytes_alloc(large, tcache_max, alloc_option, dalloc_option);
+	test_tcache_bytes_alloc(
+	    large + 1, tcache_max, alloc_option, dalloc_option);
 }
 
 TEST_BEGIN(test_tcache_max) {
@@ -157,19 +185,223 @@ TEST_BEGIN(test_tcache_max) {
 	test_skip_if(opt_prof);
 	test_skip_if(san_uaf_detection_enabled());
 
-	for (alloc_option = alloc_option_start;
-	     alloc_option < alloc_option_end;
-	     alloc_option++) {
+	unsigned arena_ind, alloc_option, dalloc_option;
+	size_t   sz = sizeof(arena_ind);
+	expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0),
+	    0, "Unexpected mallctl() failure");
+	expect_d_eq(
+	    mallctl("thread.arena", NULL, NULL, &arena_ind, sizeof(arena_ind)),
+	    0, "Unexpected mallctl() failure");
+
+	global_test = true;
+	for (alloc_option = alloc_option_start; alloc_option < alloc_option_end;
+	    alloc_option++) {
 		for (dalloc_option = dalloc_option_start;
-		     dalloc_option < dalloc_option_end;
-		     dalloc_option++) {
-			test_tcache_max_impl();
+		    dalloc_option < dalloc_option_end; dalloc_option++) {
+			/* opt.tcache_max set to 1024 in tcache_max.sh. */
+			test_tcache_max_impl(1024, alloc_option, dalloc_option);
 		}
 	}
+	global_test = false;
+}
+TEST_END
+
+TEST_BEGIN(test_large_tcache_nrequests_on_miss) {
+	test_skip_if(!config_stats);
+	test_skip_if(!opt_tcache);
+	test_skip_if(opt_prof);
+	test_skip_if(san_uaf_detection_enabled());
+
+	size_t large;
+	size_t sz = sizeof(large);
+	expect_d_eq(
+	    mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL, 0), 0,
+	    "Unexpected mallctl() failure");
+	expect_d_eq(mallctl("thread.tcache.max", NULL, NULL, (void *)&large,
+	                sizeof(large)),
+	    0, "Unexpected mallctl() failure");
+	expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), 0,
+	    "Unexpected tcache flush failure");
+
+	tsd_t *tsd = tsd_fetch();
+	expect_ptr_not_null(tsd, "Unexpected tsd_fetch() failure");
+	tcache_t *tcache = tcache_get(tsd);
+	expect_ptr_not_null(tcache, "Expected auto tcache");
+
+	szind_t binind = sz_size2index(large);
+	expect_true(binind >= SC_NBINS, "Expected large size class");
+	cache_bin_t *bin = &tcache->bins[binind];
+	bin->tstats.nrequests = 0;
+
+	void *p = mallocx(large, 0);
+	expect_ptr_not_null(p, "Unexpected mallocx() failure");
+	expect_u64_eq(bin->tstats.nrequests, 1,
+	    "Large tcache miss should count as one request");
+
+	dallocx(p, 0);
+	p = mallocx(large, 0);
+	expect_ptr_not_null(p, "Unexpected mallocx() failure");
+	expect_u64_eq(bin->tstats.nrequests, 2,
+	    "Large tcache hit should increment request count again");
+
+	dallocx(p, 0);
+	expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), 0,
+	    "Unexpected tcache flush failure");
+}
+TEST_END
+
+static size_t
+tcache_max2nbins(size_t tcache_max) {
+	return sz_size2index(tcache_max) + 1;
+}
+
+static void
+validate_tcache_stack(tcache_t *tcache) {
+	/* Assume bins[0] is enabled. */
+	void *tcache_stack = tcache->bins[0].stack_head;
+	bool  expect_found = cache_bin_stack_use_thp() ? true : false;
+
+	/* Walk through all blocks to see if the stack is within range. */
+	base_t       *base = b0get();
+	base_block_t *next = base->blocks;
+	bool          found = false;
+	do {
+		base_block_t *block = next;
+		if ((byte_t *)tcache_stack >= (byte_t *)block
+		    && (byte_t *)tcache_stack
+		        < ((byte_t *)block + block->size)) {
+			found = true;
+			break;
+		}
+		next = block->next;
+	} while (next != NULL);
+
+	expect_true(found == expect_found, "Unexpected tcache stack source");
+}
+
+static void *
+tcache_check(void *arg) {
+	size_t         old_tcache_max, new_tcache_max, min_tcache_max, sz;
+	unsigned       tcache_nbins;
+	tsd_t         *tsd = tsd_fetch();
+	tcache_t      *tcache = tsd_tcachep_get(tsd);
+	tcache_slow_t *tcache_slow = tcache->tcache_slow;
+	sz = sizeof(size_t);
+	new_tcache_max = *(size_t *)arg;
+	min_tcache_max = 1;
+
+	/*
+	 * Check the default tcache_max and tcache_nbins of each thread's
+	 * auto tcache.
+	 */
+	old_tcache_max = tcache_max_get(tcache_slow);
+	expect_zu_eq(old_tcache_max, opt_tcache_max,
+	    "Unexpected default value for tcache_max");
+	tcache_nbins = tcache_nbins_get(tcache_slow);
+	expect_zu_eq(tcache_nbins, (size_t)global_do_not_change_tcache_nbins,
+	    "Unexpected default value for tcache_nbins");
+	validate_tcache_stack(tcache);
+
+	/*
+	 * Close the tcache and test the set.
+	 * Test an input that is not a valid size class, it should be ceiled
+	 * to a valid size class.
+	 */
+	bool   e0 = false, e1;
+	size_t bool_sz = sizeof(bool);
+	expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz,
+	                (void *)&e0, bool_sz),
+	    0, "Unexpected mallctl() error");
+	expect_true(e1, "Unexpected previous tcache state");
+
+	size_t temp_tcache_max = TCACHE_MAXCLASS_LIMIT - 1;
+	assert_d_eq(mallctl("thread.tcache.max", NULL, NULL,
+	                (void *)&temp_tcache_max, sz),
+	    .0, "Unexpected.mallctl().failure");
+	old_tcache_max = tcache_max_get(tcache_slow);
+	expect_zu_eq(old_tcache_max, TCACHE_MAXCLASS_LIMIT,
+	    "Unexpected value for tcache_max");
+	tcache_nbins = tcache_nbins_get(tcache_slow);
+	expect_zu_eq(tcache_nbins, TCACHE_NBINS_MAX,
+	    "Unexpected value for tcache_nbins");
+	assert_d_eq(mallctl("thread.tcache.max", (void *)&old_tcache_max, &sz,
+	                (void *)&min_tcache_max, sz),
+	    .0, "Unexpected.mallctl().failure");
+	expect_zu_eq(old_tcache_max, TCACHE_MAXCLASS_LIMIT,
+	    "Unexpected value for tcache_max");
+
+	/* Enable tcache, the set should still be valid. */
+	e0 = true;
+	expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz,
+	                (void *)&e0, bool_sz),
+	    0, "Unexpected mallctl() error");
+	expect_false(e1, "Unexpected previous tcache state");
+	min_tcache_max = sz_s2u(min_tcache_max);
+	expect_zu_eq(tcache_max_get(tcache_slow), min_tcache_max,
+	    "Unexpected value for tcache_max");
+	expect_zu_eq(tcache_nbins_get(tcache_slow),
+	    tcache_max2nbins(min_tcache_max), "Unexpected value for nbins");
+	assert_d_eq(mallctl("thread.tcache.max", (void *)&old_tcache_max, &sz,
+	                (void *)&new_tcache_max, sz),
+	    .0, "Unexpected.mallctl().failure");
+	expect_zu_eq(
+	    old_tcache_max, min_tcache_max, "Unexpected value for tcache_max");
+	validate_tcache_stack(tcache);
+
+	/*
+	 * Check the thread's tcache_max and nbins both through mallctl
+	 * and alloc tests.
+	 */
+	if (new_tcache_max > TCACHE_MAXCLASS_LIMIT) {
+		new_tcache_max = TCACHE_MAXCLASS_LIMIT;
+	}
+	old_tcache_max = tcache_max_get(tcache_slow);
+	expect_zu_eq(
+	    old_tcache_max, new_tcache_max, "Unexpected value for tcache_max");
+	tcache_nbins = tcache_nbins_get(tcache_slow);
+	expect_zu_eq(tcache_nbins, tcache_max2nbins(new_tcache_max),
+	    "Unexpected value for tcache_nbins");
+	for (unsigned alloc_option = alloc_option_start;
+	    alloc_option < alloc_option_end; alloc_option++) {
+		for (unsigned dalloc_option = dalloc_option_start;
+		    dalloc_option < dalloc_option_end; dalloc_option++) {
+			test_tcache_max_impl(
+			    new_tcache_max, alloc_option, dalloc_option);
+		}
+		validate_tcache_stack(tcache);
+	}
+
+	return NULL;
+}
+
+TEST_BEGIN(test_thread_tcache_max) {
+	test_skip_if(!config_stats);
+	test_skip_if(!opt_tcache);
+	test_skip_if(opt_prof);
+	test_skip_if(san_uaf_detection_enabled());
+
+	unsigned nthreads = 8;
+	global_test = false;
+	VARIABLE_ARRAY(thd_t, threads, nthreads);
+	VARIABLE_ARRAY(size_t, all_threads_tcache_max, nthreads);
+	for (unsigned i = 0; i < nthreads; i++) {
+		all_threads_tcache_max[i] = 1024 * (1 << ((i + 10) % 20));
+		if (i == nthreads - 1) {
+			all_threads_tcache_max[i] = UINT_MAX;
+		}
+	}
+	for (unsigned i = 0; i < nthreads; i++) {
+		thd_create(
+		    &threads[i], tcache_check, &(all_threads_tcache_max[i]));
+	}
+	for (unsigned i = 0; i < nthreads; i++) {
+		thd_join(threads[i], NULL);
+	}
 }
 TEST_END
 
 int
 main(void) {
-	return test(test_tcache_max);
+	return test(test_tcache_max, test_large_tcache_nrequests_on_miss,
+	    test_thread_tcache_max);
 }
diff --git a/test/unit/tcache_max.sh b/test/unit/tcache_max.sh
index 4480d733..0de75e4b 100644
--- a/test/unit/tcache_max.sh
+++ b/test/unit/tcache_max.sh
@@ -1,3 +1,3 @@
 #!/bin/sh
 
-export MALLOC_CONF="tcache_max:1024"
+export MALLOC_CONF="tcache_max:1024,tcache_gc_incr_bytes:939524096"
diff --git a/test/unit/test_hooks.c b/test/unit/test_hooks.c
index 8cd2b3bb..47e5fa9e 100644
--- a/test/unit/test_hooks.c
+++ b/test/unit/test_hooks.c
@@ -3,7 +3,7 @@
 static bool hook_called = false;
 
 static void
-hook() {
+hook(void) {
 	hook_called = true;
 }
 
@@ -32,7 +32,5 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    unhooked_call,
-	    hooked_call);
+	return test(unhooked_call, hooked_call);
 }
diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c
index e0b88a92..d886c998 100644
--- a/test/unit/thread_event.c
+++ b/test/unit/thread_event.c
@@ -1,22 +1,40 @@
 #include "test/jemalloc_test.h"
 
+static uint32_t nuser_hook_calls;
+static bool     is_registered = false;
+static void
+test_cb(bool is_alloc, uint64_t tallocated, uint64_t tdallocated) {
+	++nuser_hook_calls;
+}
+
+static user_hook_object_t tobj = {
+    .callback = &test_cb, .interval = 10, .is_alloc_only = false};
+
 TEST_BEGIN(test_next_event_fast) {
-	tsd_t *tsd = tsd_fetch();
+	tsd_t   *tsd = tsd_fetch();
 	te_ctx_t ctx;
 	te_ctx_get(tsd, &ctx, true);
 
 	te_ctx_last_event_set(&ctx, 0);
 	te_ctx_current_bytes_set(&ctx, TE_NEXT_EVENT_FAST_MAX - 8U);
 	te_ctx_next_event_set(tsd, &ctx, TE_NEXT_EVENT_FAST_MAX);
-#define E(event, condition, is_alloc)					\
-	if (is_alloc && condition) {					\
-		event##_event_wait_set(tsd, TE_NEXT_EVENT_FAST_MAX);	\
+
+	if (!is_registered) {
+		is_registered = 0
+		    == te_register_user_handler(tsd_tsdn(tsd), &tobj);
+	}
+	assert_true(is_registered || !config_stats, "Register user handler");
+	nuser_hook_calls = 0;
+
+	uint64_t *waits = tsd_te_datap_get_unsafe(tsd)->alloc_wait;
+	for (size_t i = 0; i < te_alloc_count; i++) {
+		waits[i] = TE_NEXT_EVENT_FAST_MAX;
 	}
-	ITERATE_OVER_ALL_EVENTS
-#undef E
 
 	/* Test next_event_fast rolling back to 0. */
 	void *p = malloc(16U);
+	assert_true(
+	    nuser_hook_calls == 1 || !config_stats, "Expected alloc call");
 	assert_ptr_not_null(p, "malloc() failed");
 	free(p);
 
@@ -29,6 +47,5 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_next_event_fast);
+	return test(test_next_event_fast);
 }
diff --git a/test/unit/ticker.c b/test/unit/ticker.c
index 0dd77861..31a2b8e0 100644
--- a/test/unit/ticker.c
+++ b/test/unit/ticker.c
@@ -6,22 +6,22 @@ TEST_BEGIN(test_ticker_tick) {
 #define NREPS 2
 #define NTICKS 3
 	ticker_t ticker;
-	int32_t i, j;
+	int32_t  i, j;
 
 	ticker_init(&ticker, NTICKS);
 	for (i = 0; i < NREPS; i++) {
 		for (j = 0; j < NTICKS; j++) {
 			expect_u_eq(ticker_read(&ticker), NTICKS - j,
 			    "Unexpected ticker value (i=%d, j=%d)", i, j);
-			expect_false(ticker_tick(&ticker),
+			expect_false(ticker_tick(&ticker, false),
 			    "Unexpected ticker fire (i=%d, j=%d)", i, j);
 		}
-		expect_u32_eq(ticker_read(&ticker), 0,
-		    "Expected ticker depletion");
-		expect_true(ticker_tick(&ticker),
+		expect_u32_eq(
+		    ticker_read(&ticker), 0, "Expected ticker depletion");
+		expect_true(ticker_tick(&ticker, false),
 		    "Expected ticker fire (i=%d)", i);
-		expect_u32_eq(ticker_read(&ticker), NTICKS,
-		    "Expected ticker reset");
+		expect_u32_eq(
+		    ticker_read(&ticker), NTICKS, "Expected ticker reset");
 	}
 #undef NTICKS
 }
@@ -34,12 +34,15 @@ TEST_BEGIN(test_ticker_ticks) {
 	ticker_init(&ticker, NTICKS);
 
 	expect_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value");
-	expect_false(ticker_ticks(&ticker, NTICKS), "Unexpected ticker fire");
+	expect_false(
+	    ticker_ticks(&ticker, NTICKS, false), "Unexpected ticker fire");
 	expect_u_eq(ticker_read(&ticker), 0, "Unexpected ticker value");
-	expect_true(ticker_ticks(&ticker, NTICKS), "Expected ticker fire");
+	expect_true(
+	    ticker_ticks(&ticker, NTICKS, false), "Expected ticker fire");
 	expect_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value");
 
-	expect_true(ticker_ticks(&ticker, NTICKS + 1), "Expected ticker fire");
+	expect_true(
+	    ticker_ticks(&ticker, NTICKS + 1, false), "Expected ticker fire");
 	expect_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value");
 #undef NTICKS
 }
@@ -52,20 +55,21 @@ TEST_BEGIN(test_ticker_copy) {
 	ticker_init(&ta, NTICKS);
 	ticker_copy(&tb, &ta);
 	expect_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value");
-	expect_true(ticker_ticks(&tb, NTICKS + 1), "Expected ticker fire");
+	expect_true(
+	    ticker_ticks(&tb, NTICKS + 1, false), "Expected ticker fire");
 	expect_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value");
 
-	ticker_tick(&ta);
+	ticker_tick(&ta, false);
 	ticker_copy(&tb, &ta);
 	expect_u_eq(ticker_read(&tb), NTICKS - 1, "Unexpected ticker value");
-	expect_true(ticker_ticks(&tb, NTICKS), "Expected ticker fire");
+	expect_true(ticker_ticks(&tb, NTICKS, false), "Expected ticker fire");
 	expect_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value");
 #undef NTICKS
 }
 TEST_END
 
 TEST_BEGIN(test_ticker_geom) {
-	const int32_t ticks = 100;
+	const int32_t  ticks = 100;
 	const uint64_t niters = 100 * 1000;
 
 	ticker_geom_t ticker;
@@ -74,7 +78,7 @@ TEST_BEGIN(test_ticker_geom) {
 	/* Just some random constant. */
 	uint64_t prng_state = 0x343219f93496db9fULL;
 	for (uint64_t i = 0; i < niters; i++) {
-		while(!ticker_geom_tick(&ticker, &prng_state)) {
+		while (!ticker_geom_tick(&ticker, &prng_state, false)) {
 			total_ticks++;
 		}
 	}
@@ -83,18 +87,52 @@ TEST_BEGIN(test_ticker_geom) {
 	 * used at the time this was tested, total_ticks is 95.1% of the
 	 * expected ticks.
 	 */
-	expect_u64_ge(total_ticks , niters * ticks * 9 / 10,
-	    "Mean off by > 10%%");
-	expect_u64_le(total_ticks , niters * ticks * 11 / 10,
-	    "Mean off by > 10%%");
+	expect_u64_ge(
+	    total_ticks, niters * ticks * 9 / 10, "Mean off by > 10%%");
+	expect_u64_le(
+	    total_ticks, niters * ticks * 11 / 10, "Mean off by > 10%%");
+}
+TEST_END
+
+TEST_BEGIN(test_ticker_delay) {
+	const int32_t  ticks = 1000;
+	const uint64_t niters = 10000;
+
+	ticker_t t1;
+	ticker_init(&t1, ticks);
+
+	ticker_geom_t t2;
+	/* Just some random constant. */
+	uint64_t prng_state = 0x43219f93496db9f3ULL;
+	ticker_geom_init(&t2, ticks);
+
+	bool delay = false;
+	expect_false(ticker_ticks(&t1, ticks, delay), "Unexpected ticker fire");
+	expect_false(ticker_geom_ticks(&t2, &prng_state, ticks, delay),
+	    "Unexpected ticker fire");
+	expect_d_eq(ticker_read(&t1), 0, "Unexpected ticker value");
+	expect_d_eq(ticker_geom_read(&t2), 0, "Unexpected ticker value");
+
+	delay = true;
+	/* Not allowed to fire when delay is set to true. */
+	for (unsigned i = 0; i < niters; i++) {
+		expect_false(ticker_tick(&t1, delay), "Unexpected ticker fire");
+		expect_false(ticker_geom_tick(&t2, &prng_state, delay),
+		    "Unexpected ticker fire");
+		expect_d_eq(ticker_read(&t1), 0, "Unexpected ticker value");
+		expect_d_eq(
+		    ticker_geom_read(&t2), 0, "Unexpected ticker value");
+	}
+
+	delay = false;
+	expect_true(ticker_tick(&t1, delay), "Expected ticker fire");
+	expect_true(
+	    ticker_geom_tick(&t2, &prng_state, delay), "Expected ticker fire");
 }
 TEST_END
 
 int
 main(void) {
-	return test(
-	    test_ticker_tick,
-	    test_ticker_ticks,
-	    test_ticker_copy,
-	    test_ticker_geom);
+	return test(test_ticker_tick, test_ticker_ticks, test_ticker_copy,
+	    test_ticker_geom, test_ticker_delay);
 }
diff --git a/test/unit/tsd.c b/test/unit/tsd.c
index 205d8708..9610ceac 100644
--- a/test/unit/tsd.c
+++ b/test/unit/tsd.c
@@ -5,7 +5,7 @@
  * be asserting that we're on one.
  */
 static bool originally_fast;
-static int data_cleanup_count;
+static int  data_cleanup_count;
 
 void
 data_cleanup(int *data) {
@@ -45,7 +45,7 @@ data_cleanup(int *data) {
 
 static void *
 thd_start(void *arg) {
-	int d = (int)(uintptr_t)arg;
+	int   d = (int)(uintptr_t)arg;
 	void *p;
 
 	/*
@@ -105,11 +105,10 @@ thd_start_reincarnated(void *arg) {
 	expect_ptr_not_null(p, "Unexpected malloc() failure");
 
 	/* Manually trigger reincarnation. */
-	expect_ptr_not_null(tsd_arena_get(tsd),
-	    "Should have tsd arena set.");
+	expect_ptr_not_null(tsd_arena_get(tsd), "Should have tsd arena set.");
 	tsd_cleanup((void *)tsd);
-	expect_ptr_null(*tsd_arenap_get_unsafe(tsd),
-	    "TSD arena should have been cleared.");
+	expect_ptr_null(
+	    *tsd_arenap_get_unsafe(tsd), "TSD arena should have been cleared.");
 	expect_u_eq(tsd_state_get(tsd), tsd_state_purgatory,
 	    "TSD state should be purgatory\n");
 
@@ -136,9 +135,64 @@ TEST_BEGIN(test_tsd_reincarnation) {
 }
 TEST_END
 
+static void *
+thd_start_dalloc_only(void *arg) {
+	void **ptrs = (void **)arg;
+
+	tsd_t *tsd = tsd_fetch_min();
+	if (tsd_state_get(tsd) != tsd_state_minimal_initialized) {
+		/* Allocation happened implicitly. */
+		expect_u_eq(tsd_state_get(tsd), tsd_state_nominal,
+		    "TSD state should be nominal");
+		return NULL;
+	}
+
+	void *ptr;
+	for (size_t i = 0; (ptr = ptrs[i]) != NULL; i++) {
+		/* Offset by 1 because of the manual tsd_fetch_min above. */
+		if (i + 1 < TSD_MIN_INIT_STATE_MAX_FETCHED) {
+			expect_u_eq(tsd_state_get(tsd),
+			    tsd_state_minimal_initialized,
+			    "TSD should be minimal initialized");
+		} else {
+			/* State may be nominal or nominal_slow. */
+			expect_true(tsd_nominal(tsd), "TSD should be nominal");
+		}
+		free(ptr);
+	}
+
+	return NULL;
+}
+
+static void
+test_sub_thread_n_dalloc(size_t nptrs) {
+	void **ptrs = (void **)malloc(sizeof(void *) * (nptrs + 1));
+	for (size_t i = 0; i < nptrs; i++) {
+		ptrs[i] = malloc(8);
+	}
+	ptrs[nptrs] = NULL;
+
+	thd_t thd;
+	thd_create(&thd, thd_start_dalloc_only, (void *)ptrs);
+	thd_join(thd, NULL);
+	free(ptrs);
+}
+
+TEST_BEGIN(test_tsd_sub_thread_dalloc_only) {
+	test_sub_thread_n_dalloc(1);
+	test_sub_thread_n_dalloc(16);
+	test_sub_thread_n_dalloc(TSD_MIN_INIT_STATE_MAX_FETCHED - 2);
+	test_sub_thread_n_dalloc(TSD_MIN_INIT_STATE_MAX_FETCHED - 1);
+	test_sub_thread_n_dalloc(TSD_MIN_INIT_STATE_MAX_FETCHED);
+	test_sub_thread_n_dalloc(TSD_MIN_INIT_STATE_MAX_FETCHED + 1);
+	test_sub_thread_n_dalloc(TSD_MIN_INIT_STATE_MAX_FETCHED + 2);
+	test_sub_thread_n_dalloc(TSD_MIN_INIT_STATE_MAX_FETCHED * 2);
+}
+TEST_END
+
 typedef struct {
 	atomic_u32_t phase;
-	atomic_b_t error;
+	atomic_b_t   error;
 } global_slow_data_t;
 
 static void *
@@ -152,8 +206,8 @@ thd_start_global_slow(void *arg) {
 	 * No global slowness has happened yet; there was an error if we were
 	 * originally fast but aren't now.
 	 */
-	atomic_store_b(&data->error, originally_fast && !tsd_fast(tsd),
-	    ATOMIC_SEQ_CST);
+	atomic_store_b(
+	    &data->error, originally_fast && !tsd_fast(tsd), ATOMIC_SEQ_CST);
 	atomic_store_u32(&data->phase, 1, ATOMIC_SEQ_CST);
 
 	/* PHASE 2 */
@@ -186,8 +240,8 @@ thd_start_global_slow(void *arg) {
 	 * Both decrements happened; we should be fast again (if we ever
 	 * were)
 	 */
-	atomic_store_b(&data->error, originally_fast && !tsd_fast(tsd),
-	    ATOMIC_SEQ_CST);
+	atomic_store_b(
+	    &data->error, originally_fast && !tsd_fast(tsd), ATOMIC_SEQ_CST);
 	atomic_store_u32(&data->phase, 9, ATOMIC_SEQ_CST);
 
 	return NULL;
@@ -266,9 +320,7 @@ main(void) {
 		return test_status_fail;
 	}
 
-	return test_no_reentrancy(
-	    test_tsd_main_thread,
-	    test_tsd_sub_thread,
-	    test_tsd_reincarnation,
+	return test_no_reentrancy(test_tsd_main_thread, test_tsd_sub_thread,
+	    test_tsd_sub_thread_dalloc_only, test_tsd_reincarnation,
 	    test_tsd_global_slow);
 }
diff --git a/test/unit/uaf.c b/test/unit/uaf.c
index a8433c29..25399ed0 100644
--- a/test/unit/uaf.c
+++ b/test/unit/uaf.c
@@ -11,7 +11,8 @@ const char *malloc_conf = TEST_SAN_UAF_ALIGN_ENABLE;
 static size_t san_uaf_align;
 
 static bool fake_abort_called;
-void fake_abort(const char *message) {
+void
+fake_abort(const char *message) {
 	(void)message;
 	fake_abort_called = true;
 }
@@ -24,8 +25,8 @@ test_write_after_free_pre(void) {
 
 static void
 test_write_after_free_post(void) {
-	assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0),
-	    0, "Unexpected tcache flush failure");
+	assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), 0,
+	    "Unexpected tcache flush failure");
 	expect_true(fake_abort_called, "Use-after-free check didn't fire.");
 	safety_check_set_abort(NULL);
 }
@@ -37,9 +38,10 @@ uaf_detection_enabled(void) {
 	}
 
 	ssize_t lg_san_uaf_align;
-	size_t sz = sizeof(lg_san_uaf_align);
-	assert_d_eq(mallctl("opt.lg_san_uaf_align", &lg_san_uaf_align, &sz,
-	    NULL, 0), 0, "Unexpected mallctl failure");
+	size_t  sz = sizeof(lg_san_uaf_align);
+	assert_d_eq(
+	    mallctl("opt.lg_san_uaf_align", &lg_san_uaf_align, &sz, NULL, 0), 0,
+	    "Unexpected mallctl failure");
 	if (lg_san_uaf_align < 0) {
 		return false;
 	}
@@ -48,8 +50,9 @@ uaf_detection_enabled(void) {
 
 	bool tcache_enabled;
 	sz = sizeof(tcache_enabled);
-	assert_d_eq(mallctl("thread.tcache.enabled", &tcache_enabled, &sz, NULL,
-	    0), 0, "Unexpected mallctl failure");
+	assert_d_eq(
+	    mallctl("thread.tcache.enabled", &tcache_enabled, &sz, NULL, 0), 0,
+	    "Unexpected mallctl failure");
 	if (!tcache_enabled) {
 		return false;
 	}
@@ -69,10 +72,10 @@ read_tcache_stashed_bytes(unsigned arena_ind) {
 
 	size_t tcache_stashed_bytes;
 	size_t sz = sizeof(tcache_stashed_bytes);
-	assert_d_eq(mallctl(
-	    "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL)
-	    ".tcache_stashed_bytes", &tcache_stashed_bytes, &sz, NULL, 0), 0,
-	    "Unexpected mallctl failure");
+	assert_d_eq(mallctl("stats.arenas." STRINGIFY(
+	                        MALLCTL_ARENAS_ALL) ".tcache_stashed_bytes",
+	                &tcache_stashed_bytes, &sz, NULL, 0),
+	    0, "Unexpected mallctl failure");
 
 	return tcache_stashed_bytes;
 }
@@ -91,17 +94,17 @@ test_use_after_free(size_t alloc_size, bool write_after_free) {
 	 * make use-after-free tolerable.
 	 */
 	unsigned arena_ind = do_arena_create(-1, -1);
-	int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE;
+	int      flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE;
 
 	size_t n_max = san_uaf_align * 2;
 	void **items = mallocx(n_max * sizeof(void *), flags);
 	assert_ptr_not_null(items, "Unexpected mallocx failure");
 
-	bool found = false;
+	bool   found = false;
 	size_t iter = 0;
-	char magic = 's';
-	assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0),
-	    0, "Unexpected tcache flush failure");
+	char   magic = 's';
+	assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), 0,
+	    "Unexpected tcache flush failure");
 	while (!found) {
 		ptr = mallocx(alloc_size, flags);
 		assert_ptr_not_null(ptr, "Unexpected mallocx failure");
@@ -194,7 +197,7 @@ static bool
 check_allocated_intact(void **allocated, size_t n_alloc) {
 	for (unsigned i = 0; i < n_alloc; i++) {
 		void *ptr = *(void **)allocated[i];
-		bool found = false;
+		bool  found = false;
 		for (unsigned j = 0; j < n_alloc; j++) {
 			if (ptr == allocated[j]) {
 				found = true;
@@ -213,7 +216,7 @@ TEST_BEGIN(test_use_after_free_integration) {
 	test_skip_if(!uaf_detection_enabled());
 
 	unsigned arena_ind = do_arena_create(-1, -1);
-	int flags = MALLOCX_ARENA(arena_ind);
+	int      flags = MALLOCX_ARENA(arena_ind);
 
 	size_t n_alloc = san_uaf_align * 2;
 	void **allocated = mallocx(n_alloc * sizeof(void *), flags);
@@ -255,8 +258,6 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_read_after_free,
-	    test_write_after_free,
+	return test(test_read_after_free, test_write_after_free,
 	    test_use_after_free_integration);
 }
diff --git a/test/unit/witness.c b/test/unit/witness.c
index 5a6c4482..ccefb5a2 100644
--- a/test/unit/witness.c
+++ b/test/unit/witness.c
@@ -1,9 +1,9 @@
 #include "test/jemalloc_test.h"
 
-static witness_lock_error_t *witness_lock_error_orig;
-static witness_owner_error_t *witness_owner_error_orig;
+static witness_lock_error_t      *witness_lock_error_orig;
+static witness_owner_error_t     *witness_owner_error_orig;
 static witness_not_owner_error_t *witness_not_owner_error_orig;
-static witness_depth_error_t *witness_depth_error_orig;
+static witness_depth_error_t     *witness_depth_error_orig;
 
 static bool saw_lock_error;
 static bool saw_owner_error;
@@ -11,8 +11,8 @@ static bool saw_not_owner_error;
 static bool saw_depth_error;
 
 static void
-witness_lock_error_intercept(const witness_list_t *witnesses,
-    const witness_t *witness) {
+witness_lock_error_intercept(
+    const witness_list_t *witnesses, const witness_t *witness) {
 	saw_lock_error = true;
 }
 
@@ -43,8 +43,8 @@ witness_comp(const witness_t *a, void *oa, const witness_t *b, void *ob) {
 }
 
 static int
-witness_comp_reverse(const witness_t *a, void *oa, const witness_t *b,
-    void *ob) {
+witness_comp_reverse(
+    const witness_t *a, void *oa, const witness_t *b, void *ob) {
 	expect_u_eq(a->rank, b->rank, "Witnesses should have equal rank");
 
 	assert(oa == (void *)a);
@@ -54,8 +54,8 @@ witness_comp_reverse(const witness_t *a, void *oa, const witness_t *b,
 }
 
 TEST_BEGIN(test_witness) {
-	witness_t a, b;
-	witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER };
+	witness_t      a, b;
+	witness_tsdn_t witness_tsdn = {WITNESS_TSD_INITIALIZER};
 
 	test_skip_if(!config_debug);
 
@@ -94,8 +94,8 @@ TEST_BEGIN(test_witness) {
 TEST_END
 
 TEST_BEGIN(test_witness_comp) {
-	witness_t a, b, c, d;
-	witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER };
+	witness_t      a, b, c, d;
+	witness_tsdn_t witness_tsdn = {WITNESS_TSD_INITIALIZER};
 
 	test_skip_if(!config_debug);
 
@@ -146,8 +146,8 @@ TEST_BEGIN(test_witness_comp) {
 TEST_END
 
 TEST_BEGIN(test_witness_reversal) {
-	witness_t a, b;
-	witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER };
+	witness_t      a, b;
+	witness_tsdn_t witness_tsdn = {WITNESS_TSD_INITIALIZER};
 
 	test_skip_if(!config_debug);
 
@@ -177,8 +177,8 @@ TEST_BEGIN(test_witness_reversal) {
 TEST_END
 
 TEST_BEGIN(test_witness_recursive) {
-	witness_t a;
-	witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER };
+	witness_t      a;
+	witness_tsdn_t witness_tsdn = {WITNESS_TSD_INITIALIZER};
 
 	test_skip_if(!config_debug);
 
@@ -207,13 +207,12 @@ TEST_BEGIN(test_witness_recursive) {
 
 	witness_owner_error = witness_owner_error_orig;
 	witness_lock_error = witness_lock_error_orig;
-
 }
 TEST_END
 
 TEST_BEGIN(test_witness_unlock_not_owned) {
-	witness_t a;
-	witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER };
+	witness_t      a;
+	witness_tsdn_t witness_tsdn = {WITNESS_TSD_INITIALIZER};
 
 	test_skip_if(!config_debug);
 
@@ -236,8 +235,8 @@ TEST_BEGIN(test_witness_unlock_not_owned) {
 TEST_END
 
 TEST_BEGIN(test_witness_depth) {
-	witness_t a;
-	witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER };
+	witness_t      a;
+	witness_tsdn_t witness_tsdn = {WITNESS_TSD_INITIALIZER};
 
 	test_skip_if(!config_debug);
 
@@ -270,11 +269,7 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_witness,
-	    test_witness_comp,
-	    test_witness_reversal,
-	    test_witness_recursive,
-	    test_witness_unlock_not_owned,
+	return test(test_witness, test_witness_comp, test_witness_reversal,
+	    test_witness_recursive, test_witness_unlock_not_owned,
 	    test_witness_depth);
 }
diff --git a/test/unit/zero.c b/test/unit/zero.c
index d3e81f1b..522d6908 100644
--- a/test/unit/zero.c
+++ b/test/unit/zero.c
@@ -3,35 +3,35 @@
 static void
 test_zero(size_t sz_min, size_t sz_max) {
 	uint8_t *s;
-	size_t sz_prev, sz, i;
-#define MAGIC	((uint8_t)0x61)
+	size_t   sz_prev, sz, i;
+#define MAGIC ((uint8_t)0x61)
 
 	sz_prev = 0;
 	s = (uint8_t *)mallocx(sz_min, 0);
 	expect_ptr_not_null((void *)s, "Unexpected mallocx() failure");
 
 	for (sz = sallocx(s, 0); sz <= sz_max;
-	    sz_prev = sz, sz = sallocx(s, 0)) {
+	     sz_prev = sz, sz = sallocx(s, 0)) {
 		if (sz_prev > 0) {
 			expect_u_eq(s[0], MAGIC,
 			    "Previously allocated byte %zu/%zu is corrupted",
 			    ZU(0), sz_prev);
-			expect_u_eq(s[sz_prev-1], MAGIC,
+			expect_u_eq(s[sz_prev - 1], MAGIC,
 			    "Previously allocated byte %zu/%zu is corrupted",
-			    sz_prev-1, sz_prev);
+			    sz_prev - 1, sz_prev);
 		}
 
 		for (i = sz_prev; i < sz; i++) {
 			expect_u_eq(s[i], 0x0,
-			    "Newly allocated byte %zu/%zu isn't zero-filled",
-			    i, sz);
+			    "Newly allocated byte %zu/%zu isn't zero-filled", i,
+			    sz);
 			s[i] = MAGIC;
 		}
 
-		if (xallocx(s, sz+1, 0, 0) == sz) {
-			s = (uint8_t *)rallocx(s, sz+1, 0);
-			expect_ptr_not_null((void *)s,
-			    "Unexpected rallocx() failure");
+		if (xallocx(s, sz + 1, 0, 0) == sz) {
+			s = (uint8_t *)rallocx(s, sz + 1, 0);
+			expect_ptr_not_null(
+			    (void *)s, "Unexpected rallocx() failure");
 		}
 	}
 
@@ -53,7 +53,5 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_zero_small,
-	    test_zero_large);
+	return test(test_zero_small, test_zero_large);
 }
diff --git a/test/unit/zero_realloc_abort.c b/test/unit/zero_realloc_abort.c
index a880d104..1d8bf9c3 100644
--- a/test/unit/zero_realloc_abort.c
+++ b/test/unit/zero_realloc_abort.c
@@ -4,7 +4,9 @@
 
 static bool abort_called = false;
 
-void set_abort_called() {
+void
+set_abort_called(const char *message) {
+	(void)message;
 	abort_called = true;
 };
 
@@ -20,7 +22,5 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_realloc_abort);
+	return test(test_realloc_abort);
 }
-
diff --git a/test/unit/zero_realloc_alloc.c b/test/unit/zero_realloc_alloc.c
index 65e07bdb..5b4f985f 100644
--- a/test/unit/zero_realloc_alloc.c
+++ b/test/unit/zero_realloc_alloc.c
@@ -1,26 +1,28 @@
 #include "test/jemalloc_test.h"
 
 static uint64_t
-allocated() {
+allocated(void) {
 	if (!config_stats) {
 		return 0;
 	}
 	uint64_t allocated;
-	size_t sz = sizeof(allocated);
-	expect_d_eq(mallctl("thread.allocated", (void *)&allocated, &sz, NULL,
-	    0), 0, "Unexpected mallctl failure");
+	size_t   sz = sizeof(allocated);
+	expect_d_eq(
+	    mallctl("thread.allocated", (void *)&allocated, &sz, NULL, 0), 0,
+	    "Unexpected mallctl failure");
 	return allocated;
 }
 
 static uint64_t
-deallocated() {
+deallocated(void) {
 	if (!config_stats) {
 		return 0;
 	}
 	uint64_t deallocated;
-	size_t sz = sizeof(deallocated);
-	expect_d_eq(mallctl("thread.deallocated", (void *)&deallocated, &sz,
-	    NULL, 0), 0, "Unexpected mallctl failure");
+	size_t   sz = sizeof(deallocated);
+	expect_d_eq(
+	    mallctl("thread.deallocated", (void *)&deallocated, &sz, NULL, 0),
+	    0, "Unexpected mallctl failure");
 	return deallocated;
 }
 
@@ -43,6 +45,5 @@ TEST_BEGIN(test_realloc_alloc) {
 TEST_END
 int
 main(void) {
-	return test(
-	    test_realloc_alloc);
+	return test(test_realloc_alloc);
 }
diff --git a/test/unit/zero_realloc_free.c b/test/unit/zero_realloc_free.c
index baed86c9..c2aa0afa 100644
--- a/test/unit/zero_realloc_free.c
+++ b/test/unit/zero_realloc_free.c
@@ -1,14 +1,15 @@
 #include "test/jemalloc_test.h"
 
 static uint64_t
-deallocated() {
+deallocated(void) {
 	if (!config_stats) {
 		return 0;
 	}
 	uint64_t deallocated;
-	size_t sz = sizeof(deallocated);
-	expect_d_eq(mallctl("thread.deallocated", (void *)&deallocated, &sz,
-	    NULL, 0), 0, "Unexpected mallctl failure");
+	size_t   sz = sizeof(deallocated);
+	expect_d_eq(
+	    mallctl("thread.deallocated", (void *)&deallocated, &sz, NULL, 0),
+	    0, "Unexpected mallctl failure");
 	return deallocated;
 }
 
@@ -28,6 +29,5 @@ TEST_END
 
 int
 main(void) {
-	return test(
-	    test_realloc_free);
+	return test(test_realloc_free);
 }
diff --git a/test/unit/zero_reallocs.c b/test/unit/zero_reallocs.c
index 66c7a404..6c4a51d6 100644
--- a/test/unit/zero_reallocs.c
+++ b/test/unit/zero_reallocs.c
@@ -1,15 +1,16 @@
 #include "test/jemalloc_test.h"
 
 static size_t
-zero_reallocs() {
+zero_reallocs(void) {
 	if (!config_stats) {
 		return 0;
 	}
 	size_t count = 12345;
 	size_t sz = sizeof(count);
 
-	expect_d_eq(mallctl("stats.zero_reallocs", (void *)&count, &sz,
-	    NULL, 0), 0, "Unexpected mallctl failure");
+	expect_d_eq(
+	    mallctl("stats.zero_reallocs", (void *)&count, &sz, NULL, 0), 0,
+	    "Unexpected mallctl failure");
 	return count;
 }
 
@@ -35,6 +36,5 @@ main(void) {
 	 * We expect explicit counts; reentrant tests run multiple times, so
 	 * counts leak across runs.
 	 */
-	return test_no_reentrancy(
-	    test_zero_reallocs);
+	return test_no_reentrancy(test_zero_reallocs);
 }