From 1cc0e4a83865f5a8d836c0d83471290514124b50 Mon Sep 17 00:00:00 2001
From: Slobodan Predolac
Date: Thu, 28 May 2026 19:01:12 -0400
Subject: [PATCH 01/19] Add header dep graph script for cleanup verification

---
 scripts/gen_header_dep_graph.sh | 100 ++++++++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)
 create mode 100755 scripts/gen_header_dep_graph.sh

diff --git a/scripts/gen_header_dep_graph.sh b/scripts/gen_header_dep_graph.sh
new file mode 100755
index 00000000..0b99d5b0
--- /dev/null
+++ b/scripts/gen_header_dep_graph.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+# Generate header dependency graph + tsort cycle check.
+#
+# Outputs (under build/):
+#   header_deps_baseline.txt - sorted, unique header->header (and .c->header)
+#                              edges derived from #include "..." directives.
+#   header_tsort_order.txt   - topological order produced by tsort.
+#   header_tsort_cycles.txt  - stderr from tsort; non-empty if there's a cycle.
+#   per_tu_deps.txt          - per-translation-unit transitive header lists
+#                              (one TU per line; basenames only; sorted).
+#
+# All headers / sources are reduced to basenames so renames in the cleanup are
+# easy to spot in the diff.
+
+set -euo pipefail
+
+repo_root="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$repo_root"
+mkdir -p build
+
+edges_raw=build/header_edges_raw.txt
+edges_baseline=build/header_deps_baseline.txt
+tsort_order=build/header_tsort_order.txt
+tsort_cycles=build/header_tsort_cycles.txt
+per_tu=build/per_tu_deps.txt
+
+: > "$edges_raw"
+
+extract_edges() {
+    local src="$1"
+    local bn
+    bn=$(basename "$src")
+    # Match `#include "..."` (skip `<...>` system includes). Strip path, keep
+    # only the basename of the included file, so the graph collapses
+    # `../jemalloc.h` and similar relative paths into a single node.
+    awk '
+        /^[[:space:]]*#[[:space:]]*include[[:space:]]+"/ {
+            match($0, /"[^"]+"/)
+            inc = substr($0, RSTART + 1, RLENGTH - 2)
+            n = split(inc, parts, "/")
+            print parts[n]
+        }
+    ' "$src" | while read -r dep; do
+        printf '%s %s\n' "$bn" "$dep"
+    done
+}
+
+# Headers (include/jemalloc/internal/ and include/jemalloc/) — these are the
+# nodes we care about for cycle detection.
+for f in include/jemalloc/internal/*.h include/jemalloc/*.h; do
+    extract_edges "$f" >> "$edges_raw"
+done
+
+# Translation units (src/*.c) — included so that .c files appear as graph
+# sources in the baseline. They can't introduce cycles (nothing #includes a
+# .c), but the edges are useful when diffing later.
+for f in src/*.c; do
+    extract_edges "$f" >> "$edges_raw"
+done
+
+sort -u "$edges_raw" > "$edges_baseline"
+
+# tsort consumes "A B" pairs and reports cycles on stderr.
+: > "$tsort_cycles"
+if ! tsort "$edges_baseline" > "$tsort_order" 2> "$tsort_cycles"; then
+    echo "tsort exited non-zero; see $tsort_cycles" >&2
+fi
+
+# Per-translation-unit transitive header lists, harvested from the .d files
+# the build already produced (CC_MM=1 in the Makefile).
+: > "$per_tu"
+for d in src/*.d; do
+    [ -f "$d" ] || continue
+    tu=$(basename "${d%.d}.c")
+    # Drop the rule prefix ("foo.o: foo.c \") and continuations, keep *.h
+    # basenames, sort+uniq. `|| true`: no headers must not trip pipefail.
+    deps=$(
+        tr '\n' ' ' < "$d" \
+            | sed -E 's/\\//g' \
+            | tr ' ' '\n' \
+            | { grep -E '\.h$' || true; } \
+            | awk -F/ '{print $NF}' \
+            | sort -u \
+            | tr '\n' ' ' \
+            | sed -E 's/[[:space:]]+$//'
+    )
+    printf '%s: %s\n' "$tu" "$deps" >> "$per_tu"
+done
+sort -o "$per_tu" "$per_tu"
+
+edge_count=$(wc -l < "$edges_baseline" | tr -d ' ')
+cycle_bytes=$(wc -c < "$tsort_cycles" | tr -d ' ')
+tu_count=$(wc -l < "$per_tu" | tr -d ' ')
+echo "edges: $edge_count"
+echo "translation units: $tu_count"
+echo "tsort cycle output bytes: $cycle_bytes"
+if [ "$cycle_bytes" -gt 0 ]; then
+    echo "---- tsort cycle report ----"
+    cat "$tsort_cycles"
+fi
From e2c0f07a7e13e322e4538ab71e69b93e1cf53a8f Mon Sep 17 00:00:00 2001
From: Slobodan Predolac
Date: Thu, 28 May 2026 19:01:19 -0400
Subject: [PATCH 02/19] Move malloc routing into new malloc_dispatch module

Pull the tcache-aware allocation routing helpers out of arena into a
layer that sits directly below the public malloc interface:

  arena_malloc          -> malloc_dispatch_malloc
  arena_palloc          -> malloc_dispatch_palloc
  arena_ralloc          -> malloc_dispatch_ralloc
  arena_dalloc*         -> malloc_dispatch_dalloc*
  arena_sdalloc*        -> malloc_dispatch_sdalloc*
  arena_dalloc_promoted -> malloc_dispatch_dalloc_promoted

These helpers decide whether to route through tcache or fall through to
arena/large fast paths. They are now owned by malloc_dispatch_inlines.h +
src/malloc_dispatch.c, and the only consumers are the public-front-end
wrappers in jemalloc_internal_inlines_c.h.

arena keeps a narrower arena_prof_demote() helper for the sampled
allocation demotion + redzone verification it used to perform inline.
arena_inlines_b.h no longer includes tcache_inlines.h -- the symbol
level arena <-> tcache cycle is gone (it's now in malloc_dispatch).
--- Makefile.in | 1 + include/jemalloc/internal/arena_externs.h | 7 +- include/jemalloc/internal/arena_inlines_b.h | 259 ---------------- .../internal/jemalloc_internal_includes.h | 2 + .../internal/jemalloc_internal_inlines_c.h | 14 +- .../internal/malloc_dispatch_externs.h | 19 ++ .../internal/malloc_dispatch_inlines.h | 279 ++++++++++++++++++ .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2019/jemalloc/jemalloc.vcxproj | 1 + .../vc2019/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2022/jemalloc/jemalloc.vcxproj | 1 + .../vc2022/jemalloc/jemalloc.vcxproj.filters | 3 + src/arena.c | 121 +------- src/malloc_dispatch.c | 109 +++++++ 17 files changed, 442 insertions(+), 385 deletions(-) create mode 100644 include/jemalloc/internal/malloc_dispatch_externs.h create mode 100644 include/jemalloc/internal/malloc_dispatch_inlines.h create mode 100644 src/malloc_dispatch.c diff --git a/Makefile.in b/Makefile.in index f939350f..13bc5a24 100644 --- a/Makefile.in +++ b/Makefile.in @@ -132,6 +132,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/inspect.c \ $(srcroot)src/large.c \ $(srcroot)src/log.c \ + $(srcroot)src/malloc_dispatch.c \ $(srcroot)src/malloc_io.c \ $(srcroot)src/conf.c \ $(srcroot)src/mutex.c \ diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 06189d56..694c5d81 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -70,12 +70,9 @@ cache_bin_sz_t arena_ptr_array_fill_small(tsdn_t *tsdn, arena_t *arena, void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, bool slab); -void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, - bool zero, bool slab, tcache_t *tcache); void arena_prof_promote( tsdn_t *tsdn, void 
*ptr, size_t usize, size_t bumped_usize); -void arena_dalloc_promoted( - tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); +size_t arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr); void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab); void arena_dalloc_small(tsdn_t *tsdn, void *ptr); @@ -84,8 +81,6 @@ void arena_ptr_array_flush(tsd_t *tsd, szind_t binind, arena_t *stats_arena, cache_bin_stats_t merge_stats); bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero, size_t *newsize); -void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, - size_t size, size_t alignment, bool zero, bool slab, tcache_t *tcache); dss_prec_t arena_dss_prec_get(const arena_t *arena); ehooks_t *arena_get_ehooks(const arena_t *arena); extent_hooks_t *arena_set_extent_hooks( diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 64957c7b..005f422e 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -17,7 +17,6 @@ #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/sz.h" -#include "jemalloc/internal/tcache_inlines.h" #include "jemalloc/internal/ticker.h" static inline arena_t * @@ -193,26 +192,6 @@ arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { arena_decay_ticks(tsdn, arena, 1); } -JEMALLOC_ALWAYS_INLINE void * -arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, - bool slab, tcache_t *tcache, bool slow_path) { - assert(!tsdn_null(tsdn) || tcache == NULL); - - if (likely(tcache != NULL)) { - if (likely(slab)) { - assert(sz_can_use_slab(size)); - return tcache_alloc_small(tsdn_tsd(tsdn), arena, tcache, - size, ind, zero, slow_path); - } else if (likely(tcache_can_cache_large(tcache, ind))) { - return tcache_alloc_large(tsdn_tsd(tsdn), arena, tcache, - size, ind, zero, slow_path); - } - /* 
(size > tcache_max) case falls through. */ - } - - return arena_malloc_hard(tsdn, arena, size, ind, zero, slab); -} - JEMALLOC_ALWAYS_INLINE arena_t * arena_aalloc(tsdn_t *tsdn, const void *ptr) { edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); @@ -261,244 +240,6 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { return edata_usize_get(full_alloc_ctx.edata); } -static inline void -arena_dalloc_large_no_tcache( - tsdn_t *tsdn, void *ptr, szind_t szind, size_t usize) { - /* - * szind is still needed in this function mainly becuase - * szind < SC_NBINS determines not only if this is a small alloc, - * but also if szind is valid (an inactive extent would have - * szind == SC_NSIZES). - */ - if (config_prof && unlikely(szind < SC_NBINS)) { - arena_dalloc_promoted(tsdn, ptr, NULL, true); - } else { - edata_t *edata = emap_edata_lookup( - tsdn, &arena_emap_global, ptr); - if (large_dalloc_safety_checks(edata, ptr, usize)) { - /* See the comment in isfree. */ - return; - } - large_dalloc(tsdn, edata); - } -} - -static inline void -arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { - assert(ptr != NULL); - - emap_alloc_ctx_t alloc_ctx; - emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx); - - if (config_debug) { - edata_t *edata = emap_edata_lookup( - tsdn, &arena_emap_global, ptr); - assert(alloc_ctx.szind == edata_szind_get(edata)); - assert(alloc_ctx.szind < SC_NSIZES); - assert(alloc_ctx.slab == edata_slab_get(edata)); - assert(emap_alloc_ctx_usize_get(&alloc_ctx) - == edata_usize_get(edata)); - } - - if (likely(alloc_ctx.slab)) { - /* Small allocation. 
*/ - arena_dalloc_small(tsdn, ptr); - } else { - arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind, - emap_alloc_ctx_usize_get(&alloc_ctx)); - } -} - -JEMALLOC_ALWAYS_INLINE void -arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, - size_t usize, bool slow_path) { - assert(!tsdn_null(tsdn) && tcache != NULL); - bool is_sample_promoted = config_prof && szind < SC_NBINS; - if (unlikely(is_sample_promoted)) { - arena_dalloc_promoted(tsdn, ptr, tcache, slow_path); - } else { - if (tcache_can_cache_large(tcache, szind)) { - tcache_dalloc_large( - tsdn_tsd(tsdn), tcache, ptr, szind, slow_path); - } else { - edata_t *edata = emap_edata_lookup( - tsdn, &arena_emap_global, ptr); - if (large_dalloc_safety_checks(edata, ptr, usize)) { - /* See the comment in isfree. */ - return; - } - large_dalloc(tsdn, edata); - } - } -} - -JEMALLOC_ALWAYS_INLINE bool -arena_tcache_dalloc_small_safety_check(tsdn_t *tsdn, void *ptr) { - if (!config_debug) { - return false; - } - edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); - szind_t binind = edata_szind_get(edata); - div_info_t div_info = arena_binind_div_info[binind]; - /* - * Calls the internal function bin_slab_regind_impl because the - * safety check does not require a lock. 
- */ - size_t regind = bin_slab_regind_impl(&div_info, binind, edata, ptr); - slab_data_t *slab_data = edata_slab_data_get(edata); - const bin_info_t *bin_info = &bin_infos[binind]; - assert(edata_nfree_get(edata) < bin_info->nregs); - if (unlikely(!bitmap_get( - slab_data->bitmap, &bin_info->bitmap_info, regind))) { - safety_check_fail( - "Invalid deallocation detected: the pointer being freed (%p) not " - "currently active, possibly caused by double free bugs.\n", - ptr); - return true; - } - return false; -} - -JEMALLOC_ALWAYS_INLINE void -arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, - emap_alloc_ctx_t *caller_alloc_ctx, bool slow_path) { - assert(!tsdn_null(tsdn) || tcache == NULL); - assert(ptr != NULL); - - if (unlikely(tcache == NULL)) { - arena_dalloc_no_tcache(tsdn, ptr); - return; - } - - emap_alloc_ctx_t alloc_ctx; - if (caller_alloc_ctx != NULL) { - alloc_ctx = *caller_alloc_ctx; - } else { - util_assume(tsdn != NULL); - emap_alloc_ctx_lookup( - tsdn, &arena_emap_global, ptr, &alloc_ctx); - } - - if (config_debug) { - edata_t *edata = emap_edata_lookup( - tsdn, &arena_emap_global, ptr); - assert(alloc_ctx.szind == edata_szind_get(edata)); - assert(alloc_ctx.szind < SC_NSIZES); - assert(alloc_ctx.slab == edata_slab_get(edata)); - assert(emap_alloc_ctx_usize_get(&alloc_ctx) - == edata_usize_get(edata)); - } - - if (likely(alloc_ctx.slab)) { - /* Small allocation. 
*/ - if (arena_tcache_dalloc_small_safety_check(tsdn, ptr)) { - return; - } - tcache_dalloc_small( - tsdn_tsd(tsdn), tcache, ptr, alloc_ctx.szind, slow_path); - } else { - arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind, - emap_alloc_ctx_usize_get(&alloc_ctx), slow_path); - } -} - -static inline void -arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { - assert(ptr != NULL); - assert(size <= SC_LARGE_MAXCLASS); - - emap_alloc_ctx_t alloc_ctx; - if (!config_prof || !opt_prof) { - /* - * There is no risk of being confused by a promoted sampled - * object, so base szind and slab on the given size. - */ - szind_t szind = sz_size2index(size); - emap_alloc_ctx_init( - &alloc_ctx, szind, (szind < SC_NBINS), size); - } - - if ((config_prof && opt_prof) || config_debug) { - emap_alloc_ctx_lookup( - tsdn, &arena_emap_global, ptr, &alloc_ctx); - - assert(alloc_ctx.szind == sz_size2index(size)); - assert((config_prof && opt_prof) - || alloc_ctx.slab == (alloc_ctx.szind < SC_NBINS)); - - if (config_debug) { - edata_t *edata = emap_edata_lookup( - tsdn, &arena_emap_global, ptr); - assert(alloc_ctx.szind == edata_szind_get(edata)); - assert(alloc_ctx.slab == edata_slab_get(edata)); - } - } - - if (likely(alloc_ctx.slab)) { - /* Small allocation. */ - arena_dalloc_small(tsdn, ptr); - } else { - arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind, - emap_alloc_ctx_usize_get(&alloc_ctx)); - } -} - -JEMALLOC_ALWAYS_INLINE void -arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - emap_alloc_ctx_t *caller_alloc_ctx, bool slow_path) { - assert(!tsdn_null(tsdn) || tcache == NULL); - assert(ptr != NULL); - assert(size <= SC_LARGE_MAXCLASS); - - if (unlikely(tcache == NULL)) { - arena_sdalloc_no_tcache(tsdn, ptr, size); - return; - } - - emap_alloc_ctx_t alloc_ctx; - if (config_prof && opt_prof) { - if (caller_alloc_ctx == NULL) { - /* Uncommon case and should be a static check. 
*/ - emap_alloc_ctx_lookup( - tsdn, &arena_emap_global, ptr, &alloc_ctx); - assert(alloc_ctx.szind == sz_size2index(size)); - assert(emap_alloc_ctx_usize_get(&alloc_ctx) == size); - } else { - alloc_ctx = *caller_alloc_ctx; - } - } else { - /* - * There is no risk of being confused by a promoted sampled - * object, so base szind and slab on the given size. - */ - alloc_ctx.szind = sz_size2index(size); - alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); - } - - if (config_debug) { - edata_t *edata = emap_edata_lookup( - tsdn, &arena_emap_global, ptr); - assert(alloc_ctx.szind == edata_szind_get(edata)); - assert(alloc_ctx.slab == edata_slab_get(edata)); - emap_alloc_ctx_init( - &alloc_ctx, alloc_ctx.szind, alloc_ctx.slab, sz_s2u(size)); - assert(emap_alloc_ctx_usize_get(&alloc_ctx) - == edata_usize_get(edata)); - } - - if (likely(alloc_ctx.slab)) { - /* Small allocation. */ - if (arena_tcache_dalloc_small_safety_check(tsdn, ptr)) { - return; - } - tcache_dalloc_small( - tsdn_tsd(tsdn), tcache, ptr, alloc_ctx.szind, slow_path); - } else { - arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind, - sz_s2u(size), slow_path); - } -} - static inline void arena_cache_oblivious_randomize( tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t alignment) { diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 751c112f..d70c808f 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -61,6 +61,7 @@ #include "jemalloc/internal/arena_externs.h" #include "jemalloc/internal/large_externs.h" #include "jemalloc/internal/tcache_externs.h" +#include "jemalloc/internal/malloc_dispatch_externs.h" #include "jemalloc/internal/prof_externs.h" #include "jemalloc/internal/background_thread_externs.h" @@ -77,6 +78,7 @@ #include "jemalloc/internal/jemalloc_internal_inlines_b.h" #include "jemalloc/internal/tcache_inlines.h" #include 
"jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/malloc_dispatch_inlines.h" #include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/prof_inlines.h" #include "jemalloc/internal/background_thread_inlines.h" diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 541821ae..323f32d5 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -8,6 +8,7 @@ #include "jemalloc/internal/jemalloc_init.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/log.h" +#include "jemalloc/internal/malloc_dispatch_inlines.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/thread_event.h" #include "jemalloc/internal/witness.h" @@ -67,7 +68,7 @@ iallocztm_explicit_slab(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); } - ret = arena_malloc( + ret = malloc_dispatch_malloc( tsdn, arena, size, ind, zero, slab, tcache, slow_path); if (config_stats && is_internal && likely(ret != NULL)) { arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret)); @@ -102,7 +103,8 @@ ipallocztm_explicit_slab(tsdn_t *tsdn, size_t usize, size_t alignment, witness_assert_depth_to_rank( tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - ret = arena_palloc(tsdn, arena, usize, alignment, zero, slab, tcache); + ret = malloc_dispatch_palloc( + tsdn, arena, usize, alignment, zero, slab, tcache); assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); if (config_stats && is_internal && likely(ret != NULL)) { arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret)); @@ -156,7 +158,7 @@ idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) { assert(tcache == NULL); } - arena_dalloc(tsdn, ptr, tcache, alloc_ctx, slow_path); + malloc_dispatch_dalloc(tsdn, ptr, tcache, 
alloc_ctx, slow_path); } JEMALLOC_ALWAYS_INLINE void @@ -169,7 +171,7 @@ isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, emap_alloc_ctx_t *alloc_ctx, bool slow_path) { witness_assert_depth_to_rank( tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - arena_sdalloc(tsdn, ptr, size, tcache, alloc_ctx, slow_path); + malloc_dispatch_sdalloc(tsdn, ptr, size, tcache, alloc_ctx, slow_path); } JEMALLOC_ALWAYS_INLINE void * @@ -217,8 +219,8 @@ iralloct_explicit_slab(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, zero, slab, tcache, arena); } - return arena_ralloc(tsdn, arena, ptr, oldsize, size, alignment, zero, - slab, tcache); + return malloc_dispatch_ralloc( + tsdn, arena, ptr, oldsize, size, alignment, zero, slab, tcache); } JEMALLOC_ALWAYS_INLINE void * diff --git a/include/jemalloc/internal/malloc_dispatch_externs.h b/include/jemalloc/internal/malloc_dispatch_externs.h new file mode 100644 index 00000000..197b5d6e --- /dev/null +++ b/include/jemalloc/internal/malloc_dispatch_externs.h @@ -0,0 +1,19 @@ +#ifndef JEMALLOC_INTERNAL_MALLOC_DISPATCH_EXTERNS_H +#define JEMALLOC_INTERNAL_MALLOC_DISPATCH_EXTERNS_H + +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/tsd_types.h" + +/* Forward decls; only used as pointer types below. 
*/ +typedef struct arena_s arena_t; +typedef struct tcache_s tcache_t; + +void *malloc_dispatch_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, + size_t alignment, bool zero, bool slab, tcache_t *tcache); +void malloc_dispatch_dalloc_promoted( + tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); +void *malloc_dispatch_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, + size_t oldsize, size_t size, size_t alignment, bool zero, bool slab, + tcache_t *tcache); + +#endif /* JEMALLOC_INTERNAL_MALLOC_DISPATCH_EXTERNS_H */ diff --git a/include/jemalloc/internal/malloc_dispatch_inlines.h b/include/jemalloc/internal/malloc_dispatch_inlines.h new file mode 100644 index 00000000..a3b10c48 --- /dev/null +++ b/include/jemalloc/internal/malloc_dispatch_inlines.h @@ -0,0 +1,279 @@ +#ifndef JEMALLOC_INTERNAL_MALLOC_DISPATCH_INLINES_H +#define JEMALLOC_INTERNAL_MALLOC_DISPATCH_INLINES_H + +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena_externs.h" +#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/bin_inlines.h" +#include "jemalloc/internal/div.h" +#include "jemalloc/internal/emap.h" +#include "jemalloc/internal/jemalloc_internal_inlines_b.h" +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/large_externs.h" +#include "jemalloc/internal/malloc_dispatch_externs.h" +#include "jemalloc/internal/safety_check.h" +#include "jemalloc/internal/sc.h" +#include "jemalloc/internal/sz.h" +#include "jemalloc/internal/tcache_inlines.h" + +JEMALLOC_ALWAYS_INLINE void * +malloc_dispatch_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, + bool zero, bool slab, tcache_t *tcache, bool slow_path) { + assert(!tsdn_null(tsdn) || tcache == NULL); + + if (likely(tcache != NULL)) { + if (likely(slab)) { + assert(sz_can_use_slab(size)); + return tcache_alloc_small(tsdn_tsd(tsdn), arena, tcache, + size, ind, zero, slow_path); + } else if (likely(tcache_can_cache_large(tcache, ind))) { + 
return tcache_alloc_large(tsdn_tsd(tsdn), arena, tcache, + size, ind, zero, slow_path); + } + /* (size > tcache_max) case falls through. */ + } + + return arena_malloc_hard(tsdn, arena, size, ind, zero, slab); +} + +static inline void +malloc_dispatch_dalloc_large_no_tcache( + tsdn_t *tsdn, void *ptr, szind_t szind, size_t usize) { + /* + * szind is still needed in this function mainly because + * szind < SC_NBINS determines not only if this is a small alloc, + * but also if szind is valid (an inactive extent would have + * szind == SC_NSIZES). + */ + if (config_prof && unlikely(szind < SC_NBINS)) { + malloc_dispatch_dalloc_promoted(tsdn, ptr, NULL, true); + } else { + edata_t *edata = emap_edata_lookup( + tsdn, &arena_emap_global, ptr); + if (large_dalloc_safety_checks(edata, ptr, usize)) { + /* See the comment in isfree. */ + return; + } + large_dalloc(tsdn, edata); + } +} + +static inline void +malloc_dispatch_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { + assert(ptr != NULL); + + emap_alloc_ctx_t alloc_ctx; + emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx); + + if (config_debug) { + edata_t *edata = emap_edata_lookup( + tsdn, &arena_emap_global, ptr); + assert(alloc_ctx.szind == edata_szind_get(edata)); + assert(alloc_ctx.szind < SC_NSIZES); + assert(alloc_ctx.slab == edata_slab_get(edata)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) + == edata_usize_get(edata)); + } + + if (likely(alloc_ctx.slab)) { + /* Small allocation. 
*/ + arena_dalloc_small(tsdn, ptr); + } else { + malloc_dispatch_dalloc_large_no_tcache( + tsdn, ptr, alloc_ctx.szind, + emap_alloc_ctx_usize_get(&alloc_ctx)); + } +} + +JEMALLOC_ALWAYS_INLINE void +malloc_dispatch_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, + szind_t szind, size_t usize, bool slow_path) { + assert(!tsdn_null(tsdn) && tcache != NULL); + bool is_sample_promoted = config_prof && szind < SC_NBINS; + if (unlikely(is_sample_promoted)) { + malloc_dispatch_dalloc_promoted(tsdn, ptr, tcache, slow_path); + } else { + if (tcache_can_cache_large(tcache, szind)) { + tcache_dalloc_large( + tsdn_tsd(tsdn), tcache, ptr, szind, slow_path); + } else { + edata_t *edata = emap_edata_lookup( + tsdn, &arena_emap_global, ptr); + if (large_dalloc_safety_checks(edata, ptr, usize)) { + /* See the comment in isfree. */ + return; + } + large_dalloc(tsdn, edata); + } + } +} + +JEMALLOC_ALWAYS_INLINE bool +malloc_dispatch_dalloc_small_safety_check(tsdn_t *tsdn, void *ptr) { + if (!config_debug) { + return false; + } + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); + szind_t binind = edata_szind_get(edata); + div_info_t div_info = arena_binind_div_info[binind]; + /* + * Calls the internal function bin_slab_regind_impl because the + * safety check does not require a lock. 
+ */ + size_t regind = bin_slab_regind_impl(&div_info, binind, edata, ptr); + slab_data_t *slab_data = edata_slab_data_get(edata); + const bin_info_t *bin_info = &bin_infos[binind]; + assert(edata_nfree_get(edata) < bin_info->nregs); + if (unlikely(!bitmap_get( + slab_data->bitmap, &bin_info->bitmap_info, regind))) { + safety_check_fail( + "Invalid deallocation detected: the pointer being freed (%p) not " + "currently active, possibly caused by double free bugs.\n", + ptr); + return true; + } + return false; +} + +JEMALLOC_ALWAYS_INLINE void +malloc_dispatch_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, + emap_alloc_ctx_t *caller_alloc_ctx, bool slow_path) { + assert(!tsdn_null(tsdn) || tcache == NULL); + assert(ptr != NULL); + + if (unlikely(tcache == NULL)) { + malloc_dispatch_dalloc_no_tcache(tsdn, ptr); + return; + } + + emap_alloc_ctx_t alloc_ctx; + if (caller_alloc_ctx != NULL) { + alloc_ctx = *caller_alloc_ctx; + } else { + util_assume(tsdn != NULL); + emap_alloc_ctx_lookup( + tsdn, &arena_emap_global, ptr, &alloc_ctx); + } + + if (config_debug) { + edata_t *edata = emap_edata_lookup( + tsdn, &arena_emap_global, ptr); + assert(alloc_ctx.szind == edata_szind_get(edata)); + assert(alloc_ctx.szind < SC_NSIZES); + assert(alloc_ctx.slab == edata_slab_get(edata)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) + == edata_usize_get(edata)); + } + + if (likely(alloc_ctx.slab)) { + /* Small allocation. 
*/ + if (malloc_dispatch_dalloc_small_safety_check(tsdn, ptr)) { + return; + } + tcache_dalloc_small( + tsdn_tsd(tsdn), tcache, ptr, alloc_ctx.szind, slow_path); + } else { + malloc_dispatch_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind, + emap_alloc_ctx_usize_get(&alloc_ctx), slow_path); + } +} + +static inline void +malloc_dispatch_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { + assert(ptr != NULL); + assert(size <= SC_LARGE_MAXCLASS); + + emap_alloc_ctx_t alloc_ctx; + if (!config_prof || !opt_prof) { + /* + * There is no risk of being confused by a promoted sampled + * object, so base szind and slab on the given size. + */ + szind_t szind = sz_size2index(size); + emap_alloc_ctx_init( + &alloc_ctx, szind, (szind < SC_NBINS), size); + } + + if ((config_prof && opt_prof) || config_debug) { + emap_alloc_ctx_lookup( + tsdn, &arena_emap_global, ptr, &alloc_ctx); + + assert(alloc_ctx.szind == sz_size2index(size)); + assert((config_prof && opt_prof) + || alloc_ctx.slab == (alloc_ctx.szind < SC_NBINS)); + + if (config_debug) { + edata_t *edata = emap_edata_lookup( + tsdn, &arena_emap_global, ptr); + assert(alloc_ctx.szind == edata_szind_get(edata)); + assert(alloc_ctx.slab == edata_slab_get(edata)); + } + } + + if (likely(alloc_ctx.slab)) { + /* Small allocation. */ + arena_dalloc_small(tsdn, ptr); + } else { + malloc_dispatch_dalloc_large_no_tcache( + tsdn, ptr, alloc_ctx.szind, + emap_alloc_ctx_usize_get(&alloc_ctx)); + } +} + +JEMALLOC_ALWAYS_INLINE void +malloc_dispatch_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, + emap_alloc_ctx_t *caller_alloc_ctx, bool slow_path) { + assert(!tsdn_null(tsdn) || tcache == NULL); + assert(ptr != NULL); + assert(size <= SC_LARGE_MAXCLASS); + + if (unlikely(tcache == NULL)) { + malloc_dispatch_sdalloc_no_tcache(tsdn, ptr, size); + return; + } + + emap_alloc_ctx_t alloc_ctx; + if (config_prof && opt_prof) { + if (caller_alloc_ctx == NULL) { + /* Uncommon case and should be a static check. 
*/ + emap_alloc_ctx_lookup( + tsdn, &arena_emap_global, ptr, &alloc_ctx); + assert(alloc_ctx.szind == sz_size2index(size)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) == size); + } else { + alloc_ctx = *caller_alloc_ctx; + } + } else { + /* + * There is no risk of being confused by a promoted sampled + * object, so base szind and slab on the given size. + */ + alloc_ctx.szind = sz_size2index(size); + alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); + } + + if (config_debug) { + edata_t *edata = emap_edata_lookup( + tsdn, &arena_emap_global, ptr); + assert(alloc_ctx.szind == edata_szind_get(edata)); + assert(alloc_ctx.slab == edata_slab_get(edata)); + emap_alloc_ctx_init( + &alloc_ctx, alloc_ctx.szind, alloc_ctx.slab, sz_s2u(size)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) + == edata_usize_get(edata)); + } + + if (likely(alloc_ctx.slab)) { + /* Small allocation. */ + if (malloc_dispatch_dalloc_small_safety_check(tsdn, ptr)) { + return; + } + tcache_dalloc_small( + tsdn_tsd(tsdn), tcache, ptr, alloc_ctx.szind, slow_path); + } else { + malloc_dispatch_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind, + sz_s2u(size), slow_path); + } +} + +#endif /* JEMALLOC_INTERNAL_MALLOC_DISPATCH_INLINES_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 1ba81aad..a48ca889 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -73,6 +73,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index c196ce59..e4bbe65a 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -94,6 +94,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 62c36ea5..bc2685c0 100644 --- 
a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -73,6 +73,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index c196ce59..e4bbe65a 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -94,6 +94,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index ed35784b..dffda081 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -73,6 +73,7 @@ + diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index c196ce59..e4bbe65a 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -94,6 +94,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index 7c84196d..c48f9a7b 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -73,6 +73,7 @@ + diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index c196ce59..e4bbe65a 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -94,6 +94,9 @@ Source Files + + Source Files + Source Files diff --git a/src/arena.c b/src/arena.c index d8bd7ae7..8f8b11fb 100644 --- a/src/arena.c +++ b/src/arena.c @@ -694,11 +694,13 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize, size_t bumped_usize) { assert(isalloc(tsdn, ptr) == usize); } -static size_t +size_t arena_prof_demote(tsdn_t *tsdn, edata_t 
*edata, const void *ptr) { cassert(config_prof); + assert(opt_prof); assert(ptr != NULL); - size_t usize = isalloc(tsdn, ptr); + size_t usize = edata_usize_get(edata); + assert(isalloc(tsdn, ptr) == usize); size_t bumped_usize = sz_sa2u(usize, PROF_SAMPLE_ALIGNMENT); assert(bumped_usize <= SC_LARGE_MINCLASS && PAGE_CEILING(bumped_usize) == bumped_usize); @@ -710,17 +712,6 @@ arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) { assert(isalloc(tsdn, ptr) == bumped_usize); - return bumped_usize; -} - -static void -arena_dalloc_promoted_impl( - tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path, edata_t *edata) { - cassert(config_prof); - assert(opt_prof); - - size_t usize = edata_usize_get(edata); - size_t bumped_usize = arena_prof_demote(tsdn, edata, ptr); if (config_opt_safety_checks && usize < SC_LARGE_MINCLASS) { /* * Currently, we only do redzoning for small sampled @@ -728,21 +719,8 @@ arena_dalloc_promoted_impl( */ safety_check_verify_redzone(ptr, usize, bumped_usize); } - szind_t bumped_ind = sz_size2index(bumped_usize); - if (bumped_usize >= SC_LARGE_MINCLASS && tcache != NULL - && tcache_can_cache_large(tcache, bumped_ind)) { - tcache_dalloc_large( - tsdn_tsd(tsdn), tcache, ptr, bumped_ind, slow_path); - } else { - large_dalloc(tsdn, edata); - } -} -void -arena_dalloc_promoted( - tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) { - edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); - arena_dalloc_promoted_impl(tsdn, ptr, tcache, slow_path, edata); + return bumped_usize; } void @@ -784,8 +762,8 @@ arena_reset(tsd_t *tsd, arena_t *arena) { prof_free(tsd, ptr, usize, &alloc_ctx); } if (config_prof && opt_prof && alloc_ctx.szind < SC_NBINS) { - arena_dalloc_promoted_impl(tsd_tsdn(tsd), ptr, - /* tcache */ NULL, /* slow_path */ true, edata); + arena_prof_demote(tsd_tsdn(tsd), edata, ptr); + large_dalloc(tsd_tsdn(tsd), edata); } else { large_dalloc(tsd_tsdn(tsd), edata); } @@ -1154,33 +1132,6 @@ 
arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, } } -void * -arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, - bool zero, bool slab, tcache_t *tcache) { - if (slab) { - assert(sz_can_use_slab(usize)); - /* Small; alignment doesn't require special slab placement. */ - - /* usize should be a result of sz_sa2u() */ - assert((usize & (alignment - 1)) == 0); - - /* - * Small usize can't come from an alignment larger than a page. - */ - assert(alignment <= PAGE); - - return arena_malloc(tsdn, arena, usize, sz_size2index(usize), - zero, slab, tcache, true); - } else { - if (likely(alignment <= CACHELINE)) { - return large_malloc(tsdn, arena, usize, zero); - } else { - return large_palloc( - tsdn, arena, usize, alignment, zero); - } - } -} - static void arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { szind_t binind = edata_szind_get(edata); @@ -1607,64 +1558,6 @@ done: return ret; } -static void * -arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool zero, bool slab, tcache_t *tcache) { - if (alignment == 0) { - return arena_malloc(tsdn, arena, usize, sz_size2index(usize), - zero, slab, tcache, true); - } - usize = sz_sa2u(usize, alignment); - if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { - return NULL; - } - return ipalloct_explicit_slab( - tsdn, usize, alignment, zero, slab, tcache, arena); -} - -void * -arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, - size_t size, size_t alignment, bool zero, bool slab, tcache_t *tcache) { - size_t usize = alignment == 0 ? sz_s2u(size) : sz_sa2u(size, alignment); - if (unlikely(usize == 0 || size > SC_LARGE_MAXCLASS)) { - return NULL; - } - - if (likely(slab)) { - assert(sz_can_use_slab(usize)); - /* Try to avoid moving the allocation. 
*/ - UNUSED size_t newsize; - if (!arena_ralloc_no_move( - tsdn, ptr, oldsize, usize, 0, zero, &newsize)) { - return ptr; - } - } - - if (oldsize >= SC_LARGE_MINCLASS && usize >= SC_LARGE_MINCLASS) { - return large_ralloc(tsdn, arena, ptr, usize, alignment, zero, - tcache); - } - - /* - * size and oldsize are different enough that we need to move the - * object. In that case, fall back to allocating new space and copying. - */ - void *ret = arena_ralloc_move_helper( - tsdn, arena, usize, alignment, zero, slab, tcache); - if (ret == NULL) { - return NULL; - } - - /* - * Junk/zero-filling were already done by - * ipalloc()/arena_malloc(). - */ - size_t copysize = (usize < oldsize) ? usize : oldsize; - memcpy(ret, ptr, copysize); - isdalloct(tsdn, ptr, oldsize, tcache, NULL, true); - return ret; -} - ehooks_t * arena_get_ehooks(const arena_t *arena) { return base_ehooks_get(arena->base); diff --git a/src/malloc_dispatch.c b/src/malloc_dispatch.c new file mode 100644 index 00000000..ea8d2817 --- /dev/null +++ b/src/malloc_dispatch.c @@ -0,0 +1,109 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/emap.h" + +/******************************************************************************/ + +void +malloc_dispatch_dalloc_promoted( + tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) { + cassert(config_prof); + assert(opt_prof); + + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); + size_t bumped_usize = arena_prof_demote(tsdn, edata, ptr); + szind_t bumped_ind = sz_size2index(bumped_usize); + if (bumped_usize >= SC_LARGE_MINCLASS && tcache != NULL + && tcache_can_cache_large(tcache, bumped_ind)) { + tcache_dalloc_large( + tsdn_tsd(tsdn), tcache, ptr, bumped_ind, slow_path); + } else { + large_dalloc(tsdn, edata); + } +} + +void * +malloc_dispatch_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, + size_t alignment, 
bool zero, bool slab, tcache_t *tcache) { + if (slab) { + assert(sz_can_use_slab(usize)); + /* Small; alignment doesn't require special slab placement. */ + + /* usize should be a result of sz_sa2u() */ + assert((usize & (alignment - 1)) == 0); + + /* + * Small usize can't come from an alignment larger than a page. + */ + assert(alignment <= PAGE); + + return malloc_dispatch_malloc(tsdn, arena, usize, + sz_size2index(usize), zero, slab, tcache, true); + } else { + if (likely(alignment <= CACHELINE)) { + return large_malloc(tsdn, arena, usize, zero); + } else { + return large_palloc( + tsdn, arena, usize, alignment, zero); + } + } +} + +static void * +malloc_dispatch_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, + size_t alignment, bool zero, bool slab, tcache_t *tcache) { + if (alignment == 0) { + return malloc_dispatch_malloc(tsdn, arena, usize, + sz_size2index(usize), zero, slab, tcache, true); + } + usize = sz_sa2u(usize, alignment); + if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { + return NULL; + } + return ipalloct_explicit_slab( + tsdn, usize, alignment, zero, slab, tcache, arena); +} + +void * +malloc_dispatch_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, + size_t size, size_t alignment, bool zero, bool slab, tcache_t *tcache) { + size_t usize = alignment == 0 ? sz_s2u(size) : sz_sa2u(size, alignment); + if (unlikely(usize == 0 || size > SC_LARGE_MAXCLASS)) { + return NULL; + } + + if (likely(slab)) { + assert(sz_can_use_slab(usize)); + /* Try to avoid moving the allocation. */ + UNUSED size_t newsize; + if (!arena_ralloc_no_move( + tsdn, ptr, oldsize, usize, 0, zero, &newsize)) { + return ptr; + } + } + + if (oldsize >= SC_LARGE_MINCLASS && usize >= SC_LARGE_MINCLASS) { + return large_ralloc(tsdn, arena, ptr, usize, alignment, zero, + tcache); + } + + /* + * size and oldsize are different enough that we need to move the + * object. In that case, fall back to allocating new space and copying. 
+ */ + void *ret = malloc_dispatch_ralloc_move_helper( + tsdn, arena, usize, alignment, zero, slab, tcache); + if (ret == NULL) { + return NULL; + } + + /* + * Junk/zero-filling were already done by ipalloc() / dispatch alloc. + */ + size_t copysize = (usize < oldsize) ? usize : oldsize; + memcpy(ret, ptr, copysize); + isdalloct(tsdn, ptr, oldsize, tcache, NULL, true); + return ret; +} From 71a25222ee9d2f7169ff2af28b72892020f3a67c Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Wed, 27 May 2026 17:24:21 -0400 Subject: [PATCH 03/19] Break jemalloc_preamble.h <-> test_hooks.h #include cycle --- include/jemalloc/internal/test_hooks.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/jemalloc/internal/test_hooks.h b/include/jemalloc/internal/test_hooks.h index 35f3a211..dfe8a86e 100644 --- a/include/jemalloc/internal/test_hooks.h +++ b/include/jemalloc/internal/test_hooks.h @@ -1,8 +1,6 @@ #ifndef JEMALLOC_INTERNAL_TEST_HOOKS_H #define JEMALLOC_INTERNAL_TEST_HOOKS_H -#include "jemalloc/internal/jemalloc_preamble.h" - extern JEMALLOC_EXPORT void (*test_hooks_arena_new_hook)(void); extern JEMALLOC_EXPORT void (*test_hooks_libc_hook)(void); From 26cd9389d3839ddceec4486594f67cecaf73a4c7 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Wed, 27 May 2026 17:49:46 -0400 Subject: [PATCH 04/19] Break edata.h <-> prof_types.h #include coupling edata.h only uses prof_tctx_t and prof_recent_t as opaque pointer types (in two getters, two setters, and two struct fields), so forward declarations are sufficient. Drop the #include of prof_types.h and declare the two typedefs locally. 
--- include/jemalloc/internal/edata.h | 5 ++++- include/jemalloc/internal/prof_structs.h | 1 - 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 217232f7..198d55e1 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -8,13 +8,16 @@ #include "jemalloc/internal/hpdata.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/ph.h" -#include "jemalloc/internal/prof_types.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/slab_data.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/typed_list.h" +/* Opaque to edata; only stored as pointers in e_prof_info_t. */ +typedef struct prof_tctx_s prof_tctx_t; +typedef struct prof_recent_s prof_recent_t; + /* * sizeof(edata_t) is 128 bytes on 64-bit architectures. Ensure the alignment * to free up the low bits in the rtree leaf. diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index d38b15ea..d3a13718 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -3,7 +3,6 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/edata.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/rb.h" From fb92d8a9162ae5aa56867bee2f75eb68fde0c381 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Wed, 27 May 2026 17:24:33 -0400 Subject: [PATCH 05/19] Consolidate simple component headers (large, background_thread, bin) Folds several historical *_types/_structs/_externs/_inlines splits where the layering is no longer load-bearing. - large_externs.h -> large.h: renamed; it was a single-purpose function-prototype file. - background_thread_structs.h + background_thread_externs.h -> background_thread.h: merged. 
background_thread_inlines.h is kept separate because it depends on arena_inlines_a.h. - bin_inlines.h folded into bin.h, along with BIN_SHARDS_MAX / N_BIN_SHARDS_DEFAULT from bin_types.h. bin.h carries a forward decl of arena_binind_div_info (declared in arena_externs.h) so it stays hermetic without re-introducing the bin.h <-> arena_externs.h cycle. - tsd_binshards.h (new): houses tsd_binshards_t and its zero initializer. Keeping these out of bin.h lets tsd_internals.h pull in just what it needs during X-macro expansion, avoiding bin.h's mutex.h dependency (mutex.h itself depends on TSD machinery, so routing it through tsd_internals.h forms a chicken-and-egg). jemalloc_internal_includes.h: drops the now-redundant references to the deleted/merged headers. --- include/jemalloc/internal/arena_inlines_b.h | 3 +- ...d_thread_structs.h => background_thread.h} | 38 +++++- .../internal/background_thread_externs.h | 38 ------ .../internal/background_thread_inlines.h | 2 +- include/jemalloc/internal/bin.h | 98 ++++++++++++++- include/jemalloc/internal/bin_inlines.h | 112 ------------------ include/jemalloc/internal/bin_types.h | 21 ---- include/jemalloc/internal/ctl.h | 2 +- .../internal/jemalloc_internal_includes.h | 5 +- .../internal/{large_externs.h => large.h} | 6 +- .../internal/malloc_dispatch_inlines.h | 4 +- include/jemalloc/internal/tcache_inlines.h | 2 +- include/jemalloc/internal/tsd_binshards.h | 24 ++++ include/jemalloc/internal/tsd_internals.h | 2 +- src/arena.c | 8 ++ test/unit/bin.c | 3 + test/unit/slab.c | 3 + 17 files changed, 180 insertions(+), 191 deletions(-) rename include/jemalloc/internal/{background_thread_structs.h => background_thread.h} (58%) delete mode 100644 include/jemalloc/internal/background_thread_externs.h delete mode 100644 include/jemalloc/internal/bin_inlines.h delete mode 100644 include/jemalloc/internal/bin_types.h rename include/jemalloc/internal/{large_externs.h => large.h} (87%) create mode 100644 
include/jemalloc/internal/tsd_binshards.h diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 005f422e..5f0420c9 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -4,12 +4,11 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena_externs.h" #include "jemalloc/internal/arena_structs.h" -#include "jemalloc/internal/bin_inlines.h" #include "jemalloc/internal/div.h" #include "jemalloc/internal/emap.h" #include "jemalloc/internal/jemalloc_internal_inlines_b.h" #include "jemalloc/internal/jemalloc_internal_types.h" -#include "jemalloc/internal/large_externs.h" +#include "jemalloc/internal/large.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/prof_externs.h" #include "jemalloc/internal/prof_structs.h" diff --git a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread.h similarity index 58% rename from include/jemalloc/internal/background_thread_structs.h rename to include/jemalloc/internal/background_thread.h index d56673da..db26f1b8 100644 --- a/include/jemalloc/internal/background_thread_structs.h +++ b/include/jemalloc/internal/background_thread.h @@ -1,11 +1,10 @@ -#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H -#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H +#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_H +#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_H #include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/base.h" #include "jemalloc/internal/mutex.h" -/* This file really combines "structs" and "types", but only transitionally. 
*/ - #if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK) # define JEMALLOC_PTHREAD_CREATE_WRAPPER #endif @@ -66,4 +65,33 @@ struct background_thread_stats_s { }; typedef struct background_thread_stats_s background_thread_stats_t; -#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H */ +extern bool opt_background_thread; +extern size_t opt_max_background_threads; +extern malloc_mutex_t background_thread_lock; +extern atomic_b_t background_thread_enabled_state; +extern size_t n_background_threads; +extern size_t max_background_threads; +extern background_thread_info_t *background_thread_info; + +bool background_thread_create(tsd_t *tsd, unsigned arena_ind); +bool background_threads_enable(tsd_t *tsd); +bool background_threads_disable(tsd_t *tsd); +bool background_thread_is_started(background_thread_info_t *info); +void background_thread_wakeup_early( + background_thread_info_t *info, nstime_t *remaining_sleep); +void background_thread_prefork0(tsdn_t *tsdn); +void background_thread_prefork1(tsdn_t *tsdn); +void background_thread_postfork_parent(tsdn_t *tsdn); +void background_thread_postfork_child(tsdn_t *tsdn); +bool background_thread_stats_read( + tsdn_t *tsdn, background_thread_stats_t *stats); +void background_thread_ctl_init(tsdn_t *tsdn); + +#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER +extern int pthread_create_wrapper(pthread_t *__restrict, const pthread_attr_t *, + void *(*)(void *), void *__restrict); +#endif +bool background_thread_boot0(void); +bool background_thread_boot1(tsdn_t *tsdn, base_t *base); + +#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_H */ diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h deleted file mode 100644 index efc0aaa4..00000000 --- a/include/jemalloc/internal/background_thread_externs.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H -#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H - -#include 
"jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/background_thread_structs.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/mutex.h" - -extern bool opt_background_thread; -extern size_t opt_max_background_threads; -extern malloc_mutex_t background_thread_lock; -extern atomic_b_t background_thread_enabled_state; -extern size_t n_background_threads; -extern size_t max_background_threads; -extern background_thread_info_t *background_thread_info; - -bool background_thread_create(tsd_t *tsd, unsigned arena_ind); -bool background_threads_enable(tsd_t *tsd); -bool background_threads_disable(tsd_t *tsd); -bool background_thread_is_started(background_thread_info_t *info); -void background_thread_wakeup_early( - background_thread_info_t *info, nstime_t *remaining_sleep); -void background_thread_prefork0(tsdn_t *tsdn); -void background_thread_prefork1(tsdn_t *tsdn); -void background_thread_postfork_parent(tsdn_t *tsdn); -void background_thread_postfork_child(tsdn_t *tsdn); -bool background_thread_stats_read( - tsdn_t *tsdn, background_thread_stats_t *stats); -void background_thread_ctl_init(tsdn_t *tsdn); - -#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER -extern int pthread_create_wrapper(pthread_t *__restrict, const pthread_attr_t *, - void *(*)(void *), void *__restrict); -#endif -bool background_thread_boot0(void); -bool background_thread_boot1(tsdn_t *tsdn, base_t *base); - -#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H */ diff --git a/include/jemalloc/internal/background_thread_inlines.h b/include/jemalloc/internal/background_thread_inlines.h index e822a3f7..67517626 100644 --- a/include/jemalloc/internal/background_thread_inlines.h +++ b/include/jemalloc/internal/background_thread_inlines.h @@ -4,7 +4,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena_inlines_a.h" #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/background_thread_externs.h" +#include 
"jemalloc/internal/background_thread.h" JEMALLOC_ALWAYS_INLINE bool background_thread_enabled(void) { diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 45c6c5dd..626a459c 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -4,11 +4,15 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/bin_info.h" #include "jemalloc/internal/bin_stats.h" -#include "jemalloc/internal/bin_types.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/div.h" #include "jemalloc/internal/edata.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/sc.h" +#define BIN_SHARDS_MAX (1 << EDATA_BITS_BINSHARD_WIDTH) +#define N_BIN_SHARDS_DEFAULT 1 + /* * A bin contains a set of extents that are currently being used for slab * allocations. @@ -128,4 +132,96 @@ bin_stats_merge(tsdn_t *tsdn, bin_stats_data_t *dst_bin_stats, bin_t *bin) { malloc_mutex_unlock(tsdn, &bin->lock); } +/* + * The dalloc bin info contains just the information that the common paths need + * during tcache flushes. By force-inlining these paths, and using local copies + * of data (so that the compiler knows it's constant), we avoid a whole bunch of + * redundant loads and stores by leaving this information in registers. + */ +typedef struct bin_dalloc_locked_info_s bin_dalloc_locked_info_t; +struct bin_dalloc_locked_info_s { + div_info_t div_info; + uint32_t nregs; + uint64_t ndalloc; +}; + +/* Find the region index of a pointer within a slab. */ +JEMALLOC_ALWAYS_INLINE size_t +bin_slab_regind_impl(const div_info_t *div_info, szind_t binind, + const edata_t *slab, const void *ptr) { + size_t diff, regind; + + /* Freeing a pointer outside the slab can cause assertion failure. */ + assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab)); + assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab)); + /* Freeing an interior pointer can cause assertion failure. 
*/ + assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) + % (uintptr_t)bin_infos[binind].reg_size + == 0); + + diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)); + + /* Avoid doing division with a variable divisor. */ + regind = div_compute(div_info, diff); + assert(regind < bin_infos[binind].nregs); + return regind; +} + +JEMALLOC_ALWAYS_INLINE size_t +bin_slab_regind(const bin_dalloc_locked_info_t *info, szind_t binind, + const edata_t *slab, const void *ptr) { + size_t regind = bin_slab_regind_impl( + &info->div_info, binind, slab, ptr); + return regind; +} + +/* + * Does the deallocation work associated with freeing a single pointer (a + * "step") in between a bin_dalloc_locked begin and end call. + * + * Returns true if arena_slab_dalloc must be called on slab. Doesn't do + * stats updates, which happen during finish (this lets running counts get left + * in a register). + */ +JEMALLOC_ALWAYS_INLINE bool +bin_dalloc_locked_step(tsdn_t *tsdn, bool is_auto, bin_t *bin, + bin_dalloc_locked_info_t *info, szind_t binind, edata_t *slab, + void *ptr) { + const bin_info_t *bin_info = &bin_infos[binind]; + size_t regind = bin_slab_regind(info, binind, slab, ptr); + slab_data_t *slab_data = edata_slab_data_get(slab); + + assert(edata_nfree_get(slab) < bin_info->nregs); + /* Freeing an unallocated pointer can cause assertion failure. 
*/ + assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind)); + + bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind); + edata_nfree_inc(slab); + + if (config_stats) { + info->ndalloc++; + } + + unsigned nfree = edata_nfree_get(slab); + if (nfree == bin_info->nregs) { + bin_dalloc_locked_handle_newly_empty( + tsdn, is_auto, slab, bin); + return true; + } else if (nfree == 1 && slab != bin->slabcur) { + bin_dalloc_locked_handle_newly_nonempty( + tsdn, is_auto, slab, bin); + } + return false; +} + +JEMALLOC_ALWAYS_INLINE void +bin_dalloc_locked_finish(tsdn_t *tsdn, bin_t *bin, + bin_dalloc_locked_info_t *info) { + if (config_stats) { + bin->stats.ndalloc += info->ndalloc; + assert(bin->stats.curregs >= (size_t)info->ndalloc); + bin->stats.curregs -= (size_t)info->ndalloc; + } +} + #endif /* JEMALLOC_INTERNAL_BIN_H */ diff --git a/include/jemalloc/internal/bin_inlines.h b/include/jemalloc/internal/bin_inlines.h deleted file mode 100644 index 31fe4818..00000000 --- a/include/jemalloc/internal/bin_inlines.h +++ /dev/null @@ -1,112 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_BIN_INLINES_H -#define JEMALLOC_INTERNAL_BIN_INLINES_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/bin.h" -#include "jemalloc/internal/bin_info.h" -#include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/div.h" -#include "jemalloc/internal/edata.h" -#include "jemalloc/internal/sc.h" - -/* - * The dalloc bin info contains just the information that the common paths need - * during tcache flushes. By force-inlining these paths, and using local copies - * of data (so that the compiler knows it's constant), we avoid a whole bunch of - * redundant loads and stores by leaving this information in registers. - */ -typedef struct bin_dalloc_locked_info_s bin_dalloc_locked_info_t; -struct bin_dalloc_locked_info_s { - div_info_t div_info; - uint32_t nregs; - uint64_t ndalloc; -}; - -/* Find the region index of a pointer within a slab. 
*/ -JEMALLOC_ALWAYS_INLINE size_t -bin_slab_regind_impl(const div_info_t *div_info, szind_t binind, - const edata_t *slab, const void *ptr) { - size_t diff, regind; - - /* Freeing a pointer outside the slab can cause assertion failure. */ - assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab)); - assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab)); - /* Freeing an interior pointer can cause assertion failure. */ - assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) - % (uintptr_t)bin_infos[binind].reg_size - == 0); - - diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)); - - /* Avoid doing division with a variable divisor. */ - regind = div_compute(div_info, diff); - assert(regind < bin_infos[binind].nregs); - return regind; -} - -JEMALLOC_ALWAYS_INLINE size_t -bin_slab_regind(const bin_dalloc_locked_info_t *info, szind_t binind, - const edata_t *slab, const void *ptr) { - size_t regind = bin_slab_regind_impl( - &info->div_info, binind, slab, ptr); - return regind; -} - -JEMALLOC_ALWAYS_INLINE void -bin_dalloc_locked_begin( - bin_dalloc_locked_info_t *info, szind_t binind) { - info->div_info = arena_binind_div_info[binind]; - info->nregs = bin_infos[binind].nregs; - info->ndalloc = 0; -} - -/* - * Does the deallocation work associated with freeing a single pointer (a - * "step") in between a bin_dalloc_locked begin and end call. - * - * Returns true if arena_slab_dalloc must be called on slab. Doesn't do - * stats updates, which happen during finish (this lets running counts get left - * in a register). 
- */ -JEMALLOC_ALWAYS_INLINE bool -bin_dalloc_locked_step(tsdn_t *tsdn, bool is_auto, bin_t *bin, - bin_dalloc_locked_info_t *info, szind_t binind, edata_t *slab, - void *ptr) { - const bin_info_t *bin_info = &bin_infos[binind]; - size_t regind = bin_slab_regind(info, binind, slab, ptr); - slab_data_t *slab_data = edata_slab_data_get(slab); - - assert(edata_nfree_get(slab) < bin_info->nregs); - /* Freeing an unallocated pointer can cause assertion failure. */ - assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind)); - - bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind); - edata_nfree_inc(slab); - - if (config_stats) { - info->ndalloc++; - } - - unsigned nfree = edata_nfree_get(slab); - if (nfree == bin_info->nregs) { - bin_dalloc_locked_handle_newly_empty( - tsdn, is_auto, slab, bin); - return true; - } else if (nfree == 1 && slab != bin->slabcur) { - bin_dalloc_locked_handle_newly_nonempty( - tsdn, is_auto, slab, bin); - } - return false; -} - -JEMALLOC_ALWAYS_INLINE void -bin_dalloc_locked_finish(tsdn_t *tsdn, bin_t *bin, - bin_dalloc_locked_info_t *info) { - if (config_stats) { - bin->stats.ndalloc += info->ndalloc; - assert(bin->stats.curregs >= (size_t)info->ndalloc); - bin->stats.curregs -= (size_t)info->ndalloc; - } -} - -#endif /* JEMALLOC_INTERNAL_BIN_INLINES_H */ diff --git a/include/jemalloc/internal/bin_types.h b/include/jemalloc/internal/bin_types.h deleted file mode 100644 index b6bad37e..00000000 --- a/include/jemalloc/internal/bin_types.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_BIN_TYPES_H -#define JEMALLOC_INTERNAL_BIN_TYPES_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/sc.h" - -#define BIN_SHARDS_MAX (1 << EDATA_BITS_BINSHARD_WIDTH) -#define N_BIN_SHARDS_DEFAULT 1 - -/* Used in TSD static initializer only. Real init in arena_bind(). 
*/ -#define TSD_BINSHARDS_ZERO_INITIALIZER \ - { \ - { UINT8_MAX } \ - } - -typedef struct tsd_binshards_s tsd_binshards_t; -struct tsd_binshards_s { - uint8_t binshard[SC_NBINS]; -}; - -#endif /* JEMALLOC_INTERNAL_BIN_TYPES_H */ diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index e7a8221c..09f1825c 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -3,7 +3,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena_stats.h" -#include "jemalloc/internal/background_thread_structs.h" +#include "jemalloc/internal/background_thread.h" #include "jemalloc/internal/bin_stats.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/malloc_io.h" diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index d70c808f..86e2aea1 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -51,7 +51,6 @@ #include "jemalloc/internal/prof_structs.h" #include "jemalloc/internal/arena_structs.h" #include "jemalloc/internal/tcache_structs.h" -#include "jemalloc/internal/background_thread_structs.h" /******************************************************************************/ /* EXTERNS */ @@ -59,11 +58,11 @@ #include "jemalloc/internal/jemalloc_internal_externs.h" #include "jemalloc/internal/arena_externs.h" -#include "jemalloc/internal/large_externs.h" +#include "jemalloc/internal/large.h" #include "jemalloc/internal/tcache_externs.h" #include "jemalloc/internal/malloc_dispatch_externs.h" #include "jemalloc/internal/prof_externs.h" -#include "jemalloc/internal/background_thread_externs.h" +#include "jemalloc/internal/background_thread.h" /******************************************************************************/ /* INLINES */ diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large.h 
similarity index 87% rename from include/jemalloc/internal/large_externs.h rename to include/jemalloc/internal/large.h index 82abd7de..e28841c1 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large.h @@ -1,5 +1,5 @@ -#ifndef JEMALLOC_INTERNAL_LARGE_EXTERNS_H -#define JEMALLOC_INTERNAL_LARGE_EXTERNS_H +#ifndef JEMALLOC_INTERNAL_LARGE_H +#define JEMALLOC_INTERNAL_LARGE_H #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/edata.h" @@ -20,4 +20,4 @@ void large_prof_info_get( void large_prof_tctx_reset(edata_t *edata); void large_prof_info_set(edata_t *edata, prof_tctx_t *tctx, size_t size); -#endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */ +#endif /* JEMALLOC_INTERNAL_LARGE_H */ diff --git a/include/jemalloc/internal/malloc_dispatch_inlines.h b/include/jemalloc/internal/malloc_dispatch_inlines.h index a3b10c48..20a6c5ee 100644 --- a/include/jemalloc/internal/malloc_dispatch_inlines.h +++ b/include/jemalloc/internal/malloc_dispatch_inlines.h @@ -4,12 +4,12 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena_externs.h" #include "jemalloc/internal/arena_inlines_b.h" -#include "jemalloc/internal/bin_inlines.h" +#include "jemalloc/internal/bin.h" #include "jemalloc/internal/div.h" #include "jemalloc/internal/emap.h" #include "jemalloc/internal/jemalloc_internal_inlines_b.h" #include "jemalloc/internal/jemalloc_internal_types.h" -#include "jemalloc/internal/large_externs.h" +#include "jemalloc/internal/large.h" #include "jemalloc/internal/malloc_dispatch_externs.h" #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sc.h" diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index b3ce81bd..181db1b3 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -6,7 +6,7 @@ #include "jemalloc/internal/bin.h" #include "jemalloc/internal/jemalloc_internal_inlines_b.h" 
#include "jemalloc/internal/jemalloc_internal_types.h" -#include "jemalloc/internal/large_externs.h" +#include "jemalloc/internal/large.h" #include "jemalloc/internal/san.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/sz.h" diff --git a/include/jemalloc/internal/tsd_binshards.h b/include/jemalloc/internal/tsd_binshards.h new file mode 100644 index 00000000..a4afc496 --- /dev/null +++ b/include/jemalloc/internal/tsd_binshards.h @@ -0,0 +1,24 @@ +#ifndef JEMALLOC_INTERNAL_TSD_BINSHARDS_H +#define JEMALLOC_INTERNAL_TSD_BINSHARDS_H + +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/sc.h" + +/* + * Per-thread cache of bin-shard assignments. This lives in its own header + * (separate from bin.h) so that tsd_internals.h can pull it in for X-macro + * expansion without dragging in mutex.h, which itself depends on TSD machinery + * and would form an include-order dependency cycle. + */ + +#define TSD_BINSHARDS_ZERO_INITIALIZER \ + { \ + { UINT8_MAX } \ + } + +typedef struct tsd_binshards_s tsd_binshards_t; +struct tsd_binshards_s { + uint8_t binshard[SC_NBINS]; +}; + +#endif /* JEMALLOC_INTERNAL_TSD_BINSHARDS_H */ diff --git a/include/jemalloc/internal/tsd_internals.h b/include/jemalloc/internal/tsd_internals.h index 46b4930f..a7b6fa5e 100644 --- a/include/jemalloc/internal/tsd_internals.h +++ b/include/jemalloc/internal/tsd_internals.h @@ -6,7 +6,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/assert.h" -#include "jemalloc/internal/bin_types.h" +#include "jemalloc/internal/tsd_binshards.h" #include "jemalloc/internal/jemalloc_internal_externs.h" #include "jemalloc/internal/peak.h" #include "jemalloc/internal/prof_types.h" diff --git a/src/arena.c b/src/arena.c index 8f8b11fb..84b97549 100644 --- a/src/arena.c +++ b/src/arena.c @@ -36,6 +36,14 @@ static pa_central_t arena_pa_central_global; div_info_t arena_binind_div_info[SC_NBINS]; +JET_EXTERN void 
+bin_dalloc_locked_begin( + bin_dalloc_locked_info_t *info, szind_t binind) { + info->div_info = arena_binind_div_info[binind]; + info->nregs = bin_infos[binind].nregs; + info->ndalloc = 0; +} + size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; size_t oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; diff --git a/test/unit/bin.c b/test/unit/bin.c index 08dd4665..a7091544 100644 --- a/test/unit/bin.c +++ b/test/unit/bin.c @@ -1,5 +1,8 @@ #include "test/jemalloc_test.h" +extern void bin_dalloc_locked_begin( + bin_dalloc_locked_info_t *info, szind_t binind); + #define INVALID_ARENA_IND ((1U << MALLOCX_ARENA_BITS) - 1) /* Create a page-aligned mock slab with all regions free. */ diff --git a/test/unit/slab.c b/test/unit/slab.c index d98663e8..e7adafea 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -1,5 +1,8 @@ #include "test/jemalloc_test.h" +extern void bin_dalloc_locked_begin( + bin_dalloc_locked_info_t *info, szind_t binind); + #define INVALID_ARENA_IND ((1U << MALLOCX_ARENA_BITS) - 1) TEST_BEGIN(test_bin_slab_regind) { From ab4c178444f0a305c21cb306b9504f5f6c461f75 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Wed, 27 May 2026 18:07:16 -0400 Subject: [PATCH 06/19] Consolidate prof_* and tcache_* header splits Each of these components had a four-way split (_types, _structs, _externs, _inlines) that dates back to the old "include each section multiple times from a master file" pattern. With Step 2's edata <-> prof_types decoupling, merging _types + _structs + _externs into one header per component no longer risks recreating an include cycle. - prof.h replaces prof_types.h + prof_structs.h + prof_externs.h. - tcache.h replaces tcache_types.h + tcache_structs.h + tcache_externs.h. 
prof_inlines.h and tcache_inlines.h are kept separate: prof_inlines.h
sits at the bottom of the dependency layering, and tcache_inlines.h's
include of arena_externs.h is the asymmetric cycle-breaker that keeps
the arena<->tcache symbol cycle from becoming an include cycle.

Two surprises required adjustments beyond a straight concatenation:

1. te_prof_sample_event_lookahead was a JEMALLOC_ALWAYS_INLINE function
   defined in prof_externs.h, but its body calls
   tsd_thread_allocated_* accessors that only exist after tsd inlines
   are loaded. The original layering hid this because prof_externs.h
   was only included near the bottom of jemalloc_internal_includes.h.
   After consolidation, tsd_internals.h's includes pull prof.h in
   earlier, exposing the ordering dependency. Moved the inline to
   prof_inlines.h (where inline definitions belong anyway) and left
   only the related extern in prof.h.

2. base.h was included from prof_externs.h and tcache_externs.h purely
   for base_t * pointer arguments on a couple of declarations.
   Carrying that include into the merged prof.h / tcache.h would pull
   ehooks.h (-> tsd.h) into tsd_internals.h before tsd_internals.h
   finishes declaring its tsd accessors. Replaced with a forward
   declaration of base_t in each merged file.

Similarly, tsd_internals.h's prior #include of prof_types.h becomes a
forward decl of prof_tdata_t (the only prof symbol it references, and
only as a pointer), and large.h needs a forward decl of prof_info_t
because large.h is loaded before prof.h in the new master ordering.

No inline / static qualifiers are dropped; only the one inline moves
files. #ifdef blocks (JEMALLOC_PROF, JEMALLOC_PROF_LIBGCC,
JEMALLOC_PROF_GCC, JEMALLOC_DEBUG) are kept intact.
--- include/jemalloc/internal/arena_inlines_b.h | 3 +- .../internal/jemalloc_internal_includes.h | 8 +- .../internal/jemalloc_internal_inlines_a.h | 2 +- include/jemalloc/internal/large.h | 3 + include/jemalloc/internal/prof.h | 427 ++++++++++++++++++ include/jemalloc/internal/prof_externs.h | 132 ------ include/jemalloc/internal/prof_inlines.h | 29 +- include/jemalloc/internal/prof_structs.h | 221 --------- include/jemalloc/internal/prof_types.h | 94 ---- include/jemalloc/internal/tcache.h | 198 ++++++++ include/jemalloc/internal/tcache_externs.h | 91 ---- include/jemalloc/internal/tcache_inlines.h | 2 +- include/jemalloc/internal/tcache_structs.h | 72 --- include/jemalloc/internal/tcache_types.h | 37 -- include/jemalloc/internal/tsd_internals.h | 7 +- src/thread_event_registry.c | 4 +- 16 files changed, 666 insertions(+), 664 deletions(-) create mode 100644 include/jemalloc/internal/prof.h delete mode 100644 include/jemalloc/internal/prof_externs.h delete mode 100644 include/jemalloc/internal/prof_structs.h delete mode 100644 include/jemalloc/internal/prof_types.h create mode 100644 include/jemalloc/internal/tcache.h delete mode 100644 include/jemalloc/internal/tcache_externs.h delete mode 100644 include/jemalloc/internal/tcache_structs.h delete mode 100644 include/jemalloc/internal/tcache_types.h diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 5f0420c9..f790834c 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -10,8 +10,7 @@ #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/large.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/prof_externs.h" -#include "jemalloc/internal/prof_structs.h" +#include "jemalloc/internal/prof.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sc.h" diff --git 
a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 86e2aea1..87ef4c82 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -41,16 +41,12 @@ /******************************************************************************/ #include "jemalloc/internal/arena_types.h" -#include "jemalloc/internal/tcache_types.h" -#include "jemalloc/internal/prof_types.h" /******************************************************************************/ /* STRUCTS */ /******************************************************************************/ -#include "jemalloc/internal/prof_structs.h" #include "jemalloc/internal/arena_structs.h" -#include "jemalloc/internal/tcache_structs.h" /******************************************************************************/ /* EXTERNS */ @@ -59,9 +55,9 @@ #include "jemalloc/internal/jemalloc_internal_externs.h" #include "jemalloc/internal/arena_externs.h" #include "jemalloc/internal/large.h" -#include "jemalloc/internal/tcache_externs.h" +#include "jemalloc/internal/tcache.h" #include "jemalloc/internal/malloc_dispatch_externs.h" -#include "jemalloc/internal/prof_externs.h" +#include "jemalloc/internal/prof.h" #include "jemalloc/internal/background_thread.h" /******************************************************************************/ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 646ec5be..01771d7a 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -9,7 +9,7 @@ #include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/sc.h" -#include "jemalloc/internal/tcache_externs.h" +#include "jemalloc/internal/tcache.h" #include "jemalloc/internal/ticker.h" JEMALLOC_ALWAYS_INLINE 
malloc_cpuid_t diff --git a/include/jemalloc/internal/large.h b/include/jemalloc/internal/large.h index e28841c1..8e7bdae0 100644 --- a/include/jemalloc/internal/large.h +++ b/include/jemalloc/internal/large.h @@ -4,6 +4,9 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/edata.h" +/* Forward decl; only prof_info_t * is used as a pointer arg below. */ +typedef struct prof_info_s prof_info_t; + void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); void *large_palloc( tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero); diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h new file mode 100644 index 00000000..b0757886 --- /dev/null +++ b/include/jemalloc/internal/prof.h @@ -0,0 +1,427 @@ +#ifndef JEMALLOC_INTERNAL_PROF_H +#define JEMALLOC_INTERNAL_PROF_H + +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prng.h" +#include "jemalloc/internal/prof_hook.h" +#include "jemalloc/internal/rb.h" +#include "jemalloc/internal/thread_event_registry.h" + +/* Forward decl; only base_t * is used as a pointer arg below. */ +typedef struct base_s base_t; + +/******************************************************************************/ +/* TYPES */ +/******************************************************************************/ + +typedef struct prof_bt_s prof_bt_t; +typedef struct prof_cnt_s prof_cnt_t; +typedef struct prof_tctx_s prof_tctx_t; +typedef struct prof_info_s prof_info_t; +typedef struct prof_gctx_s prof_gctx_t; +typedef struct prof_tdata_s prof_tdata_t; +typedef struct prof_recent_s prof_recent_t; + +/* Option defaults. */ +#ifdef JEMALLOC_PROF +# define PROF_PREFIX_DEFAULT "jeprof" +#else +# define PROF_PREFIX_DEFAULT "" +#endif +#define LG_PROF_SAMPLE_DEFAULT 19 +#define LG_PROF_INTERVAL_DEFAULT -1 + +/* + * Hard limit on stack backtrace depth. 
The version of prof_backtrace() that + * is based on __builtin_return_address() necessarily has a hard-coded number + * of backtrace frame handlers, and should be kept in sync with this setting. + */ +#ifdef JEMALLOC_PROF_GCC +# define PROF_BT_MAX_LIMIT 256 +#else +# define PROF_BT_MAX_LIMIT UINT_MAX +#endif +#define PROF_BT_MAX_DEFAULT 128 + +/* Initial hash table size. */ +#define PROF_CKH_MINITEMS 64 + +/* Size of memory buffer to use when writing dump files. */ +#ifndef JEMALLOC_PROF +/* Minimize memory bloat for non-prof builds. */ +# define PROF_DUMP_BUFSIZE 1 +#elif defined(JEMALLOC_DEBUG) +/* Use a small buffer size in debug build, mainly to facilitate testing. */ +# define PROF_DUMP_BUFSIZE 16 +#else +# define PROF_DUMP_BUFSIZE 65536 +#endif + +/* Size of size class related tables */ +#ifdef JEMALLOC_PROF +# define PROF_SC_NSIZES SC_NSIZES +#else +/* Minimize memory bloat for non-prof builds. */ +# define PROF_SC_NSIZES 1 +#endif + +/* Size of stack-allocated buffer used by prof_printf(). */ +#define PROF_PRINTF_BUFSIZE 128 + +/* + * Number of mutexes shared among all gctx's. No space is allocated for these + * unless profiling is enabled, so it's okay to over-provision. + */ +#define PROF_NCTX_LOCKS 1024 + +/* + * Number of mutexes shared among all tdata's. No space is allocated for these + * unless profiling is enabled, so it's okay to over-provision. + */ +#define PROF_NTDATA_LOCKS 256 + +/* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF +# define PROF_DUMP_FILENAME_LEN (PATH_MAX + 1) +#else +# define PROF_DUMP_FILENAME_LEN 1 +#endif + +/* Default number of recent allocations to record. */ +#define PROF_RECENT_ALLOC_MAX_DEFAULT 0 + +/* Thread name storage size limit. */ +#define PROF_THREAD_NAME_MAX_LEN 16 + +/* + * Minimum required alignment for sampled allocations. Over-aligning sampled + * allocations allows us to quickly identify them on the dalloc path without + * resorting to metadata lookup. 
+ */ +#define PROF_SAMPLE_ALIGNMENT PAGE +#define PROF_SAMPLE_ALIGNMENT_MASK PAGE_MASK + +/* NOLINTNEXTLINE(performance-no-int-to-ptr) */ +#define PROF_TCTX_SENTINEL ((prof_tctx_t *)((uintptr_t)1U)) + +/******************************************************************************/ +/* STRUCTS */ +/******************************************************************************/ + +struct prof_bt_s { + /* Backtrace, stored as len program counters. */ + void **vec; + unsigned len; +}; + +#ifdef JEMALLOC_PROF_LIBGCC +/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ +typedef struct { + void **vec; + unsigned *len; + unsigned max; +} prof_unwind_data_t; +#endif + +struct prof_cnt_s { + /* Profiling counters. */ + uint64_t curobjs; + uint64_t curobjs_shifted_unbiased; + uint64_t curbytes; + uint64_t curbytes_unbiased; + uint64_t accumobjs; + uint64_t accumobjs_shifted_unbiased; + uint64_t accumbytes; + uint64_t accumbytes_unbiased; +}; + +typedef enum { + prof_tctx_state_initializing, + prof_tctx_state_nominal, + prof_tctx_state_dumping, + prof_tctx_state_purgatory /* Dumper must finish destroying. */ +} prof_tctx_state_t; + +struct prof_tctx_s { + /* Thread data for thread that performed the allocation. */ + prof_tdata_t *tdata; + + /* + * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be + * defunct during teardown. + */ + uint64_t thr_uid; + uint64_t thr_discrim; + + /* + * Reference count of how many times this tctx object is referenced in + * recent allocation / deallocation records, protected by tdata->lock. + */ + uint64_t recent_count; + + /* Profiling counters, protected by tdata->lock. */ + prof_cnt_t cnts; + + /* Associated global context. */ + prof_gctx_t *gctx; + + /* + * UID that distinguishes multiple tctx's created by the same thread, + * but coexisting in gctx->tctxs. There are two ways that such + * coexistence can occur: + * - A dumper thread can cause a tctx to be retained in the purgatory + * state. 
+ * - Although a single "producer" thread must create all tctx's which + * share the same thr_uid, multiple "consumers" can each concurrently + * execute portions of prof_tctx_destroy(). prof_tctx_destroy() only + * gets called once each time cnts.cur{objs,bytes} drop to 0, but this + * threshold can be hit again before the first consumer finishes + * executing prof_tctx_destroy(). + */ + uint64_t tctx_uid; + + /* Linkage into gctx's tctxs. */ + rb_node(prof_tctx_t) tctx_link; + + /* + * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents + * sample vs destroy race. + */ + bool prepared; + + /* Current dump-related state, protected by gctx->lock. */ + prof_tctx_state_t state; + + /* + * Copy of cnts snapshotted during early dump phase, protected by + * dump_mtx. + */ + prof_cnt_t dump_cnts; +}; +typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; + +struct prof_info_s { + /* Time when the allocation was made. */ + nstime_t alloc_time; + /* Points to the prof_tctx_t corresponding to the allocation. */ + prof_tctx_t *alloc_tctx; + /* Allocation request size. */ + size_t alloc_size; +}; + +struct prof_gctx_s { + /* Protects nlimbo, cnt_summed, and tctxs. */ + malloc_mutex_t *lock; + + /* + * Number of threads that currently cause this gctx to be in a state of + * limbo due to one of: + * - Initializing this gctx. + * - Initializing per thread counters associated with this gctx. + * - Preparing to destroy this gctx. + * - Dumping a heap profile that includes this gctx. + * nlimbo must be 1 (single destroyer) in order to safely destroy the + * gctx. + */ + unsigned nlimbo; + + /* + * Tree of profile counters, one for each thread that has allocated in + * this context. + */ + prof_tctx_tree_t tctxs; + + /* Linkage for tree of contexts to be dumped. */ + rb_node(prof_gctx_t) dump_link; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + + /* Associated backtrace. 
*/ + prof_bt_t bt; + + /* Backtrace vector, variable size, referred to by bt. */ + void *vec[1]; +}; +typedef rb_tree(prof_gctx_t) prof_gctx_tree_t; + +struct prof_tdata_s { + malloc_mutex_t *lock; + + /* Monotonically increasing unique thread identifier. */ + uint64_t thr_uid; + + /* + * Monotonically increasing discriminator among tdata structures + * associated with the same thr_uid. + */ + uint64_t thr_discrim; + + rb_node(prof_tdata_t) tdata_link; + + /* + * Counter used to initialize prof_tctx_t's tctx_uid. No locking is + * necessary when incrementing this field, because only one thread ever + * does so. + */ + uint64_t tctx_uid_next; + + /* + * Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks + * backtraces for which it has non-zero allocation/deallocation counters + * associated with thread-specific prof_tctx_t objects. Other threads + * may write to prof_tctx_t contents when freeing associated objects. + */ + ckh_t bt2tctx; + + /* Included in heap profile dumps if has content. */ + char thread_name[PROF_THREAD_NAME_MAX_LEN]; + + /* State used to avoid dumping while operating on prof internals. */ + bool enq; + bool enq_idump; + bool enq_gdump; + + /* + * Set to true during an early dump phase for tdata's which are + * currently being dumped. New threads' tdata's have this initialized + * to false so that they aren't accidentally included in later dump + * phases. + */ + bool dumping; + + /* + * True if profiling is active for this tdata's thread + * (thread.prof.active mallctl). + */ + bool active; + + bool attached; + bool expired; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + + /* Backtrace vector, used for calls to prof_backtrace(). */ + void **vec; +}; +typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; + +struct prof_recent_s { + nstime_t alloc_time; + nstime_t dalloc_time; + + ql_elm(prof_recent_t) link; + size_t size; + size_t usize; + atomic_p_t alloc_edata; /* NULL means allocation has been freed. 
*/ + prof_tctx_t *alloc_tctx; + prof_tctx_t *dalloc_tctx; +}; + +/******************************************************************************/ +/* EXTERNS */ +/******************************************************************************/ + +extern bool opt_prof; +extern bool opt_prof_active; +extern bool opt_prof_thread_active_init; +extern unsigned opt_prof_bt_max; +extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ +extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ +extern bool opt_prof_gdump; /* High-water memory dumping. */ +extern bool opt_prof_final; /* Final profile dumping. */ +extern bool opt_prof_leak; /* Dump leak summary at exit. */ +extern bool opt_prof_leak_error; /* Exit with error code if memory leaked */ +extern bool opt_prof_accum; /* Report cumulative bytes. */ +extern bool opt_prof_log; /* Turn logging on at boot. */ +extern char opt_prof_prefix[ +/* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PATH_MAX + +#endif + 1]; +extern bool opt_prof_unbias; + +/* Include pid namespace in profile file names. */ +extern bool opt_prof_pid_namespace; + +/* For recording recent allocations */ +extern ssize_t opt_prof_recent_alloc_max; + +/* Whether to use thread name provided by the system or by mallctl. */ +extern bool opt_prof_sys_thread_name; + +/* Whether to record per size class counts and request size totals. */ +extern bool opt_prof_stats; + +/* Accessed via prof_active_[gs]et{_unlocked,}(). */ +extern bool prof_active_state; + +/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */ +extern bool prof_gdump_val; + +/* Profile dump interval, measured in bytes allocated. */ +extern uint64_t prof_interval; + +/* + * Initialized as opt_lg_prof_sample, and potentially modified during profiling + * resets. 
+ */ +extern size_t lg_prof_sample; + +extern bool prof_booted; + +void prof_backtrace_hook_set(prof_backtrace_hook_t hook); +prof_backtrace_hook_t prof_backtrace_hook_get(void); + +void prof_dump_hook_set(prof_dump_hook_t hook); +prof_dump_hook_t prof_dump_hook_get(void); + +void prof_sample_hook_set(prof_sample_hook_t hook); +prof_sample_hook_t prof_sample_hook_get(void); + +void prof_sample_free_hook_set(prof_sample_free_hook_t hook); +prof_sample_free_hook_t prof_sample_free_hook_get(void); + +/* Functions only accessed in prof_inlines.h */ +prof_tdata_t *prof_tdata_init(tsd_t *tsd); +prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); + +void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx); +void prof_malloc_sample_object( + tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx); +void prof_free_sampled_object( + tsd_t *tsd, const void *ptr, size_t usize, prof_info_t *prof_info); +prof_tctx_t *prof_tctx_create(tsd_t *tsd); +void prof_idump(tsdn_t *tsdn); +bool prof_mdump(tsd_t *tsd, const char *filename); +void prof_gdump(tsdn_t *tsdn); + +void prof_tdata_cleanup(tsd_t *tsd); +bool prof_active_get(tsdn_t *tsdn); +bool prof_active_set(tsdn_t *tsdn, bool active); +const char *prof_thread_name_get(tsd_t *tsd); +int prof_thread_name_set(tsd_t *tsd, const char *thread_name); +bool prof_thread_active_get(tsd_t *tsd); +bool prof_thread_active_set(tsd_t *tsd, bool active); +bool prof_thread_active_init_get(tsdn_t *tsdn); +bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init); +bool prof_gdump_get(tsdn_t *tsdn); +bool prof_gdump_set(tsdn_t *tsdn, bool active); +void prof_boot0(void); +void prof_boot1(void); +bool prof_boot2(tsd_t *tsd, base_t *base); +void prof_prefork0(tsdn_t *tsdn); +void prof_prefork1(tsdn_t *tsdn); +void prof_postfork_parent(tsdn_t *tsdn); +void prof_postfork_child(tsdn_t *tsdn); + +uint64_t prof_sample_new_event_wait(tsd_t *tsd); +uint64_t tsd_prof_sample_event_wait_get(tsd_t *tsd); + +extern 
te_base_cb_t prof_sample_te_handler; + +#endif /* JEMALLOC_INTERNAL_PROF_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h deleted file mode 100644 index cfb28988..00000000 --- a/include/jemalloc/internal/prof_externs.h +++ /dev/null @@ -1,132 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_PROF_EXTERNS_H -#define JEMALLOC_INTERNAL_PROF_EXTERNS_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/prof_hook.h" -#include "jemalloc/internal/thread_event_registry.h" - -extern bool opt_prof; -extern bool opt_prof_active; -extern bool opt_prof_thread_active_init; -extern unsigned opt_prof_bt_max; -extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ -extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ -extern bool opt_prof_gdump; /* High-water memory dumping. */ -extern bool opt_prof_final; /* Final profile dumping. */ -extern bool opt_prof_leak; /* Dump leak summary at exit. */ -extern bool opt_prof_leak_error; /* Exit with error code if memory leaked */ -extern bool opt_prof_accum; /* Report cumulative bytes. */ -extern bool opt_prof_log; /* Turn logging on at boot. */ -extern char opt_prof_prefix[ -/* Minimize memory bloat for non-prof builds. */ -#ifdef JEMALLOC_PROF - PATH_MAX + -#endif - 1]; -extern bool opt_prof_unbias; - -/* Include pid namespace in profile file names. */ -extern bool opt_prof_pid_namespace; - -/* For recording recent allocations */ -extern ssize_t opt_prof_recent_alloc_max; - -/* Whether to use thread name provided by the system or by mallctl. */ -extern bool opt_prof_sys_thread_name; - -/* Whether to record per size class counts and request size totals. */ -extern bool opt_prof_stats; - -/* Accessed via prof_active_[gs]et{_unlocked,}(). */ -extern bool prof_active_state; - -/* Accessed via prof_gdump_[gs]et{_unlocked,}(). 
*/ -extern bool prof_gdump_val; - -/* Profile dump interval, measured in bytes allocated. */ -extern uint64_t prof_interval; - -/* - * Initialized as opt_lg_prof_sample, and potentially modified during profiling - * resets. - */ -extern size_t lg_prof_sample; - -extern bool prof_booted; - -void prof_backtrace_hook_set(prof_backtrace_hook_t hook); -prof_backtrace_hook_t prof_backtrace_hook_get(void); - -void prof_dump_hook_set(prof_dump_hook_t hook); -prof_dump_hook_t prof_dump_hook_get(void); - -void prof_sample_hook_set(prof_sample_hook_t hook); -prof_sample_hook_t prof_sample_hook_get(void); - -void prof_sample_free_hook_set(prof_sample_free_hook_t hook); -prof_sample_free_hook_t prof_sample_free_hook_get(void); - -/* Functions only accessed in prof_inlines.h */ -prof_tdata_t *prof_tdata_init(tsd_t *tsd); -prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); - -void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx); -void prof_malloc_sample_object( - tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx); -void prof_free_sampled_object( - tsd_t *tsd, const void *ptr, size_t usize, prof_info_t *prof_info); -prof_tctx_t *prof_tctx_create(tsd_t *tsd); -void prof_idump(tsdn_t *tsdn); -bool prof_mdump(tsd_t *tsd, const char *filename); -void prof_gdump(tsdn_t *tsdn); - -void prof_tdata_cleanup(tsd_t *tsd); -bool prof_active_get(tsdn_t *tsdn); -bool prof_active_set(tsdn_t *tsdn, bool active); -const char *prof_thread_name_get(tsd_t *tsd); -int prof_thread_name_set(tsd_t *tsd, const char *thread_name); -bool prof_thread_active_get(tsd_t *tsd); -bool prof_thread_active_set(tsd_t *tsd, bool active); -bool prof_thread_active_init_get(tsdn_t *tsdn); -bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init); -bool prof_gdump_get(tsdn_t *tsdn); -bool prof_gdump_set(tsdn_t *tsdn, bool active); -void prof_boot0(void); -void prof_boot1(void); -bool prof_boot2(tsd_t *tsd, base_t *base); -void prof_prefork0(tsdn_t *tsdn); -void 
prof_prefork1(tsdn_t *tsdn); -void prof_postfork_parent(tsdn_t *tsdn); -void prof_postfork_child(tsdn_t *tsdn); - -uint64_t prof_sample_new_event_wait(tsd_t *tsd); -uint64_t tsd_prof_sample_event_wait_get(tsd_t *tsd); - -/* - * The lookahead functionality facilitates events to be able to lookahead, i.e. - * without touching the event counters, to determine whether an event would be - * triggered. The event counters are not advanced until the end of the - * allocation / deallocation calls, so the lookahead can be useful if some - * preparation work for some event must be done early in the allocation / - * deallocation calls. - * - * Currently only the profiling sampling event needs the lookahead - * functionality, so we don't yet define general purpose lookahead functions. - */ - -JEMALLOC_ALWAYS_INLINE bool -te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) { - if (unlikely(!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0)) { - return false; - } - /* The subtraction is intentionally susceptible to underflow. 
*/ - uint64_t accumbytes = tsd_thread_allocated_get(tsd) + usize - - tsd_thread_allocated_last_event_get(tsd); - return accumbytes >= tsd_prof_sample_event_wait_get(tsd); -} - -extern te_base_cb_t prof_sample_te_handler; - -#endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index 4a36bd7a..19dfd1a0 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -4,12 +4,37 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena_inlines_b.h" #include "jemalloc/internal/jemalloc_internal_inlines_c.h" -#include "jemalloc/internal/prof_externs.h" -#include "jemalloc/internal/prof_structs.h" +#include "jemalloc/internal/prof.h" #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/thread_event.h" +/* + * The lookahead functionality facilitates events to be able to lookahead, i.e. + * without touching the event counters, to determine whether an event would be + * triggered. The event counters are not advanced until the end of the + * allocation / deallocation calls, so the lookahead can be useful if some + * preparation work for some event must be done early in the allocation / + * deallocation calls. + * + * Currently only the profiling sampling event needs the lookahead + * functionality, so we don't yet define general purpose lookahead functions. + * + * Defined here rather than prof.h because the inline body depends on tsd + * accessors that aren't visible until tsd inlines are loaded. + */ + +JEMALLOC_ALWAYS_INLINE bool +te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) { + if (unlikely(!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0)) { + return false; + } + /* The subtraction is intentionally susceptible to underflow. 
*/ + uint64_t accumbytes = tsd_thread_allocated_get(tsd) + usize + - tsd_thread_allocated_last_event_get(tsd); + return accumbytes >= tsd_prof_sample_event_wait_get(tsd); +} + JEMALLOC_ALWAYS_INLINE void prof_active_assert(void) { cassert(config_prof); diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h deleted file mode 100644 index d3a13718..00000000 --- a/include/jemalloc/internal/prof_structs.h +++ /dev/null @@ -1,221 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_PROF_STRUCTS_H -#define JEMALLOC_INTERNAL_PROF_STRUCTS_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/prng.h" -#include "jemalloc/internal/rb.h" - -struct prof_bt_s { - /* Backtrace, stored as len program counters. */ - void **vec; - unsigned len; -}; - -#ifdef JEMALLOC_PROF_LIBGCC -/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ -typedef struct { - void **vec; - unsigned *len; - unsigned max; -} prof_unwind_data_t; -#endif - -struct prof_cnt_s { - /* Profiling counters. */ - uint64_t curobjs; - uint64_t curobjs_shifted_unbiased; - uint64_t curbytes; - uint64_t curbytes_unbiased; - uint64_t accumobjs; - uint64_t accumobjs_shifted_unbiased; - uint64_t accumbytes; - uint64_t accumbytes_unbiased; -}; - -typedef enum { - prof_tctx_state_initializing, - prof_tctx_state_nominal, - prof_tctx_state_dumping, - prof_tctx_state_purgatory /* Dumper must finish destroying. */ -} prof_tctx_state_t; - -struct prof_tctx_s { - /* Thread data for thread that performed the allocation. */ - prof_tdata_t *tdata; - - /* - * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be - * defunct during teardown. - */ - uint64_t thr_uid; - uint64_t thr_discrim; - - /* - * Reference count of how many times this tctx object is referenced in - * recent allocation / deallocation records, protected by tdata->lock. 
- */ - uint64_t recent_count; - - /* Profiling counters, protected by tdata->lock. */ - prof_cnt_t cnts; - - /* Associated global context. */ - prof_gctx_t *gctx; - - /* - * UID that distinguishes multiple tctx's created by the same thread, - * but coexisting in gctx->tctxs. There are two ways that such - * coexistence can occur: - * - A dumper thread can cause a tctx to be retained in the purgatory - * state. - * - Although a single "producer" thread must create all tctx's which - * share the same thr_uid, multiple "consumers" can each concurrently - * execute portions of prof_tctx_destroy(). prof_tctx_destroy() only - * gets called once each time cnts.cur{objs,bytes} drop to 0, but this - * threshold can be hit again before the first consumer finishes - * executing prof_tctx_destroy(). - */ - uint64_t tctx_uid; - - /* Linkage into gctx's tctxs. */ - rb_node(prof_tctx_t) tctx_link; - - /* - * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents - * sample vs destroy race. - */ - bool prepared; - - /* Current dump-related state, protected by gctx->lock. */ - prof_tctx_state_t state; - - /* - * Copy of cnts snapshotted during early dump phase, protected by - * dump_mtx. - */ - prof_cnt_t dump_cnts; -}; -typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; - -struct prof_info_s { - /* Time when the allocation was made. */ - nstime_t alloc_time; - /* Points to the prof_tctx_t corresponding to the allocation. */ - prof_tctx_t *alloc_tctx; - /* Allocation request size. */ - size_t alloc_size; -}; - -struct prof_gctx_s { - /* Protects nlimbo, cnt_summed, and tctxs. */ - malloc_mutex_t *lock; - - /* - * Number of threads that currently cause this gctx to be in a state of - * limbo due to one of: - * - Initializing this gctx. - * - Initializing per thread counters associated with this gctx. - * - Preparing to destroy this gctx. - * - Dumping a heap profile that includes this gctx. - * nlimbo must be 1 (single destroyer) in order to safely destroy the - * gctx. 
- */ - unsigned nlimbo; - - /* - * Tree of profile counters, one for each thread that has allocated in - * this context. - */ - prof_tctx_tree_t tctxs; - - /* Linkage for tree of contexts to be dumped. */ - rb_node(prof_gctx_t) dump_link; - - /* Temporary storage for summation during dump. */ - prof_cnt_t cnt_summed; - - /* Associated backtrace. */ - prof_bt_t bt; - - /* Backtrace vector, variable size, referred to by bt. */ - void *vec[1]; -}; -typedef rb_tree(prof_gctx_t) prof_gctx_tree_t; - -struct prof_tdata_s { - malloc_mutex_t *lock; - - /* Monotonically increasing unique thread identifier. */ - uint64_t thr_uid; - - /* - * Monotonically increasing discriminator among tdata structures - * associated with the same thr_uid. - */ - uint64_t thr_discrim; - - rb_node(prof_tdata_t) tdata_link; - - /* - * Counter used to initialize prof_tctx_t's tctx_uid. No locking is - * necessary when incrementing this field, because only one thread ever - * does so. - */ - uint64_t tctx_uid_next; - - /* - * Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks - * backtraces for which it has non-zero allocation/deallocation counters - * associated with thread-specific prof_tctx_t objects. Other threads - * may write to prof_tctx_t contents when freeing associated objects. - */ - ckh_t bt2tctx; - - /* Included in heap profile dumps if has content. */ - char thread_name[PROF_THREAD_NAME_MAX_LEN]; - - /* State used to avoid dumping while operating on prof internals. */ - bool enq; - bool enq_idump; - bool enq_gdump; - - /* - * Set to true during an early dump phase for tdata's which are - * currently being dumped. New threads' tdata's have this initialized - * to false so that they aren't accidentally included in later dump - * phases. - */ - bool dumping; - - /* - * True if profiling is active for this tdata's thread - * (thread.prof.active mallctl). - */ - bool active; - - bool attached; - bool expired; - - /* Temporary storage for summation during dump. 
*/ - prof_cnt_t cnt_summed; - - /* Backtrace vector, used for calls to prof_backtrace(). */ - void **vec; -}; -typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; - -struct prof_recent_s { - nstime_t alloc_time; - nstime_t dalloc_time; - - ql_elm(prof_recent_t) link; - size_t size; - size_t usize; - atomic_p_t alloc_edata; /* NULL means allocation has been freed. */ - prof_tctx_t *alloc_tctx; - prof_tctx_t *dalloc_tctx; -}; - -#endif /* JEMALLOC_INTERNAL_PROF_STRUCTS_H */ diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h deleted file mode 100644 index 7468885e..00000000 --- a/include/jemalloc/internal/prof_types.h +++ /dev/null @@ -1,94 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_PROF_TYPES_H -#define JEMALLOC_INTERNAL_PROF_TYPES_H - -typedef struct prof_bt_s prof_bt_t; -typedef struct prof_cnt_s prof_cnt_t; -typedef struct prof_tctx_s prof_tctx_t; -typedef struct prof_info_s prof_info_t; -typedef struct prof_gctx_s prof_gctx_t; -typedef struct prof_tdata_s prof_tdata_t; -typedef struct prof_recent_s prof_recent_t; - -/* Option defaults. */ -#ifdef JEMALLOC_PROF -# define PROF_PREFIX_DEFAULT "jeprof" -#else -# define PROF_PREFIX_DEFAULT "" -#endif -#define LG_PROF_SAMPLE_DEFAULT 19 -#define LG_PROF_INTERVAL_DEFAULT -1 - -/* - * Hard limit on stack backtrace depth. The version of prof_backtrace() that - * is based on __builtin_return_address() necessarily has a hard-coded number - * of backtrace frame handlers, and should be kept in sync with this setting. - */ -#ifdef JEMALLOC_PROF_GCC -# define PROF_BT_MAX_LIMIT 256 -#else -# define PROF_BT_MAX_LIMIT UINT_MAX -#endif -#define PROF_BT_MAX_DEFAULT 128 - -/* Initial hash table size. */ -#define PROF_CKH_MINITEMS 64 - -/* Size of memory buffer to use when writing dump files. */ -#ifndef JEMALLOC_PROF -/* Minimize memory bloat for non-prof builds. */ -# define PROF_DUMP_BUFSIZE 1 -#elif defined(JEMALLOC_DEBUG) -/* Use a small buffer size in debug build, mainly to facilitate testing. 
*/ -# define PROF_DUMP_BUFSIZE 16 -#else -# define PROF_DUMP_BUFSIZE 65536 -#endif - -/* Size of size class related tables */ -#ifdef JEMALLOC_PROF -# define PROF_SC_NSIZES SC_NSIZES -#else -/* Minimize memory bloat for non-prof builds. */ -# define PROF_SC_NSIZES 1 -#endif - -/* Size of stack-allocated buffer used by prof_printf(). */ -#define PROF_PRINTF_BUFSIZE 128 - -/* - * Number of mutexes shared among all gctx's. No space is allocated for these - * unless profiling is enabled, so it's okay to over-provision. - */ -#define PROF_NCTX_LOCKS 1024 - -/* - * Number of mutexes shared among all tdata's. No space is allocated for these - * unless profiling is enabled, so it's okay to over-provision. - */ -#define PROF_NTDATA_LOCKS 256 - -/* Minimize memory bloat for non-prof builds. */ -#ifdef JEMALLOC_PROF -# define PROF_DUMP_FILENAME_LEN (PATH_MAX + 1) -#else -# define PROF_DUMP_FILENAME_LEN 1 -#endif - -/* Default number of recent allocations to record. */ -#define PROF_RECENT_ALLOC_MAX_DEFAULT 0 - -/* Thread name storage size limit. */ -#define PROF_THREAD_NAME_MAX_LEN 16 - -/* - * Minimum required alignment for sampled allocations. Over-aligning sampled - * allocations allows us to quickly identify them on the dalloc path without - * resorting to metadata lookup. 
- */ -#define PROF_SAMPLE_ALIGNMENT PAGE -#define PROF_SAMPLE_ALIGNMENT_MASK PAGE_MASK - -/* NOLINTNEXTLINE(performance-no-int-to-ptr) */ -#define PROF_TCTX_SENTINEL ((prof_tctx_t *)((uintptr_t)1U)) - -#endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */ diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h new file mode 100644 index 00000000..12a55f19 --- /dev/null +++ b/include/jemalloc/internal/tcache.h @@ -0,0 +1,198 @@ +#ifndef JEMALLOC_INTERNAL_TCACHE_H +#define JEMALLOC_INTERNAL_TCACHE_H + +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/cache_bin.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/sc.h" +#include "jemalloc/internal/sz.h" +#include "jemalloc/internal/thread_event_registry.h" +#include "jemalloc/internal/ticker.h" + +/* Forward decl; only base_t * is used as a pointer arg below. */ +typedef struct base_s base_t; + +/******************************************************************************/ +/* TYPES */ +/******************************************************************************/ + +typedef struct tcache_slow_s tcache_slow_t; +typedef struct tcache_s tcache_t; +typedef struct tcaches_s tcaches_t; + +/* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */ +#define TCACHE_ZERO_INITIALIZER \ + { 0 } +#define TCACHE_SLOW_ZERO_INITIALIZER \ + { \ + { 0 } \ + } + +/* Used in TSD static initializer only. Will be initialized to opt_tcache. */ +#define TCACHE_ENABLED_ZERO_INITIALIZER false + +/* Used for explicit tcache only. Means flushed but not destroyed. 
*/ +/* NOLINTNEXTLINE(performance-no-int-to-ptr) */ +#define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1) + +#define TCACHE_LG_MAXCLASS_LIMIT LG_USIZE_GROW_SLOW_THRESHOLD +#define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT) +#define TCACHE_NBINS_MAX \ + (SC_NBINS \ + + SC_NGROUP * (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) \ + + 1) +#define TCACHE_GC_NEIGHBOR_LIMIT ((uintptr_t)1 << 21) /* 2M */ +#define TCACHE_GC_INTERVAL_NS ((uint64_t)10 * KQU(1000000)) /* 10ms */ +#define TCACHE_GC_SMALL_NBINS_MAX ((SC_NBINS > 8) ? (SC_NBINS >> 3) : 1) +#define TCACHE_GC_LARGE_NBINS_MAX 1 + +/******************************************************************************/ +/* STRUCTS */ +/******************************************************************************/ + +/* + * The tcache state is split into the slow and hot path data. Each has a + * pointer to the other, and the data always comes in pairs. The layout of each + * of them varies in practice; tcache_slow lives in the TSD for the automatic + * tcache, and as part of a dynamic allocation for manual allocations. Keeping + * a pointer to tcache_slow lets us treat these cases uniformly, rather than + * splitting up the tcache [de]allocation code into those paths called with the + * TSD tcache and those called with a manual tcache. + */ + +struct tcache_slow_s { + /* + * The descriptor lets the arena find our cache bins without seeing the + * tcache definition. This enables arenas to aggregate stats across + * tcaches without having a tcache dependency. + */ + cache_bin_array_descriptor_t cache_bin_array_descriptor; + + /* The arena this tcache is associated with. */ + arena_t *arena; + /* The number of bins activated in the tcache. */ + unsigned tcache_nbins; + /* Last time GC has been performed. */ + nstime_t last_gc_time; + /* Next bin to GC. 
*/ + szind_t next_gc_bin; + szind_t next_gc_bin_small; + szind_t next_gc_bin_large; + /* For small bins, help determine how many items to fill at a time. */ + cache_bin_fill_ctl_t bin_fill_ctl_do_not_access_directly[SC_NBINS]; + /* For small bins, whether has been refilled since last GC. */ + bool bin_refilled[SC_NBINS]; + /* + * For small bins, the number of items we can pretend to flush before + * actually flushing. + */ + uint8_t bin_flush_delay_items[SC_NBINS]; + /* + * The start of the allocation containing the dynamic allocation for + * either the cache bins alone, or the cache bin memory as well as this + * tcache_slow_t and its associated tcache_t. + */ + void *dyn_alloc; + + /* The associated bins. */ + tcache_t *tcache; +}; + +struct tcache_s { + tcache_slow_t *tcache_slow; + cache_bin_t bins[TCACHE_NBINS_MAX]; +}; + +/* Linkage for list of available (previously used) explicit tcache IDs. */ +struct tcaches_s { + union { + tcache_t *tcache; + tcaches_t *next; + }; +}; + +/******************************************************************************/ +/* EXTERNS */ +/******************************************************************************/ + +extern bool opt_tcache; +extern size_t opt_tcache_max; +extern ssize_t opt_lg_tcache_nslots_mul; +extern unsigned opt_tcache_nslots_small_min; +extern unsigned opt_tcache_nslots_small_max; +extern unsigned opt_tcache_nslots_large; +extern ssize_t opt_lg_tcache_shift; +extern size_t opt_tcache_gc_incr_bytes; +extern size_t opt_tcache_gc_delay_bytes; +extern unsigned opt_lg_tcache_flush_small_div; +extern unsigned opt_lg_tcache_flush_large_div; + +/* + * Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more + * large-object bins. This is only used during threads initialization and + * changing it will not reflect on initialized threads as expected. Thus, + * it should not be changed on the fly. To change the number of tcache bins + * in use, refer to tcache_nbins of each tcache. 
+ */ +extern unsigned global_do_not_change_tcache_nbins; + +/* + * Maximum cached size class. Same as above, this is only used during threads + * initialization and should not be changed. To change the maximum cached size + * class, refer to tcache_max of each tcache. + */ +extern size_t global_do_not_change_tcache_maxclass; + +/* + * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and + * usable via the MALLOCX_TCACHE() flag. The automatic per thread tcaches are + * completely disjoint from this data structure. tcaches starts off as a sparse + * array, so it has no physical memory footprint until individual pages are + * touched. This allows the entire array to be allocated the first time an + * explicit tcache is created without a disproportionate impact on memory usage. + */ +extern tcaches_t *tcaches; + +size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); +void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, bool *tcache_success); + +void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, unsigned rem); +void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, unsigned rem); +void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, bool is_small); +bool tcache_bin_info_default_init( + const char *bin_settings_segment_cur, size_t len_left); +bool tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len); +bool tcache_bin_ncached_max_read( + tsd_t *tsd, size_t bin_size, cache_bin_sz_t *ncached_max); +void tcache_arena_reassociate( + tsdn_t *tsdn, tcache_slow_t *tcache_slow, arena_t *arena); +tcache_t *tcache_create_explicit(tsd_t *tsd); +bool thread_tcache_max_set(tsd_t *tsd, size_t tcache_max); +void tcache_cleanup(tsd_t *tsd); +bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind); +void tcaches_flush(tsd_t *tsd, 
unsigned ind); +void tcaches_destroy(tsd_t *tsd, unsigned ind); +bool tcache_boot(tsdn_t *tsdn, base_t *base); +void tcache_arena_associate( + tsdn_t *tsdn, tcache_slow_t *tcache_slow, arena_t *arena); +cache_bin_array_descriptor_t *tcache_postfork_arena_descriptor( + tsdn_t *tsdn, arena_t *arena); +void tcache_prefork(tsdn_t *tsdn); +void tcache_postfork_parent(tsdn_t *tsdn); +void tcache_postfork_child(tsdn_t *tsdn); +void tcache_flush(tsd_t *tsd); +bool tsd_tcache_enabled_data_init(tsd_t *tsd); +void tcache_enabled_set(tsd_t *tsd, bool enabled); + +extern void *(*JET_MUTABLE tcache_stack_alloc)(tsdn_t *tsdn, size_t size, + size_t alignment); + +void tcache_assert_initialized(tcache_t *tcache); + +extern te_base_cb_t tcache_gc_te_handler; + +#endif /* JEMALLOC_INTERNAL_TCACHE_H */ diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h deleted file mode 100644 index 4dc0bae9..00000000 --- a/include/jemalloc/internal/tcache_externs.h +++ /dev/null @@ -1,91 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_TCACHE_EXTERNS_H -#define JEMALLOC_INTERNAL_TCACHE_EXTERNS_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/cache_bin.h" -#include "jemalloc/internal/sz.h" -#include "jemalloc/internal/tcache_types.h" -#include "jemalloc/internal/thread_event_registry.h" - -extern bool opt_tcache; -extern size_t opt_tcache_max; -extern ssize_t opt_lg_tcache_nslots_mul; -extern unsigned opt_tcache_nslots_small_min; -extern unsigned opt_tcache_nslots_small_max; -extern unsigned opt_tcache_nslots_large; -extern ssize_t opt_lg_tcache_shift; -extern size_t opt_tcache_gc_incr_bytes; -extern size_t opt_tcache_gc_delay_bytes; -extern unsigned opt_lg_tcache_flush_small_div; -extern unsigned opt_lg_tcache_flush_large_div; - -/* - * Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more - * large-object bins. 
This is only used during threads initialization and - * changing it will not reflect on initialized threads as expected. Thus, - * it should not be changed on the fly. To change the number of tcache bins - * in use, refer to tcache_nbins of each tcache. - */ -extern unsigned global_do_not_change_tcache_nbins; - -/* - * Maximum cached size class. Same as above, this is only used during threads - * initialization and should not be changed. To change the maximum cached size - * class, refer to tcache_max of each tcache. - */ -extern size_t global_do_not_change_tcache_maxclass; - -/* - * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and - * usable via the MALLOCX_TCACHE() flag. The automatic per thread tcaches are - * completely disjoint from this data structure. tcaches starts off as a sparse - * array, so it has no physical memory footprint until individual pages are - * touched. This allows the entire array to be allocated the first time an - * explicit tcache is created without a disproportionate impact on memory usage. 
- */ -extern tcaches_t *tcaches; - -size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); -void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - cache_bin_t *cache_bin, szind_t binind, bool *tcache_success); - -void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, - cache_bin_t *cache_bin, szind_t binind, unsigned rem); -void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, - cache_bin_t *cache_bin, szind_t binind, unsigned rem); -void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, - cache_bin_t *cache_bin, szind_t binind, bool is_small); -bool tcache_bin_info_default_init( - const char *bin_settings_segment_cur, size_t len_left); -bool tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len); -bool tcache_bin_ncached_max_read( - tsd_t *tsd, size_t bin_size, cache_bin_sz_t *ncached_max); -void tcache_arena_reassociate( - tsdn_t *tsdn, tcache_slow_t *tcache_slow, arena_t *arena); -tcache_t *tcache_create_explicit(tsd_t *tsd); -bool thread_tcache_max_set(tsd_t *tsd, size_t tcache_max); -void tcache_cleanup(tsd_t *tsd); -bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind); -void tcaches_flush(tsd_t *tsd, unsigned ind); -void tcaches_destroy(tsd_t *tsd, unsigned ind); -bool tcache_boot(tsdn_t *tsdn, base_t *base); -void tcache_arena_associate( - tsdn_t *tsdn, tcache_slow_t *tcache_slow, arena_t *arena); -cache_bin_array_descriptor_t *tcache_postfork_arena_descriptor( - tsdn_t *tsdn, arena_t *arena); -void tcache_prefork(tsdn_t *tsdn); -void tcache_postfork_parent(tsdn_t *tsdn); -void tcache_postfork_child(tsdn_t *tsdn); -void tcache_flush(tsd_t *tsd); -bool tsd_tcache_enabled_data_init(tsd_t *tsd); -void tcache_enabled_set(tsd_t *tsd, bool enabled); - -extern void *(*JET_MUTABLE tcache_stack_alloc)(tsdn_t *tsdn, size_t size, - size_t alignment); - -void tcache_assert_initialized(tcache_t *tcache); - -extern te_base_cb_t tcache_gc_te_handler; - -#endif /* JEMALLOC_INTERNAL_TCACHE_EXTERNS_H */ diff --git 
a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 181db1b3..8ce0fb01 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -10,7 +10,7 @@ #include "jemalloc/internal/san.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/sz.h" -#include "jemalloc/internal/tcache_externs.h" +#include "jemalloc/internal/tcache.h" #include "jemalloc/internal/util.h" static inline bool diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h deleted file mode 100644 index 710286c9..00000000 --- a/include/jemalloc/internal/tcache_structs.h +++ /dev/null @@ -1,72 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_TCACHE_STRUCTS_H -#define JEMALLOC_INTERNAL_TCACHE_STRUCTS_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/cache_bin.h" -#include "jemalloc/internal/ql.h" -#include "jemalloc/internal/sc.h" -#include "jemalloc/internal/tcache_types.h" -#include "jemalloc/internal/ticker.h" - -/* - * The tcache state is split into the slow and hot path data. Each has a - * pointer to the other, and the data always comes in pairs. The layout of each - * of them varies in practice; tcache_slow lives in the TSD for the automatic - * tcache, and as part of a dynamic allocation for manual allocations. Keeping - * a pointer to tcache_slow lets us treat these cases uniformly, rather than - * splitting up the tcache [de]allocation code into those paths called with the - * TSD tcache and those called with a manual tcache. - */ - -struct tcache_slow_s { - /* - * The descriptor lets the arena find our cache bins without seeing the - * tcache definition. This enables arenas to aggregate stats across - * tcaches without having a tcache dependency. - */ - cache_bin_array_descriptor_t cache_bin_array_descriptor; - - /* The arena this tcache is associated with. */ - arena_t *arena; - /* The number of bins activated in the tcache. 
*/ - unsigned tcache_nbins; - /* Last time GC has been performed. */ - nstime_t last_gc_time; - /* Next bin to GC. */ - szind_t next_gc_bin; - szind_t next_gc_bin_small; - szind_t next_gc_bin_large; - /* For small bins, help determine how many items to fill at a time. */ - cache_bin_fill_ctl_t bin_fill_ctl_do_not_access_directly[SC_NBINS]; - /* For small bins, whether has been refilled since last GC. */ - bool bin_refilled[SC_NBINS]; - /* - * For small bins, the number of items we can pretend to flush before - * actually flushing. - */ - uint8_t bin_flush_delay_items[SC_NBINS]; - /* - * The start of the allocation containing the dynamic allocation for - * either the cache bins alone, or the cache bin memory as well as this - * tcache_slow_t and its associated tcache_t. - */ - void *dyn_alloc; - - /* The associated bins. */ - tcache_t *tcache; -}; - -struct tcache_s { - tcache_slow_t *tcache_slow; - cache_bin_t bins[TCACHE_NBINS_MAX]; -}; - -/* Linkage for list of available (previously used) explicit tcache IDs. */ -struct tcaches_s { - union { - tcache_t *tcache; - tcaches_t *next; - }; -}; - -#endif /* JEMALLOC_INTERNAL_TCACHE_STRUCTS_H */ diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h deleted file mode 100644 index 27d80d3c..00000000 --- a/include/jemalloc/internal/tcache_types.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_TCACHE_TYPES_H -#define JEMALLOC_INTERNAL_TCACHE_TYPES_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/sc.h" - -typedef struct tcache_slow_s tcache_slow_t; -typedef struct tcache_s tcache_t; -typedef struct tcaches_s tcaches_t; - -/* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */ -#define TCACHE_ZERO_INITIALIZER \ - { 0 } -#define TCACHE_SLOW_ZERO_INITIALIZER \ - { \ - { 0 } \ - } - -/* Used in TSD static initializer only. Will be initialized to opt_tcache. 
*/ -#define TCACHE_ENABLED_ZERO_INITIALIZER false - -/* Used for explicit tcache only. Means flushed but not destroyed. */ -/* NOLINTNEXTLINE(performance-no-int-to-ptr) */ -#define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1) - -#define TCACHE_LG_MAXCLASS_LIMIT LG_USIZE_GROW_SLOW_THRESHOLD -#define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT) -#define TCACHE_NBINS_MAX \ - (SC_NBINS \ - + SC_NGROUP * (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) \ - + 1) -#define TCACHE_GC_NEIGHBOR_LIMIT ((uintptr_t)1 << 21) /* 2M */ -#define TCACHE_GC_INTERVAL_NS ((uint64_t)10 * KQU(1000000)) /* 10ms */ -#define TCACHE_GC_SMALL_NBINS_MAX ((SC_NBINS > 8) ? (SC_NBINS >> 3) : 1) -#define TCACHE_GC_LARGE_NBINS_MAX 1 - -#endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */ diff --git a/include/jemalloc/internal/tsd_internals.h b/include/jemalloc/internal/tsd_internals.h index a7b6fa5e..09590eaf 100644 --- a/include/jemalloc/internal/tsd_internals.h +++ b/include/jemalloc/internal/tsd_internals.h @@ -9,11 +9,12 @@ #include "jemalloc/internal/tsd_binshards.h" #include "jemalloc/internal/jemalloc_internal_externs.h" #include "jemalloc/internal/peak.h" -#include "jemalloc/internal/prof_types.h" #include "jemalloc/internal/rtree_tsd.h" -#include "jemalloc/internal/tcache_structs.h" -#include "jemalloc/internal/tcache_types.h" +#include "jemalloc/internal/tcache.h" #include "jemalloc/internal/thread_event_registry.h" + +/* Forward decl; tsd_internals.h only uses prof_tdata_t as a pointer type. 
*/ +typedef struct prof_tdata_s prof_tdata_t; #include "jemalloc/internal/tsd_types.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/witness.h" diff --git a/src/thread_event_registry.c b/src/thread_event_registry.c index b8307df0..1db8fa72 100644 --- a/src/thread_event_registry.c +++ b/src/thread_event_registry.c @@ -3,9 +3,9 @@ #include "jemalloc/internal/thread_event.h" #include "jemalloc/internal/thread_event_registry.h" -#include "jemalloc/internal/tcache_externs.h" +#include "jemalloc/internal/tcache.h" #include "jemalloc/internal/peak_event.h" -#include "jemalloc/internal/prof_externs.h" +#include "jemalloc/internal/prof.h" #include "jemalloc/internal/stats.h" static malloc_mutex_t uevents_mu; From a54018337c9b4bf1add342b0322cabac9b9717ab Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Wed, 27 May 2026 18:30:31 -0400 Subject: [PATCH 07/19] Consolidate arena_* header split into arena.h arena_types.h + arena_structs.h + arena_externs.h merged into arena.h, keeping the three logical sections (TYPES / STRUCTS / EXTERNS) with explicit dividers. arena_inlines_a.h and arena_inlines_b.h stay separate; arena_inlines_b.h now carries a comment explaining why merging the two would reintroduce a real #include cycle through tcache_inlines.h -> arena_choose (the asymmetric cycle-breaker). Two ordering gotchas this consolidation surfaced: 1. tsd_internals.h is included from tsd.h via tsd_generic.h, sometimes long before arena.h is loaded (e.g. ckh.c includes ckh.h -> tsd.h before jemalloc_internal_includes.h). TSD_INITIALIZER's expansion in tsd_generic.h's function bodies references ARENA_DECAY_NTICKS_PER_UPDATE, so it must already be defined. Factor the constant into a new minimal header, arena_decay_constants.h, that pulls nothing but jemalloc_preamble.h, and include it from both arena.h and tsd_internals.h. 
arena_t is still added as a forward decl in tsd_internals.h -- including arena.h there would trigger arena_stats.h -> mutex.h -> tsd.h -> re-entry into this very file. 2. extent_dss.h previously included arena_types.h for the arena_t pointer type, but arena.h now includes extent_dss.h (it was a STRUCTS-section dep). Forward-decl arena_t in extent_dss.h to break that cycle. Additional forward decls in tcache.h and large.h (arena_t *). These were previously satisfied by the master include order loading arena_types.h before everything else; with arena.h now in the EXTERNS section, large.h and tcache.h are parsed earlier than arena.h, so they need to declare arena_t themselves. jemalloc_internal_externs.h's #include of arena_types.h was vestigial -- the file uses no arena symbols. Dropped. --- .../internal/{arena_externs.h => arena.h} | 179 +++++++++++++++++- .../jemalloc/internal/arena_decay_constants.h | 13 ++ include/jemalloc/internal/arena_inlines_a.h | 2 +- include/jemalloc/internal/arena_inlines_b.h | 22 ++- include/jemalloc/internal/arena_structs.h | 114 ----------- include/jemalloc/internal/arena_types.h | 60 ------ include/jemalloc/internal/arenas_management.h | 2 +- include/jemalloc/internal/extent_dss.h | 4 +- .../internal/jemalloc_internal_externs.h | 1 - .../internal/jemalloc_internal_includes.h | 13 +- .../internal/jemalloc_internal_inlines_a.h | 3 +- .../internal/jemalloc_internal_inlines_c.h | 2 +- include/jemalloc/internal/large.h | 3 +- .../internal/malloc_dispatch_inlines.h | 2 +- include/jemalloc/internal/tcache.h | 5 +- include/jemalloc/internal/tcache_inlines.h | 2 +- include/jemalloc/internal/tsd_internals.h | 11 +- test/integration/extent.c | 2 +- test/unit/san_bump.c | 2 +- test/unit/stats.c | 2 +- 20 files changed, 235 insertions(+), 209 deletions(-) rename include/jemalloc/internal/{arena_externs.h => arena.h} (52%) create mode 100644 include/jemalloc/internal/arena_decay_constants.h delete mode 100644 
include/jemalloc/internal/arena_structs.h delete mode 100644 include/jemalloc/internal/arena_types.h diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena.h similarity index 52% rename from include/jemalloc/internal/arena_externs.h rename to include/jemalloc/internal/arena.h index 694c5d81..a9f4cc02 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena.h @@ -1,12 +1,183 @@ -#ifndef JEMALLOC_INTERNAL_ARENA_EXTERNS_H -#define JEMALLOC_INTERNAL_ARENA_EXTERNS_H +#ifndef JEMALLOC_INTERNAL_ARENA_H +#define JEMALLOC_INTERNAL_ARENA_H #include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena_decay_constants.h" +#include "jemalloc/internal/sc.h" + +/******************************************************************************/ +/* TYPES */ +/******************************************************************************/ + +/* Default decay times in milliseconds. */ +#define DIRTY_DECAY_MS_DEFAULT ZD(10 * 1000) +#define MUZZY_DECAY_MS_DEFAULT (0) +/* Maximum length of the arena name. */ +#define ARENA_NAME_LEN 32 + +typedef struct arena_s arena_t; + +typedef enum { + percpu_arena_mode_names_base = 0, /* Used for options processing. */ + + /* + * *_uninit are used only during bootstrapping, and must correspond + * to initialized variant plus percpu_arena_mode_enabled_base. + */ + percpu_arena_uninit = 0, + per_phycpu_arena_uninit = 1, + + /* All non-disabled modes must come after percpu_arena_disabled. */ + percpu_arena_disabled = 2, + + percpu_arena_mode_names_limit = 3, /* Used for options processing. */ + percpu_arena_mode_enabled_base = 3, + + percpu_arena = 3, + per_phycpu_arena = 4 /* Hyper threads share arena. 
*/ +} percpu_arena_mode_t; + +#define PERCPU_ARENA_ENABLED(m) ((m) >= percpu_arena_mode_enabled_base) +#define PERCPU_ARENA_DEFAULT percpu_arena_disabled + +/* + * When allocation_size >= oversize_threshold, use the dedicated huge arena + * (unless have explicitly specified arena index). 0 disables the feature. + */ +#define OVERSIZE_THRESHOLD_DEFAULT (8 << 20) + +struct arena_config_s { + /* extent hooks to be used for the arena */ + extent_hooks_t *extent_hooks; + + /* + * Use extent hooks for metadata (base) allocations when true. + */ + bool metadata_use_hooks; +}; + +typedef struct arena_config_s arena_config_t; + +extern const arena_config_t arena_config_default; + +/******************************************************************************/ +/* STRUCTS */ +/******************************************************************************/ + #include "jemalloc/internal/arena_stats.h" +#include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bin.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/counter.h" +#include "jemalloc/internal/ecache.h" +#include "jemalloc/internal/edata_cache.h" +#include "jemalloc/internal/extent_dss.h" +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/pa.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/ticker.h" + +struct arena_s { + /* + * Number of threads currently assigned to this arena. Each thread has + * two distinct assignments, one for application-serving allocation, and + * the other for internal metadata allocation. Internal metadata must + * not be allocated from arenas explicitly created via the arenas.create + * mallctl, because the arena.<i>.reset mallctl indiscriminately + * discards all allocations for the affected arena. + * + * 0: Application allocation. + * 1: Internal metadata allocation. + * + * Synchronization: atomic.
+ */ + atomic_u_t nthreads[2]; + + /* Next bin shard for binding new threads. Synchronization: atomic. */ + atomic_u_t binshard_next; + + /* + * When percpu_arena is enabled, to amortize the cost of reading / + * updating the current CPU id, track the most recent thread accessing + * this arena, and only read CPU if there is a mismatch. + */ + tsdn_t *last_thd; + + /* Synchronization: internal. */ + arena_stats_t stats; + + /* + * List of cache_bin_array_descriptors for extant threads associated + * with this arena. Stats from these are merged incrementally, and at + * exit if opt_stats_print is enabled. + * + * Synchronization: cache_bin_array_descriptor_ql_mtx. + */ + ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql; + malloc_mutex_t cache_bin_array_descriptor_ql_mtx; + + /* + * Represents a dss_prec_t, but atomically. + * + * Synchronization: atomic. + */ + atomic_u_t dss_prec; + + /* + * Extant large allocations. + * + * Synchronization: large_mtx. + */ + edata_list_active_t large; + /* Synchronizes all large allocation/update/deallocation. */ + malloc_mutex_t large_mtx; + + /* The page-level allocator shard this arena uses. */ + pa_shard_t pa_shard; + + /* + * A cached copy of base->ind. This can get accessed on hot paths; + * looking it up in base requires an extra pointer hop / cache miss. + */ + unsigned ind; + + /* + * Base allocator, from which arena metadata are allocated. + * + * Synchronization: internal. + */ + base_t *base; + /* Used to determine uptime. Read-only after initialization. */ + nstime_t create_time; + + /* The name of the arena. */ + char name[ARENA_NAME_LEN]; + + /* + * The arena is allocated alongside its bins; really this is a + * dynamically sized array determined by the binshard settings. + * Enforcing cacheline-alignment to minimize the number of cachelines + * touched on the hot paths. + */ + JEMALLOC_WARN_ON_USAGE( + "Do not use this field directly. 
" + "Use `arena_get_bin` instead.") + JEMALLOC_ALIGNED(CACHELINE) +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + bin_t all_bins[]; +#else + bin_t all_bins[0]; +#endif +}; + +/******************************************************************************/ +/* EXTERNS */ +/******************************************************************************/ + #include "jemalloc/internal/div.h" #include "jemalloc/internal/emap.h" -#include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/pages.h" #include "jemalloc/internal/stats.h" @@ -123,4 +294,4 @@ void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); void arena_postfork_child(tsdn_t *tsdn, arena_t *arena, cache_bin_array_descriptor_t *surviving_desc); -#endif /* JEMALLOC_INTERNAL_ARENA_EXTERNS_H */ +#endif /* JEMALLOC_INTERNAL_ARENA_H */ diff --git a/include/jemalloc/internal/arena_decay_constants.h b/include/jemalloc/internal/arena_decay_constants.h new file mode 100644 index 00000000..e98b9624 --- /dev/null +++ b/include/jemalloc/internal/arena_decay_constants.h @@ -0,0 +1,13 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_DECAY_CONSTANTS_H +#define JEMALLOC_INTERNAL_ARENA_DECAY_CONSTANTS_H + +/* + * Minimal header so both arena.h and tsd_internals.h can share decay-related + * constants without dragging the full arena types into the tsd parse chain + * (which is loaded long before arena.h via ckh.h -> tsd.h). + */ + +/* Number of event ticks between time checks. 
*/ +#define ARENA_DECAY_NTICKS_PER_UPDATE 1000 + +#endif /* JEMALLOC_INTERNAL_ARENA_DECAY_CONSTANTS_H */ diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h index a899928c..d8ecd496 100644 --- a/include/jemalloc/internal/arena_inlines_a.h +++ b/include/jemalloc/internal/arena_inlines_a.h @@ -2,7 +2,7 @@ #define JEMALLOC_INTERNAL_ARENA_INLINES_A_H #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/arena_structs.h" +#include "jemalloc/internal/arena.h" static inline unsigned arena_ind_get(const arena_t *arena) { diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index f790834c..5cfe7d25 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -1,9 +1,27 @@ #ifndef JEMALLOC_INTERNAL_ARENA_INLINES_B_H #define JEMALLOC_INTERNAL_ARENA_INLINES_B_H +/* + * This split (arena_inlines_a.h + arena_inlines_b.h) is load-bearing, not + * stylistic. arena_inlines_a.h holds the cheap field accessors that only + * depend on arena.h fields. This file holds the larger inlines that depend + * on arena_choose(), prof, large, and friends. + * + * Merging the two would create a real #include cycle through arena_choose(): + * jemalloc_internal_inlines_b.h defines arena_choose() and pulls in + * arena_inlines_a.h at the top for the cheap accessors. arena_choose() is + * called from arena_choose_maybe_huge() in this file. If that #include + * resolved to a merged "arena_inlines.h", arena_choose_maybe_huge() would + * be parsed before arena_choose() exists, and we would get an implicit + * declaration error -- arena_inlines.h cannot pull in + * jemalloc_internal_inlines_b.h to fix it (that file is mid-parse and its + * include guard is already set). + * + * Keep this file separate from arena_inlines_a.h. 
+ */ + #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/arena_externs.h" -#include "jemalloc/internal/arena_structs.h" +#include "jemalloc/internal/arena.h" #include "jemalloc/internal/div.h" #include "jemalloc/internal/emap.h" #include "jemalloc/internal/jemalloc_internal_inlines_b.h" diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h deleted file mode 100644 index ccab0a17..00000000 --- a/include/jemalloc/internal/arena_structs.h +++ /dev/null @@ -1,114 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_H -#define JEMALLOC_INTERNAL_ARENA_STRUCTS_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/arena_stats.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/bin.h" -#include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/counter.h" -#include "jemalloc/internal/ecache.h" -#include "jemalloc/internal/edata_cache.h" -#include "jemalloc/internal/extent_dss.h" -#include "jemalloc/internal/jemalloc_internal_types.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/nstime.h" -#include "jemalloc/internal/pa.h" -#include "jemalloc/internal/ql.h" -#include "jemalloc/internal/sc.h" -#include "jemalloc/internal/ticker.h" - -struct arena_s { - /* - * Number of threads currently assigned to this arena. Each thread has - * two distinct assignments, one for application-serving allocation, and - * the other for internal metadata allocation. Internal metadata must - * not be allocated from arenas explicitly created via the arenas.create - * mallctl, because the arena.<i>.reset mallctl indiscriminately - * discards all allocations for the affected arena. - * - * 0: Application allocation. - * 1: Internal metadata allocation. - * - * Synchronization: atomic. - */ - atomic_u_t nthreads[2]; - - /* Next bin shard for binding new threads. Synchronization: atomic. 
*/ - atomic_u_t binshard_next; - - /* - * When percpu_arena is enabled, to amortize the cost of reading / - * updating the current CPU id, track the most recent thread accessing - * this arena, and only read CPU if there is a mismatch. - */ - tsdn_t *last_thd; - - /* Synchronization: internal. */ - arena_stats_t stats; - - /* - * List of cache_bin_array_descriptors for extant threads associated - * with this arena. Stats from these are merged incrementally, and at - * exit if opt_stats_print is enabled. - * - * Synchronization: cache_bin_array_descriptor_ql_mtx. - */ - ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql; - malloc_mutex_t cache_bin_array_descriptor_ql_mtx; - - /* - * Represents a dss_prec_t, but atomically. - * - * Synchronization: atomic. - */ - atomic_u_t dss_prec; - - /* - * Extant large allocations. - * - * Synchronization: large_mtx. - */ - edata_list_active_t large; - /* Synchronizes all large allocation/update/deallocation. */ - malloc_mutex_t large_mtx; - - /* The page-level allocator shard this arena uses. */ - pa_shard_t pa_shard; - - /* - * A cached copy of base->ind. This can get accessed on hot paths; - * looking it up in base requires an extra pointer hop / cache miss. - */ - unsigned ind; - - /* - * Base allocator, from which arena metadata are allocated. - * - * Synchronization: internal. - */ - base_t *base; - /* Used to determine uptime. Read-only after initialization. */ - nstime_t create_time; - - /* The name of the arena. */ - char name[ARENA_NAME_LEN]; - - /* - * The arena is allocated alongside its bins; really this is a - * dynamically sized array determined by the binshard settings. - * Enforcing cacheline-alignment to minimize the number of cachelines - * touched on the hot paths. - */ - JEMALLOC_WARN_ON_USAGE( - "Do not use this field directly. 
" - "Use `arena_get_bin` instead.") - JEMALLOC_ALIGNED(CACHELINE) -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - bin_t all_bins[]; -#else - bin_t all_bins[0]; -#endif -}; - -#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_H */ diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h deleted file mode 100644 index c586164f..00000000 --- a/include/jemalloc/internal/arena_types.h +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_ARENA_TYPES_H -#define JEMALLOC_INTERNAL_ARENA_TYPES_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/sc.h" - -/* Default decay times in milliseconds. */ -#define DIRTY_DECAY_MS_DEFAULT ZD(10 * 1000) -#define MUZZY_DECAY_MS_DEFAULT (0) -/* Number of event ticks between time checks. */ -#define ARENA_DECAY_NTICKS_PER_UPDATE 1000 -/* Maximum length of the arena name. */ -#define ARENA_NAME_LEN 32 - -typedef struct arena_s arena_t; - -typedef enum { - percpu_arena_mode_names_base = 0, /* Used for options processing. */ - - /* - * *_uninit are used only during bootstrapping, and must correspond - * to initialized variant plus percpu_arena_mode_enabled_base. - */ - percpu_arena_uninit = 0, - per_phycpu_arena_uninit = 1, - - /* All non-disabled modes must come after percpu_arena_disabled. */ - percpu_arena_disabled = 2, - - percpu_arena_mode_names_limit = 3, /* Used for options processing. */ - percpu_arena_mode_enabled_base = 3, - - percpu_arena = 3, - per_phycpu_arena = 4 /* Hyper threads share arena. */ -} percpu_arena_mode_t; - -#define PERCPU_ARENA_ENABLED(m) ((m) >= percpu_arena_mode_enabled_base) -#define PERCPU_ARENA_DEFAULT percpu_arena_disabled - -/* - * When allocation_size >= oversize_threshold, use the dedicated huge arena - * (unless have explicitly spicified arena index). 0 disables the feature. 
- */ -#define OVERSIZE_THRESHOLD_DEFAULT (8 << 20) - -struct arena_config_s { - /* extent hooks to be used for the arena */ - extent_hooks_t *extent_hooks; - - /* - * Use extent hooks for metadata (base) allocations when true. - */ - bool metadata_use_hooks; -}; - -typedef struct arena_config_s arena_config_t; - -extern const arena_config_t arena_config_default; - -#endif /* JEMALLOC_INTERNAL_ARENA_TYPES_H */ diff --git a/include/jemalloc/internal/arenas_management.h b/include/jemalloc/internal/arenas_management.h index 58d944d6..d990a278 100644 --- a/include/jemalloc/internal/arenas_management.h +++ b/include/jemalloc/internal/arenas_management.h @@ -1,7 +1,7 @@ #ifndef JEMALLOC_INTERNAL_ARENAS_MANAGEMENT_H #define JEMALLOC_INTERNAL_ARENAS_MANAGEMENT_H -#include "jemalloc/internal/arena_types.h" +#include "jemalloc/internal/arena.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/tsd_types.h" diff --git a/include/jemalloc/internal/extent_dss.h b/include/jemalloc/internal/extent_dss.h index c84f1799..e9aa247a 100644 --- a/include/jemalloc/internal/extent_dss.h +++ b/include/jemalloc/internal/extent_dss.h @@ -2,9 +2,11 @@ #define JEMALLOC_INTERNAL_EXTENT_DSS_H #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/tsd_types.h" +/* Forward decl; arena.h includes us, so we can't include arena.h back. 
*/ +typedef struct arena_s arena_t; + typedef enum { dss_prec_disabled = 0, dss_prec_primary = 1, diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index b5b12e91..b0876603 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -1,7 +1,6 @@ #ifndef JEMALLOC_INTERNAL_EXTERNS_H #define JEMALLOC_INTERNAL_EXTERNS_H -#include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/fxp.h" #include "jemalloc/internal/hpa_opts.h" diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 87ef4c82..299695f1 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -36,24 +36,13 @@ * global jemalloc definitions, however. */ -/******************************************************************************/ -/* TYPES */ -/******************************************************************************/ - -#include "jemalloc/internal/arena_types.h" - -/******************************************************************************/ -/* STRUCTS */ -/******************************************************************************/ - -#include "jemalloc/internal/arena_structs.h" +#include "jemalloc/internal/arena.h" /******************************************************************************/ /* EXTERNS */ /******************************************************************************/ #include "jemalloc/internal/jemalloc_internal_externs.h" -#include "jemalloc/internal/arena_externs.h" #include "jemalloc/internal/large.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/malloc_dispatch_externs.h" diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 
01771d7a..9912eff4 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -2,8 +2,7 @@ #define JEMALLOC_INTERNAL_INLINES_A_H #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/arena_externs.h" -#include "jemalloc/internal/arena_types.h" +#include "jemalloc/internal/arena.h" #include "jemalloc/internal/arenas_management.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bit_util.h" diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 323f32d5..389cf6cf 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -2,7 +2,7 @@ #define JEMALLOC_INTERNAL_INLINES_C_H #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/arena_externs.h" +#include "jemalloc/internal/arena.h" #include "jemalloc/internal/arena_inlines_b.h" #include "jemalloc/internal/emap.h" #include "jemalloc/internal/jemalloc_init.h" diff --git a/include/jemalloc/internal/large.h b/include/jemalloc/internal/large.h index 8e7bdae0..bf63bc3d 100644 --- a/include/jemalloc/internal/large.h +++ b/include/jemalloc/internal/large.h @@ -4,7 +4,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/edata.h" -/* Forward decl; only prof_info_t * is used as a pointer arg below. */ +/* Forward decls; only used as pointer types below. 
*/ +typedef struct arena_s arena_t; typedef struct prof_info_s prof_info_t; void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); diff --git a/include/jemalloc/internal/malloc_dispatch_inlines.h b/include/jemalloc/internal/malloc_dispatch_inlines.h index 20a6c5ee..79405120 100644 --- a/include/jemalloc/internal/malloc_dispatch_inlines.h +++ b/include/jemalloc/internal/malloc_dispatch_inlines.h @@ -2,7 +2,7 @@ #define JEMALLOC_INTERNAL_MALLOC_DISPATCH_INLINES_H #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/arena_externs.h" +#include "jemalloc/internal/arena.h" #include "jemalloc/internal/arena_inlines_b.h" #include "jemalloc/internal/bin.h" #include "jemalloc/internal/div.h" diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 12a55f19..e9ea5ac1 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -9,8 +9,9 @@ #include "jemalloc/internal/thread_event_registry.h" #include "jemalloc/internal/ticker.h" -/* Forward decl; only base_t * is used as a pointer arg below. */ -typedef struct base_s base_t; +/* Forward decls; only used as pointer types below. 
*/ +typedef struct arena_s arena_t; +typedef struct base_s base_t; /******************************************************************************/ /* TYPES */ diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 8ce0fb01..2b767391 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -2,7 +2,7 @@ #define JEMALLOC_INTERNAL_TCACHE_INLINES_H #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/arena_externs.h" +#include "jemalloc/internal/arena.h" #include "jemalloc/internal/bin.h" #include "jemalloc/internal/jemalloc_internal_inlines_b.h" #include "jemalloc/internal/jemalloc_internal_types.h" diff --git a/include/jemalloc/internal/tsd_internals.h b/include/jemalloc/internal/tsd_internals.h index 09590eaf..b8970ed0 100644 --- a/include/jemalloc/internal/tsd_internals.h +++ b/include/jemalloc/internal/tsd_internals.h @@ -4,7 +4,7 @@ #define JEMALLOC_INTERNAL_TSD_INTERNALS_H #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/arena_types.h" +#include "jemalloc/internal/arena_decay_constants.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/tsd_binshards.h" #include "jemalloc/internal/jemalloc_internal_externs.h" @@ -13,8 +13,15 @@ #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/thread_event_registry.h" -/* Forward decl; tsd_internals.h only uses prof_tdata_t as a pointer type. */ +/* + * Forward decls. tsd_internals.h cannot include arena.h / prof.h directly: + * those headers' STRUCTS-section includes trigger mutex.h -> tsd.h -> + * tsd_generic.h, which would re-enter this file before its body finishes. + * Each consumer here only uses these as pointer types. 
+ */ +typedef struct arena_s arena_t; typedef struct prof_tdata_s prof_tdata_t; + #include "jemalloc/internal/tsd_types.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/witness.h" diff --git a/test/integration/extent.c b/test/integration/extent.c index c15bf761..36091ac6 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -2,7 +2,7 @@ #include "test/extent_hooks.h" -#include "jemalloc/internal/arena_types.h" +#include "jemalloc/internal/arena.h" static void test_extent_body(unsigned arena_ind) { diff --git a/test/unit/san_bump.c b/test/unit/san_bump.c index 54d8583d..423f2be3 100644 --- a/test/unit/san_bump.c +++ b/test/unit/san_bump.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" #include "test/arena_util.h" -#include "jemalloc/internal/arena_structs.h" +#include "jemalloc/internal/arena.h" #include "jemalloc/internal/san_bump.h" static extent_hooks_t *san_bump_default_hooks; diff --git a/test/unit/stats.c b/test/unit/stats.c index d2719db2..ee6cc9b2 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -#include "jemalloc/internal/arena_structs.h" +#include "jemalloc/internal/arena.h" #define STRINGIFY_HELPER(x) #x #define STRINGIFY(x) STRINGIFY_HELPER(x) From 22c3dc450c85556c65c2a0c7b39b051215e29f95 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Wed, 27 May 2026 18:44:56 -0400 Subject: [PATCH 08/19] Drop umbrella include from 5 small .c files (batch 1) Replaces #include "jemalloc_internal_includes.h" with explicit per-symbol includes in five small TUs: src/edata.c -> edata.h src/exp_grow.c -> exp_grow.h src/ticker.c -> ticker.h src/bin_info.c -> assert.h, bin_info.h src/counter.c -> counter.h, witness.h One latent hermeticity bug surfaced: sz.h's sz_large_size_classes_disabled() inline references opt_disable_large_size_classes (declared in jemalloc_internal_externs.h) but sz.h didn't include that header. 
Worked under the umbrella but breaks once consumers stop including everything. Added the include to sz.h so it stands on its own. Step 6 (Option B) of the cyclical-dep cleanup, batch 1 of N. --- include/jemalloc/internal/sz.h | 1 + src/bin_info.c | 2 +- src/counter.c | 2 +- src/edata.c | 3 ++- src/exp_grow.c | 3 ++- src/ticker.c | 3 ++- 6 files changed, 9 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 3ee8a6b3..0e7b029e 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/bit_util.h" +#include "jemalloc/internal/jemalloc_internal_externs.h" #include "jemalloc/internal/pages.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/util.h" diff --git a/src/bin_info.c b/src/bin_info.c index e10042fd..0b8e551a 100644 --- a/src/bin_info.c +++ b/src/bin_info.c @@ -1,6 +1,6 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/assert.h" #include "jemalloc/internal/bin_info.h" bin_info_t bin_infos[SC_NBINS]; diff --git a/src/counter.c b/src/counter.c index 8257a062..243c41ba 100644 --- a/src/counter.c +++ b/src/counter.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/counter.h" +#include "jemalloc/internal/witness.h" bool counter_accum_init(counter_accum_t *counter, uint64_t interval) { diff --git a/src/edata.c b/src/edata.c index d71d1679..575e4c86 100644 --- a/src/edata.c +++ b/src/edata.c @@ -1,5 +1,6 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/edata.h" ph_gen(, edata_avail, edata_t, avail_link, edata_esnead_comp) ph_gen(, edata_heap, edata_t, heap_link, edata_snad_comp) diff --git 
a/src/exp_grow.c b/src/exp_grow.c index 955823a1..17699561 100644 --- a/src/exp_grow.c +++ b/src/exp_grow.c @@ -1,5 +1,6 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/exp_grow.h" void exp_grow_init(exp_grow_t *exp_grow) { diff --git a/src/ticker.c b/src/ticker.c index 1fd6ac96..b3cac9d1 100644 --- a/src/ticker.c +++ b/src/ticker.c @@ -1,5 +1,6 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/ticker.h" /* * To avoid using floating point math down core paths (still necessary because From 89440d0fd3cbe9a8d4a943d5373ecf022e1487f3 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Wed, 27 May 2026 18:50:57 -0400 Subject: [PATCH 09/19] Drop umbrella include from 9 medium .c files (batch 2) Converted: hpa_utils, ecache, extent_mmap, util, safety_check, prof_stats, peak_event, inspect, log. (src/div.c was already minimal; skipped.) One latent hermeticity bug surfaced: peak_event.h declared `extern te_base_cb_t peak_te_handler;` but didn't include thread_event_registry.h where te_base_cb_t is typedef'd. Added the include to peak_event.h. peak_event.c also needs thread_event.h directly for TE_MIN_START_WAIT. Step 6 (Option B) of the cyclical-dep cleanup, batch 2 of N. 
--- include/jemalloc/internal/peak_event.h | 1 + src/ecache.c | 6 ++++-- src/extent_mmap.c | 2 +- src/hpa_utils.c | 2 +- src/inspect.c | 7 ++++++- src/log.c | 1 - src/peak_event.c | 6 +++--- src/prof_stats.c | 4 +++- src/safety_check.c | 4 +++- src/util.c | 2 +- 10 files changed, 23 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/peak_event.h b/include/jemalloc/internal/peak_event.h index 0d1f1627..67e7a71b 100644 --- a/include/jemalloc/internal/peak_event.h +++ b/include/jemalloc/internal/peak_event.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_PEAK_EVENT_H #include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/thread_event_registry.h" #include "jemalloc/internal/tsd_types.h" /* diff --git a/src/ecache.c b/src/ecache.c index 20fcee9e..e6620a8a 100644 --- a/src/ecache.c +++ b/src/ecache.c @@ -1,7 +1,9 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" -#include "jemalloc/internal/san.h" +#include "jemalloc/internal/ecache.h" +#include "jemalloc/internal/eset.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/witness.h" bool ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state, unsigned ind, diff --git a/src/extent_mmap.c b/src/extent_mmap.c index d39bddc6..10574618 100644 --- a/src/extent_mmap.c +++ b/src/extent_mmap.c @@ -1,8 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/extent_mmap.h" +#include "jemalloc/internal/pages.h" /******************************************************************************/ /* Data. 
*/ diff --git a/src/hpa_utils.c b/src/hpa_utils.c index 59bb0d1f..02817d83 100644 --- a/src/hpa_utils.c +++ b/src/hpa_utils.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/hpa_utils.h" +#include "jemalloc/internal/hpdata.h" void hpa_purge_batch(hpa_hooks_t *hooks, hpa_purge_item_t *batch, size_t batch_sz) { diff --git a/src/inspect.c b/src/inspect.c index 1c0de129..b2a961b2 100644 --- a/src/inspect.c +++ b/src/inspect.c @@ -1,5 +1,10 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/bin_info.h" +#include "jemalloc/internal/edata.h" +#include "jemalloc/internal/emap.h" #include "jemalloc/internal/inspect.h" void diff --git a/src/log.c b/src/log.c index 9b1c6261..63054d83 100644 --- a/src/log.c +++ b/src/log.c @@ -1,5 +1,4 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/log.h" diff --git a/src/peak_event.c b/src/peak_event.c index 39f90b70..f42cfc47 100644 --- a/src/peak_event.c +++ b/src/peak_event.c @@ -1,10 +1,10 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" - -#include "jemalloc/internal/peak_event.h" #include "jemalloc/internal/peak.h" +#include "jemalloc/internal/peak_event.h" +#include "jemalloc/internal/thread_event.h" #include "jemalloc/internal/thread_event_registry.h" +#include "jemalloc/internal/tsd.h" /* Update the peak with current tsd state. 
*/ void diff --git a/src/prof_stats.c b/src/prof_stats.c index db248be7..81d5e6f6 100644 --- a/src/prof_stats.c +++ b/src/prof_stats.c @@ -1,7 +1,9 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof.h" #include "jemalloc/internal/prof_stats.h" +#include "jemalloc/internal/tsd.h" bool opt_prof_stats = false; malloc_mutex_t prof_stats_mtx; diff --git a/src/safety_check.c b/src/safety_check.c index d052718d..f11a263b 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -1,5 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/safety_check.h" static safety_check_abort_hook_t safety_check_abort; diff --git a/src/util.c b/src/util.c index 1bcf4fee..a23a5c8c 100644 --- a/src/util.c +++ b/src/util.c @@ -1,6 +1,6 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/util.h" /* Reads the next size pair in a multi-sized option. */ From da18bdc169dd84a6caba559f9d1928ae3b94bff1 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Wed, 27 May 2026 18:56:21 -0400 Subject: [PATCH 10/19] Drop umbrella include from 10 more .c files (batch 3) Converted: hpa_hooks, san_bump, sz, cache_bin, bitmap, hpa_central, witness, fxp, buf_writer, edata_cache. No latent hermeticity bugs in headers this batch -- a few .c files just needed previously-transitive includes added (e.g. hpa_central.c needed hpa.h for hpa_supported()). Step 6 (Option B) of the cyclical-dep cleanup, batch 3 of N. 
--- src/bitmap.c | 2 +- src/buf_writer.c | 3 ++- src/cache_bin.c | 2 +- src/edata_cache.c | 4 +++- src/fxp.c | 2 +- src/hpa_central.c | 2 +- src/hpa_hooks.c | 3 ++- src/san_bump.c | 9 +++++---- src/sz.c | 2 +- src/witness.c | 2 +- 10 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/bitmap.c b/src/bitmap.c index 8ac81a67..c399a05c 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/bitmap.h" /******************************************************************************/ diff --git a/src/buf_writer.c b/src/buf_writer.c index 3c298502..48b94100 100644 --- a/src/buf_writer.c +++ b/src/buf_writer.c @@ -1,7 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/buf_writer.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/malloc_io.h" static void * diff --git a/src/cache_bin.c b/src/cache_bin.c index ec677948..170e21b0 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -1,6 +1,6 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/base.h" #include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/cache_bin.h" #include "jemalloc/internal/safety_check.h" diff --git a/src/edata_cache.c b/src/edata_cache.c index 3ac8273a..68a399da 100644 --- a/src/edata_cache.c +++ b/src/edata_cache.c @@ -1,5 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/edata_cache.h" +#include "jemalloc/internal/witness.h" bool edata_cache_init(edata_cache_t *edata_cache, base_t *base) { diff --git a/src/fxp.c b/src/fxp.c 
index faeab207..ff3de54e 100644 --- a/src/fxp.c +++ b/src/fxp.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/fxp.h" +#include "jemalloc/internal/malloc_io.h" static bool fxp_isdigit(char c) { diff --git a/src/hpa_central.c b/src/hpa_central.c index b4f770c2..9d75f501 100644 --- a/src/hpa_central.c +++ b/src/hpa_central.c @@ -1,6 +1,6 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/hpa.h" #include "jemalloc/internal/hpa_central.h" #include "jemalloc/internal/tsd.h" #include "jemalloc/internal/witness.h" diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index 2ec7029d..1c292732 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -1,8 +1,9 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/hpa_hooks.h" #include "jemalloc/internal/jemalloc_probe.h" +#include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/pages.h" static void *hpa_hooks_map(size_t size); static void hpa_hooks_unmap(void *ptr, size_t size); diff --git a/src/san_bump.c b/src/san_bump.c index 11031290..30b90b04 100644 --- a/src/san_bump.c +++ b/src/san_bump.c @@ -1,11 +1,12 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" -#include "jemalloc/internal/san_bump.h" +#include "jemalloc/internal/edata_cache.h" +#include "jemalloc/internal/ehooks.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/pac.h" #include "jemalloc/internal/san.h" -#include "jemalloc/internal/ehooks.h" -#include "jemalloc/internal/edata_cache.h" +#include "jemalloc/internal/san_bump.h" static bool san_bump_grow_locked(tsdn_t *tsdn, san_bump_alloc_t *sba, pac_t *pac, ehooks_t *ehooks, size_t size); diff --git a/src/sz.c b/src/sz.c 
index da92f2b4..5cff6f8b 100644 --- a/src/sz.c +++ b/src/sz.c @@ -1,5 +1,5 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" + #include "jemalloc/internal/sz.h" JEMALLOC_ALIGNED(CACHELINE) diff --git a/src/witness.c b/src/witness.c index 940b1eae..6f1a17e7 100644 --- a/src/witness.c +++ b/src/witness.c @@ -1,8 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/witness.h" void witness_init(witness_t *witness, const char *name, witness_rank_t rank, From 19978eea2fad9095a4c6634faa35e5d5b29eba70 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Wed, 27 May 2026 19:04:11 -0400 Subject: [PATCH 11/19] Drop umbrella include from 15 more .c files (batch 4) Converted: prof_stack_range, jemalloc_fork, san, pa_extra, mutex, thread_event_registry, rtree, ehooks, pa, extent_dss, decay, large, nstime, bin, arenas_management. One latent hermeticity bug surfaced: prof_sys.h declares `void bt_init(prof_bt_t *bt, void **vec);` but didn't include prof.h where prof_bt_t is defined. Added the include. Step 6 (Option B) of the cyclical-dep cleanup, batch 4 of N. 
--- include/jemalloc/internal/prof_sys.h | 1 + src/arenas_management.c | 8 +++++++- src/bin.c | 3 ++- src/decay.c | 1 - src/ehooks.c | 8 +++++++- src/extent_dss.c | 7 ++++++- src/jemalloc_fork.c | 9 ++++++++- src/large.c | 8 +++++++- src/mutex.c | 2 +- src/nstime.c | 4 +--- src/pa.c | 5 +++-- src/pa_extra.c | 3 ++- src/prof_stack_range.c | 1 - src/rtree.c | 2 +- src/san.c | 2 +- src/thread_event_registry.c | 11 +++++++---- 16 files changed, 54 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h index 0745b991..e671a47c 100644 --- a/include/jemalloc/internal/prof_sys.h +++ b/include/jemalloc/internal/prof_sys.h @@ -4,6 +4,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/base.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof.h" extern malloc_mutex_t prof_dump_filename_mtx; extern base_t *prof_base; diff --git a/src/arenas_management.c b/src/arenas_management.c index 261557b6..e4d2aa75 100644 --- a/src/arenas_management.c +++ b/src/arenas_management.c @@ -1,11 +1,17 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" #include "jemalloc/internal/arenas_management.h" +#include "jemalloc/internal/background_thread.h" +#include "jemalloc/internal/background_thread_inlines.h" #include "jemalloc/internal/jemalloc_init.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_b.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/sz.h" +#include "jemalloc/internal/tcache.h" JEMALLOC_ALIGNED(CACHELINE) atomic_p_t arenas[MALLOCX_ARENA_LIMIT]; diff --git a/src/bin.c b/src/bin.c index 30a78aba..ac17f16a 100644 --- a/src/bin.c +++ b/src/bin.c @@ -1,6 +1,7 @@ #include 
"jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines_b.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/bin.h" #include "jemalloc/internal/sc.h" diff --git a/src/decay.c b/src/decay.c index 7bbce2a6..1ed23bcb 100644 --- a/src/decay.c +++ b/src/decay.c @@ -1,5 +1,4 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/decay.h" diff --git a/src/ehooks.c b/src/ehooks.c index d7abb960..bd9a8ac6 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -1,8 +1,14 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/edata.h" #include "jemalloc/internal/ehooks.h" +#include "jemalloc/internal/emap.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" void ehooks_init(ehooks_t *ehooks, extent_hooks_t *extent_hooks, unsigned ind) { diff --git a/src/extent_dss.c b/src/extent_dss.c index 8fac71a7..16faa0a2 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -1,9 +1,14 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines_a.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/edata_cache.h" +#include "jemalloc/internal/extent.h" #include "jemalloc/internal/extent_dss.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" #include "jemalloc/internal/spin.h" +#include "jemalloc/internal/tsd.h" /******************************************************************************/ /* Data. 
*/ diff --git a/src/jemalloc_fork.c b/src/jemalloc_fork.c index 9bab77e8..3fc89c68 100644 --- a/src/jemalloc_fork.c +++ b/src/jemalloc_fork.c @@ -1,10 +1,17 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" #include "jemalloc/internal/arenas_management.h" +#include "jemalloc/internal/background_thread.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/jemalloc_fork.h" #include "jemalloc/internal/jemalloc_init.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_b.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/tsd.h" /******************************************************************************/ /* diff --git a/src/large.c b/src/large.c index 610c9b6c..197cb0ba 100644 --- a/src/large.c +++ b/src/large.c @@ -1,10 +1,16 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines_b.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/emap.h" +#include "jemalloc/internal/extent.h" #include "jemalloc/internal/extent_mmap.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" +#include "jemalloc/internal/large.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/prof_inlines.h" #include "jemalloc/internal/prof_recent.h" #include "jemalloc/internal/util.h" diff --git a/src/mutex.c b/src/mutex.c index aa2ab665..04a14a24 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -1,8 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/malloc_io.h" +#include 
"jemalloc/internal/mutex.h" #include "jemalloc/internal/spin.h" #if defined(_WIN32) && !defined(_CRT_SPINCOUNT) diff --git a/src/nstime.c b/src/nstime.c index 0dfbeda1..5517877d 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -1,9 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" - -#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/nstime.h" #define BILLION UINT64_C(1000000000) #define MILLION UINT64_C(1000000) diff --git a/src/pa.c b/src/pa.c index f14fda81..2a560ed9 100644 --- a/src/pa.c +++ b/src/pa.c @@ -1,8 +1,9 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" -#include "jemalloc/internal/san.h" +#include "jemalloc/internal/background_thread.h" #include "jemalloc/internal/hpa.h" +#include "jemalloc/internal/pa.h" +#include "jemalloc/internal/san.h" static void pa_nactive_add(pa_shard_t *shard, size_t add_pages) { diff --git a/src/pa_extra.c b/src/pa_extra.c index 17b4449a..24ff2e6d 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -1,5 +1,6 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/pa.h" /* * This file is logically part of the PA module. 
While pa.c contains the core diff --git a/src/prof_stack_range.c b/src/prof_stack_range.c index 8ebcab8e..ef5e8062 100644 --- a/src/prof_stack_range.c +++ b/src/prof_stack_range.c @@ -1,5 +1,4 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/prof_sys.h" diff --git a/src/rtree.c b/src/rtree.c index ac27f829..a63f2b01 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -1,8 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/rtree.h" /* * Only the most significant bits of keys passed to rtree_{read,write}() are diff --git a/src/san.c b/src/san.c index 5448c67f..99a3d783 100644 --- a/src/san.c +++ b/src/san.c @@ -1,8 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/ehooks.h" +#include "jemalloc/internal/emap.h" #include "jemalloc/internal/san.h" #include "jemalloc/internal/tsd.h" diff --git a/src/thread_event_registry.c b/src/thread_event_registry.c index 1db8fa72..a25050e8 100644 --- a/src/thread_event_registry.c +++ b/src/thread_event_registry.c @@ -1,12 +1,15 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" -#include "jemalloc/internal/thread_event.h" -#include "jemalloc/internal/thread_event_registry.h" -#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/peak_event.h" #include "jemalloc/internal/prof.h" #include "jemalloc/internal/stats.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/thread_event.h" +#include "jemalloc/internal/thread_event_registry.h" 
+#include "jemalloc/internal/tsd.h" +#include "jemalloc/internal/witness.h" static malloc_mutex_t uevents_mu; From a3c20a2320b6c062d15bff6fbcbbe7360fec09a3 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Wed, 27 May 2026 19:10:11 -0400 Subject: [PATCH 12/19] Drop umbrella include from 10 more .c files (batch 5) Converted: thread_event, emap, sec, eset, tsd, psset, zone, hpdata, ckh, prof_recent. No latent hermeticity bugs in headers this batch -- just .c files that needed previously-transitive includes added (most commonly arena.h, the various jemalloc_internal_inlines_*, mutex.h, tsd.h, witness.h, and prof.h). Step 6 (Option B) of the cyclical-dep cleanup, batch 5 of N. --- src/ckh.c | 5 +++-- src/emap.c | 2 +- src/eset.c | 1 - src/hpdata.c | 1 - src/prof_recent.c | 7 ++++++- src/psset.c | 5 ++--- src/sec.c | 4 ++-- src/thread_event.c | 7 +++++-- src/tsd.c | 16 ++++++++++++++-- src/zone.c | 6 +++++- 10 files changed, 38 insertions(+), 16 deletions(-) diff --git a/src/ckh.c b/src/ckh.c index 80688162..1bfcdf2f 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -38,10 +38,11 @@ #include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" - +#include "jemalloc/internal/arena.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/hash.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/util.h" diff --git a/src/emap.c b/src/emap.c index c9a371d2..c6936f8a 100644 --- a/src/emap.c +++ b/src/emap.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/emap.h" +#include "jemalloc/internal/extent.h" enum emap_lock_result_e { emap_lock_result_success, diff --git a/src/eset.c b/src/eset.c index bdce1834..4d1f8f04 100644 --- a/src/eset.c +++ b/src/eset.c @@ -1,5 
+1,4 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/eset.h" diff --git a/src/hpdata.c b/src/hpdata.c index a538a422..1da4ffa0 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -1,5 +1,4 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/hpdata.h" diff --git a/src/prof_recent.c b/src/prof_recent.c index f7108bee..dbc0fe4a 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -1,10 +1,15 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines_b.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/buf_writer.h" #include "jemalloc/internal/emitter.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" +#include "jemalloc/internal/prof.h" #include "jemalloc/internal/prof_data.h" +#include "jemalloc/internal/prof_inlines.h" #include "jemalloc/internal/prof_recent.h" ssize_t opt_prof_recent_alloc_max = PROF_RECENT_ALLOC_MAX_DEFAULT; diff --git a/src/psset.c b/src/psset.c index 4c6ab255..b9b739ad 100644 --- a/src/psset.c +++ b/src/psset.c @@ -1,9 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" - -#include "jemalloc/internal/psset.h" #include "jemalloc/internal/fb.h" +#include "jemalloc/internal/psset.h" +#include "jemalloc/internal/sz.h" void psset_init(psset_t *psset) { diff --git a/src/sec.c b/src/sec.c index 493e4629..879fc47e 100644 --- a/src/sec.c +++ b/src/sec.c @@ -1,8 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" -#include "jemalloc/internal/sec.h" #include "jemalloc/internal/jemalloc_probe.h" +#include "jemalloc/internal/sec.h" 
+#include "jemalloc/internal/witness.h" static bool sec_bin_init(sec_bin_t *bin) { diff --git a/src/thread_event.c b/src/thread_event.c index a8c5e2e1..2f2dd711 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -1,9 +1,12 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/peak_event.h" +#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/tcache.h" #include "jemalloc/internal/thread_event.h" #include "jemalloc/internal/thread_event_registry.h" -#include "jemalloc/internal/peak_event.h" +#include "jemalloc/internal/tsd.h" static bool te_ctx_has_active_events(te_ctx_t *ctx) { diff --git a/src/tsd.c b/src/tsd.c index 67200f50..b639aac5 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -1,10 +1,22 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines_a.h" +#include "jemalloc/internal/arenas_management.h" #include "jemalloc/internal/assert.h" -#include "jemalloc/internal/san.h" +#include "jemalloc/internal/background_thread.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_b.h" +#include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof.h" #include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/san.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/thread_event.h" +#include "jemalloc/internal/tsd.h" +#include "jemalloc/internal/witness.h" /******************************************************************************/ /* Data. 
*/ diff --git a/src/zone.c b/src/zone.c index 62d2eabb..62957f17 100644 --- a/src/zone.c +++ b/src/zone.c @@ -1,8 +1,12 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arenas_management.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/jemalloc_fork.h" +#include "jemalloc/internal/jemalloc_init.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" #ifndef JEMALLOC_ZONE # error "This source file is for zones on Darwin (OS X)." From e7595f7b485416721f34b7b9860983cffb4c1687 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Wed, 27 May 2026 19:15:16 -0400 Subject: [PATCH 13/19] Drop umbrella include from 8 large .c files (batch 6) Converted: jemalloc_init, prof_log, base, pac, malloc_io, prof, background_thread, pages. No latent hermeticity bugs in headers this batch. All fixes are explicit includes of headers (arena.h, background_thread.h, jemalloc_internal_externs.h, etc.) whose symbols the umbrella was supplying transitively. Step 6 (Option B) of the cyclical-dep cleanup, batch 6 of N. 
--- src/background_thread.c | 16 +++++++++++++++- src/base.c | 4 +++- src/jemalloc_init.c | 13 ++++++++++++- src/malloc_io.c | 1 - src/pac.c | 5 ++++- src/pages.c | 9 +++++---- src/prof.c | 13 +++++++++---- src/prof_log.c | 7 ++++++- 8 files changed, 54 insertions(+), 14 deletions(-) diff --git a/src/background_thread.c b/src/background_thread.c index 4901856a..f25dc403 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -1,7 +1,21 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/arenas_management.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/background_thread.h" +#include "jemalloc/internal/background_thread_inlines.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/jemalloc_init.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_b.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/witness.h" JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS diff --git a/src/base.c b/src/base.c index 76227a5e..0ac658b7 100644 --- a/src/base.c +++ b/src/base.c @@ -1,7 +1,9 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/sz.h" diff --git a/src/jemalloc_init.c b/src/jemalloc_init.c index 2500a385..b62f10a3 100644 --- a/src/jemalloc_init.c +++ b/src/jemalloc_init.c @@ -1,7 +1,18 @@ #include 
"jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines_b.h" #include "jemalloc/internal/arenas_management.h" +#include "jemalloc/internal/background_thread.h" +#include "jemalloc/internal/background_thread_inlines.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_b.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" +#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/prof_inlines.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/tcache_inlines.h" +#include "jemalloc/internal/witness.h" #include "jemalloc/internal/conf.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/emap.h" diff --git a/src/malloc_io.c b/src/malloc_io.c index e76a6b73..2b8a6564 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -1,5 +1,4 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/util.h" diff --git a/src/pac.c b/src/pac.c index aab2bb1e..caf5f9d5 100644 --- a/src/pac.c +++ b/src/pac.c @@ -1,8 +1,11 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/background_thread.h" +#include "jemalloc/internal/extent.h" #include "jemalloc/internal/pac.h" #include "jemalloc/internal/san.h" +#include "jemalloc/internal/witness.h" static inline void pac_decay_data_get(pac_t *pac, extent_state_t state, decay_t **r_decay, diff --git a/src/pages.c b/src/pages.c index 4bca965a..4f316a9d 100644 --- a/src/pages.c +++ b/src/pages.c @@ -1,11 +1,12 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/pages.h" - -#include "jemalloc/internal/jemalloc_internal_includes.h" - #include 
"jemalloc/internal/assert.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/jemalloc_internal_externs.h" #include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/pages.h" +#include "jemalloc/internal/sc.h" #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT # include <sys/sysctl.h> diff --git a/src/prof.c b/src/prof.c index a833fed5..4fb5ba5f 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1,16 +1,21 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" -#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines_b.h" #include "jemalloc/internal/assert.h" -#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/counter.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof.h" #include "jemalloc/internal/prof_data.h" +#include "jemalloc/internal/prof_hook.h" +#include "jemalloc/internal/prof_inlines.h" #include "jemalloc/internal/prof_log.h" #include "jemalloc/internal/prof_recent.h" #include "jemalloc/internal/prof_stats.h" #include "jemalloc/internal/prof_sys.h" -#include "jemalloc/internal/prof_hook.h" #include "jemalloc/internal/thread_event.h" #include "jemalloc/internal/thread_event_registry.h" diff --git a/src/prof_log.c b/src/prof_log.c index 74f1372f..a5dbe18b 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -1,8 +1,13 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines_b.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/buf_writer.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" +#include 
"jemalloc/internal/prof.h" +#include "jemalloc/internal/prof_inlines.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/emitter.h" #include "jemalloc/internal/hash.h" From 7a9f6dc34c02be56c32d3350cb6d889bd0c45bea Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Wed, 27 May 2026 19:22:17 -0400 Subject: [PATCH 14/19] Drop umbrella include from final 10 .c files (batch 7) Converted: prof_sys, conf, hpa, tcache, prof_data, extent, jemalloc, arena, stats, ctl. These are the biggest TUs in the codebase, touching many subsystems, so each needs a broad explicit include set. Still strictly better than the umbrella -- every dep is now visible at the top of the file instead of being hidden behind a single catch-all #include. No latent header hermeticity bugs this batch. After this commit, no .c file uses jemalloc_internal_includes.h. Step 6 (Option B) of the cyclical-dep cleanup, final .c-file batch. --- src/arena.c | 18 ++++++++++++++++-- src/conf.c | 9 ++++++--- src/ctl.c | 13 ++++++++++++- src/extent.c | 14 ++++++++++++-- src/hpa.c | 7 +++---- src/jemalloc.c | 20 +++++++++++++++----- src/prof_data.c | 10 +++++++++- src/prof_sys.c | 8 +++++++- src/stats.c | 9 ++++++++- src/tcache.c | 16 +++++++++++++++- 10 files changed, 103 insertions(+), 21 deletions(-) diff --git a/src/arena.c b/src/arena.c index 84b97549..d17841bb 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1,16 +1,30 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines_a.h" +#include "jemalloc/internal/arena_inlines_b.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/background_thread.h" +#include "jemalloc/internal/background_thread_inlines.h" #include "jemalloc/internal/decay.h" #include "jemalloc/internal/ehooks.h" +#include "jemalloc/internal/extent.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" 
-#include "jemalloc/internal/san.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_b.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" +#include "jemalloc/internal/large.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/prof_inlines.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/safety_check.h" +#include "jemalloc/internal/san.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/tcache_inlines.h" #include "jemalloc/internal/util.h" +#include "jemalloc/internal/witness.h" JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS diff --git a/src/conf.c b/src/conf.c index ecef73f5..14a9a048 100644 --- a/src/conf.c +++ b/src/conf.c @@ -1,8 +1,11 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/background_thread.h" +#include "jemalloc/internal/conf.h" +#include "jemalloc/internal/extent.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/fxp.h" @@ -10,13 +13,13 @@ #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/prof.h" #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/san.h" #include "jemalloc/internal/sc.h" +#include "jemalloc/internal/tcache.h" #include "jemalloc/internal/util.h" -#include "jemalloc/internal/conf.h" - /* Whether encountered any invalid config options. 
*/ bool had_conf_error; diff --git a/src/ctl.c b/src/ctl.c index e048135a..ef5e57af 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1,22 +1,33 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/arenas_management.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/background_thread.h" +#include "jemalloc/internal/background_thread_inlines.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/inspect.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_b.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/peak_event.h" +#include "jemalloc/internal/prof.h" #include "jemalloc/internal/prof_data.h" +#include "jemalloc/internal/prof_inlines.h" #include "jemalloc/internal/prof_log.h" #include "jemalloc/internal/prof_recent.h" #include "jemalloc/internal/prof_stats.h" #include "jemalloc/internal/prof_sys.h" #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sc.h" +#include "jemalloc/internal/tcache.h" #include "jemalloc/internal/util.h" +#include "jemalloc/internal/witness.h" /******************************************************************************/ /* Data. 
*/ diff --git a/src/extent.c b/src/extent.c index cf935c18..708a2807 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1,12 +1,22 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/background_thread.h" +#include "jemalloc/internal/background_thread_inlines.h" +#include "jemalloc/internal/edata_cache.h" #include "jemalloc/internal/emap.h" +#include "jemalloc/internal/extent.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" -#include "jemalloc/internal/ph.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_b.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/pac.h" +#include "jemalloc/internal/ph.h" +#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/prof_inlines.h" +#include "jemalloc/internal/witness.h" /******************************************************************************/ /* Data. 
*/ diff --git a/src/hpa.c b/src/hpa.c index d59b7fc7..a4b3750c 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -1,12 +1,11 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/background_thread.h" +#include "jemalloc/internal/fb.h" #include "jemalloc/internal/hpa.h" #include "jemalloc/internal/hpa_utils.h" - -#include "jemalloc/internal/fb.h" -#include "jemalloc/internal/witness.h" #include "jemalloc/internal/jemalloc_probe.h" +#include "jemalloc/internal/witness.h" static void hpa_dalloc_batch(tsdn_t *tsdn, hpa_shard_t *shard, edata_list_active_t *list, bool *deferred_work_generated); diff --git a/src/jemalloc.c b/src/jemalloc.c index 6544657d..80d900b8 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1,32 +1,42 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines_b.h" #include "jemalloc/internal/arenas_management.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/background_thread.h" #include "jemalloc/internal/buf_writer.h" +#include "jemalloc/internal/conf.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/emap.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/fxp.h" -#include "jemalloc/internal/san.h" #include "jemalloc/internal/jemalloc_init.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_b.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/large.h" #include "jemalloc/internal/log.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/prof.h" 
+#include "jemalloc/internal/prof_inlines.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/safety_check.h" +#include "jemalloc/internal/san.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/spin.h" #include "jemalloc/internal/sz.h" -#include "jemalloc/internal/ticker.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/tcache_inlines.h" #include "jemalloc/internal/thread_event.h" +#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/util.h" - -#include "jemalloc/internal/conf.h" +#include "jemalloc/internal/witness.h" /******************************************************************************/ /* Data. */ diff --git a/src/prof_data.c b/src/prof_data.c index 7aa047ac..f3651356 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -1,11 +1,19 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines_b.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/hash.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof.h" #include "jemalloc/internal/prof_data.h" +#include "jemalloc/internal/prof_inlines.h" +#include "jemalloc/internal/prof_sys.h" +#include "jemalloc/internal/witness.h" /* * This file defines and manages the core profiling data structures. 
diff --git a/src/prof_sys.c b/src/prof_sys.c index be50c0be..0acb9797 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -1,10 +1,16 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines_b.h" #include "jemalloc/internal/buf_writer.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof.h" #include "jemalloc/internal/prof_data.h" +#include "jemalloc/internal/prof_inlines.h" #include "jemalloc/internal/prof_sys.h" #ifdef JEMALLOC_PROF_LIBUNWIND diff --git a/src/stats.c b/src/stats.c index 65583393..7e30ce8b 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1,13 +1,20 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines_b.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/background_thread.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/emitter.h" #include "jemalloc/internal/fxp.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_b.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex_prof.h" +#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/prof_inlines.h" #include "jemalloc/internal/prof_stats.h" +#include "jemalloc/internal/tcache.h" static const char *const global_mutex_names[mutex_prof_num_global_mutexes] = { #define OP(mtx) #mtx, diff --git a/src/tcache.c b/src/tcache.c index 8c2f6f4c..16e5e4fd 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -1,12 +1,26 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include 
"jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines_b.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/background_thread.h" +#include "jemalloc/internal/background_thread_inlines.h" #include "jemalloc/internal/base.h" +#include "jemalloc/internal/emap.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_b.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" +#include "jemalloc/internal/large.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/prof_inlines.h" #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/san.h" #include "jemalloc/internal/sc.h" +#include "jemalloc/internal/sz.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/tcache_inlines.h" +#include "jemalloc/internal/witness.h" /******************************************************************************/ /* Data. */ From ca1a9236ece93e21b37a4370dcf0088c27685643 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Thu, 28 May 2026 11:20:04 -0400 Subject: [PATCH 15/19] Drop umbrella include from src/jemalloc_cpp.cpp Missed in batch 7 -- my filter only matched *.c, not *.cpp. Without this fix, jemalloc_cpp.cpp would still include the umbrella, which is deleted later in this series (PATCH 17/19), and the build would fail on the C++ target only. Pure follow-up to commit e5f43c9c.
--- src/jemalloc_cpp.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index ac109bb2..9561ea4f 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -7,7 +7,18 @@ extern "C" { #endif #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/jemalloc_internal_externs.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_b.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/prof_inlines.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/tcache_inlines.h" #ifdef __cplusplus } From d20b19f76befb1b243ce11bbb4da5be24f7e2f07 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Thu, 28 May 2026 19:09:33 -0400 Subject: [PATCH 16/19] Drop umbrella include from src/malloc_dispatch.c Follow-up for the malloc_dispatch module added earlier in the stack. The previous batch commits only touched files that existed when cyclical_dep_h was written; malloc_dispatch.c was added later. 
--- src/malloc_dispatch.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/malloc_dispatch.c b/src/malloc_dispatch.c index ea8d2817..34c31ad8 100644 --- a/src/malloc_dispatch.c +++ b/src/malloc_dispatch.c @@ -1,8 +1,16 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/emap.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" +#include "jemalloc/internal/large.h" +#include "jemalloc/internal/malloc_dispatch_externs.h" +#include "jemalloc/internal/malloc_dispatch_inlines.h" +#include "jemalloc/internal/sc.h" +#include "jemalloc/internal/sz.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/tcache_inlines.h" /******************************************************************************/ From 250219596408b47164b383409c8e5569f769b574 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Wed, 27 May 2026 19:28:21 -0400 Subject: [PATCH 17/19] Delete jemalloc_internal_includes.h (umbrella header) After the per-TU explicit-include conversion in batches 1-7, no .c file in src/ needs the umbrella anymore. test/include/test/jemalloc_test.h.in is the last consumer; it's the template for the header used by unit and stress tests that want all of jemalloc's internals visible, so its #include of the umbrella is replaced with explicit includes of the headers the umbrella used to expand to (all of them except malloc_dispatch_externs.h and malloc_dispatch_inlines.h, which are not re-listed). No behavioral change for tests. With that, the umbrella is gone. Every translation unit now declares the headers it actually uses, and the hidden-transitive-include patterns that motivated this cleanup are no longer possible to introduce silently -- a missing include now fails in the file that needs it rather than silently working because something upstream pulled in the world. Step 6 (Option B) of the cyclical-dep cleanup, complete.
--- .../internal/jemalloc_internal_includes.h | 70 ------------------- test/include/test/jemalloc_test.h.in | 30 +++++++- 2 files changed, 28 insertions(+), 72 deletions(-) delete mode 100644 include/jemalloc/internal/jemalloc_internal_includes.h diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h deleted file mode 100644 index 299695f1..00000000 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ /dev/null @@ -1,70 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_INCLUDES_H -#define JEMALLOC_INTERNAL_INCLUDES_H - -/* - * jemalloc can conceptually be broken into components (arena, tcache, etc.), - * but there are circular dependencies that cannot be broken without - * substantial performance degradation. - * - * Historically, we dealt with this by each header into four sections (types, - * structs, externs, and inlines), and included each header file multiple times - * in this file, picking out the portion we want on each pass using the - * following #defines: - * JEMALLOC_H_TYPES : Preprocessor-defined constants and pseudo-opaque data - * types. - * JEMALLOC_H_STRUCTS : Data structures. - * JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes. - * JEMALLOC_H_INLINES : Inline functions. - * - * We're moving toward a world in which the dependencies are explicit; each file - * will #include the headers it depends on (rather than relying on them being - * implicitly available via this file including every header file in the - * project). - * - * We're now in an intermediate state: we've broken up the header files to avoid - * having to include each one multiple times, but have not yet moved the - * dependency information into the header files (i.e. we still rely on the - * ordering in this file to ensure all a header's dependencies are available in - * its translation unit). Each component is now broken up into multiple header - * files, corresponding to the sections above (e.g. 
instead of "foo.h", we now - * have "foo_types.h", "foo_structs.h", "foo_externs.h", "foo_inlines.h"). - * - * Those files which have been converted to explicitly include their - * inter-component dependencies are now in the initial HERMETIC HEADERS - * section. All headers may still rely on jemalloc_preamble.h (which, by fiat, - * must be included first in every translation unit) for system headers and - * global jemalloc definitions, however. - */ - -#include "jemalloc/internal/arena.h" - -/******************************************************************************/ -/* EXTERNS */ -/******************************************************************************/ - -#include "jemalloc/internal/jemalloc_internal_externs.h" -#include "jemalloc/internal/large.h" -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/malloc_dispatch_externs.h" -#include "jemalloc/internal/prof.h" -#include "jemalloc/internal/background_thread.h" - -/******************************************************************************/ -/* INLINES */ -/******************************************************************************/ - -#include "jemalloc/internal/jemalloc_internal_inlines_a.h" -/* - * Include portions of arena code interleaved with tcache code in order to - * resolve circular dependencies. 
- */ -#include "jemalloc/internal/arena_inlines_a.h" -#include "jemalloc/internal/jemalloc_internal_inlines_b.h" -#include "jemalloc/internal/tcache_inlines.h" -#include "jemalloc/internal/arena_inlines_b.h" -#include "jemalloc/internal/malloc_dispatch_inlines.h" -#include "jemalloc/internal/jemalloc_internal_inlines_c.h" -#include "jemalloc/internal/prof_inlines.h" -#include "jemalloc/internal/background_thread_inlines.h" - -#endif /* JEMALLOC_INTERNAL_INCLUDES_H */ diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 8b139db1..12cedc57 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -47,7 +47,20 @@ extern "C" { # define JEMALLOC_JET # define JEMALLOC_MANGLE # include "jemalloc/internal/jemalloc_preamble.h" -# include "jemalloc/internal/jemalloc_internal_includes.h" +# include "jemalloc/internal/arena.h" +# include "jemalloc/internal/jemalloc_internal_externs.h" +# include "jemalloc/internal/large.h" +# include "jemalloc/internal/tcache.h" +# include "jemalloc/internal/prof.h" +# include "jemalloc/internal/background_thread.h" +# include "jemalloc/internal/jemalloc_internal_inlines_a.h" +# include "jemalloc/internal/arena_inlines_a.h" +# include "jemalloc/internal/jemalloc_internal_inlines_b.h" +# include "jemalloc/internal/tcache_inlines.h" +# include "jemalloc/internal/arena_inlines_b.h" +# include "jemalloc/internal/jemalloc_internal_inlines_c.h" +# include "jemalloc/internal/prof_inlines.h" +# include "jemalloc/internal/background_thread_inlines.h" /******************************************************************************/ /* @@ -92,7 +105,20 @@ extern "C" { # define JEMALLOC_JET # include "jemalloc/internal/jemalloc_preamble.h" -# include "jemalloc/internal/jemalloc_internal_includes.h" +# include "jemalloc/internal/arena.h" +# include "jemalloc/internal/jemalloc_internal_externs.h" +# include "jemalloc/internal/large.h" +# include "jemalloc/internal/tcache.h" +# 
include "jemalloc/internal/prof.h" +# include "jemalloc/internal/background_thread.h" +# include "jemalloc/internal/jemalloc_internal_inlines_a.h" +# include "jemalloc/internal/arena_inlines_a.h" +# include "jemalloc/internal/jemalloc_internal_inlines_b.h" +# include "jemalloc/internal/tcache_inlines.h" +# include "jemalloc/internal/arena_inlines_b.h" +# include "jemalloc/internal/jemalloc_internal_inlines_c.h" +# include "jemalloc/internal/prof_inlines.h" +# include "jemalloc/internal/background_thread_inlines.h" # include "jemalloc/internal/public_unnamespace.h" # undef JEMALLOC_JET From 5b834df66cd06ea5f8ca3b32e55f25c0c72c08f3 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Thu, 28 May 2026 21:04:33 -0400 Subject: [PATCH 18/19] Move te_prof_sample_event_lookahead into src/jemalloc.c The function only had production callers in src/jemalloc.c. It was parked in prof_inlines.h because its body uses tsd accessors that aren't visible from prof.h. Moving it to file scope in jemalloc.c gets it out of the shared header without losing inlining, and the name drops the misleading te_ prefix -- it isn't a thread_event facility, just a one-off prof helper. Renamed to prof_sample_lookahead for clarity. --- include/jemalloc/internal/prof_inlines.h | 26 ------------------------ src/jemalloc.c | 26 ++++++++++++++++++++---- 2 files changed, 22 insertions(+), 30 deletions(-) diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index 19dfd1a0..5adf0c4e 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -9,32 +9,6 @@ #include "jemalloc/internal/sz.h" #include "jemalloc/internal/thread_event.h" -/* - * The lookahead functionality facilitates events to be able to lookahead, i.e. - * without touching the event counters, to determine whether an event would be - * triggered.
The event counters are not advanced until the end of the - * allocation / deallocation calls, so the lookahead can be useful if some - * preparation work for some event must be done early in the allocation / - * deallocation calls. - * - * Currently only the profiling sampling event needs the lookahead - * functionality, so we don't yet define general purpose lookahead functions. - * - * Defined here rather than prof.h because the inline body depends on tsd - * accessors that aren't visible until tsd inlines are loaded. - */ - -JEMALLOC_ALWAYS_INLINE bool -te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) { - if (unlikely(!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0)) { - return false; - } - /* The subtraction is intentionally susceptible to underflow. */ - uint64_t accumbytes = tsd_thread_allocated_get(tsd) + usize - - tsd_thread_allocated_last_event_get(tsd); - return accumbytes >= tsd_prof_sample_event_wait_get(tsd); -} - JEMALLOC_ALWAYS_INLINE void prof_active_assert(void) { cassert(config_prof); diff --git a/src/jemalloc.c b/src/jemalloc.c index 80d900b8..e7468724 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -149,6 +149,24 @@ const char *const zero_realloc_mode_names[] = { "abort", }; +/* + * Check whether the next allocation would trip the profiling sampler without + * advancing the event counter (the counter only advances at the end of the + * alloc/dalloc call). Lets the allocation path pre-compute the prof context + * before committing. Lives here -- not in prof_inlines.h -- because jemalloc.c + * is the only production caller. + */ +JEMALLOC_ALWAYS_INLINE bool +prof_sample_lookahead(tsd_t *tsd, size_t usize) { + if (unlikely(!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0)) { + return false; + } + /* The subtraction is intentionally susceptible to underflow. 
*/ + uint64_t accumbytes = tsd_thread_allocated_get(tsd) + usize + - tsd_thread_allocated_last_event_get(tsd); + return accumbytes >= tsd_prof_sample_event_wait_get(tsd); +} + /* * These are the documented values for junk fill debugging facilities -- see the * man page. @@ -610,7 +628,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { /* If profiling is on, get our profiling context. */ if (config_prof && opt_prof) { bool prof_active = prof_active_get_unlocked(); - bool sample_event = te_prof_sample_event_lookahead(tsd, usize); + bool sample_event = prof_sample_lookahead(tsd, usize); prof_tctx_t *tctx = prof_alloc_prep( tsd, prof_active, sample_event); @@ -1412,7 +1430,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, prof_info_t old_prof_info; prof_info_get_and_reset_recent(tsd, old_ptr, alloc_ctx, &old_prof_info); bool prof_active = prof_active_get_unlocked(); - bool sample_event = te_prof_sample_event_lookahead(tsd, usize); + bool sample_event = prof_sample_lookahead(tsd, usize); prof_tctx_t *tctx = prof_alloc_prep(tsd, prof_active, sample_event); void *p; if (unlikely(tctx != PROF_TCTX_SENTINEL)) { @@ -1650,7 +1668,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, usize_max = SC_LARGE_MAXCLASS; } bool prof_active = prof_active_get_unlocked(); - bool sample_event = te_prof_sample_event_lookahead(tsd, usize_max); + bool sample_event = prof_sample_lookahead(tsd, usize_max); prof_tctx_t *tctx = prof_alloc_prep(tsd, prof_active, sample_event); size_t usize; @@ -1685,7 +1703,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, prof_info_get_and_reset_recent( tsd, ptr, &new_alloc_ctx, &prof_info); assert(usize <= usize_max); - sample_event = te_prof_sample_event_lookahead(tsd, usize); + sample_event = prof_sample_lookahead(tsd, usize); prof_realloc(tsd, ptr, size, usize, tctx, prof_active, ptr, old_usize, &prof_info, sample_event); } From 
c411b0ab3b148938064b37a2409b92443becf572 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Thu, 28 May 2026 21:10:49 -0400 Subject: [PATCH 19/19] Collapse arena_inlines_a/b and jemalloc_internal_inlines_b into arena_inlines.h The arena_inlines_a.h / arena_inlines_b.h split, and the parallel jemalloc_internal_inlines_b.h file that defined arena_choose() between them, all existed to manage one ordering constraint: arena_choose() had to be defined before arena_choose_maybe_huge() (which calls it), but had to be defined after the tsd/tcache inlines it depends on. Three files, one staged include order, no real semantic boundary. After the malloc_dispatch refactor moved the heaviest tcache-pulling inlines (the malloc/dalloc routing) out of arena_inlines_b.h, the arena-side inlines that remain all belong together. Merge them into a single arena_inlines.h that: - explicitly includes jemalloc_internal_inlines_a.h (for tsd accessors) and tcache.h (for tcache_arena_associate / reassociate externs) -- both were previously pulled transitively; - orders functions so each caller appears after its callee (cheap accessors -> arena_choose family -> the rest), so no forward references are needed; - drops the load-bearing-split comment, which is no longer true. All consumers that included any of the three old headers now include arena_inlines.h. background_thread_inlines.h now pulls a heavier set of transitive includes (prof.h, large.h, mutex.h, ...) than when it only needed cheap accessors; this is acceptable because every TU that includes background_thread_inlines.h already pulls those headers via other paths. 
--- .../{arena_inlines_b.h => arena_inlines.h} | 154 +++++++++++++++--- include/jemalloc/internal/arena_inlines_a.h | 27 --- .../internal/background_thread_inlines.h | 2 +- .../internal/jemalloc_internal_inlines_b.h | 110 ------------- .../internal/jemalloc_internal_inlines_c.h | 2 +- .../internal/malloc_dispatch_inlines.h | 3 +- include/jemalloc/internal/prof_inlines.h | 2 +- include/jemalloc/internal/tcache_inlines.h | 2 +- src/arena.c | 4 +- src/arenas_management.c | 2 +- src/background_thread.c | 3 +- src/bin.c | 2 +- src/ctl.c | 3 +- src/extent.c | 2 +- src/extent_dss.c | 2 +- src/inspect.c | 2 +- src/jemalloc.c | 3 +- src/jemalloc_cpp.cpp | 3 +- src/jemalloc_fork.c | 2 +- src/jemalloc_init.c | 3 +- src/large.c | 2 +- src/prof.c | 2 +- src/prof_data.c | 2 +- src/prof_log.c | 2 +- src/prof_recent.c | 2 +- src/prof_sys.c | 2 +- src/stats.c | 3 +- src/tcache.c | 3 +- src/tsd.c | 3 +- test/include/test/jemalloc_test.h.in | 8 +- 30 files changed, 159 insertions(+), 203 deletions(-) rename include/jemalloc/internal/{arena_inlines_b.h => arena_inlines.h} (69%) delete mode 100644 include/jemalloc/internal/arena_inlines_a.h delete mode 100644 include/jemalloc/internal/jemalloc_internal_inlines_b.h diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines.h similarity index 69% rename from include/jemalloc/internal/arena_inlines_b.h rename to include/jemalloc/internal/arena_inlines.h index 5cfe7d25..33e91e41 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines.h @@ -1,30 +1,13 @@ -#ifndef JEMALLOC_INTERNAL_ARENA_INLINES_B_H -#define JEMALLOC_INTERNAL_ARENA_INLINES_B_H - -/* - * This split (arena_inlines_a.h + arena_inlines_b.h) is load-bearing, not - * stylistic. arena_inlines_a.h holds the cheap field accessors that only - * depend on arena.h fields. This file holds the larger inlines that depend - * on arena_choose(), prof, large, and friends. 
- * - * Merging the two would create a real #include cycle through arena_choose(): - * jemalloc_internal_inlines_b.h defines arena_choose() and pulls in - * arena_inlines_a.h at the top for the cheap accessors. arena_choose() is - * called from arena_choose_maybe_huge() in this file. If that #include - * resolved to a merged "arena_inlines.h", arena_choose_maybe_huge() would - * be parsed before arena_choose() exists, and we would get an implicit - * declaration error -- arena_inlines.h cannot pull in - * jemalloc_internal_inlines_b.h to fix it (that file is mid-parse and its - * include guard is already set). - * - * Keep this file separate from arena_inlines_a.h. - */ +#ifndef JEMALLOC_INTERNAL_ARENA_INLINES_H +#define JEMALLOC_INTERNAL_ARENA_INLINES_H #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arenas_management.h" #include "jemalloc/internal/div.h" #include "jemalloc/internal/emap.h" -#include "jemalloc/internal/jemalloc_internal_inlines_b.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/large.h" #include "jemalloc/internal/mutex.h" @@ -33,14 +16,139 @@ #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/sz.h" +#include "jemalloc/internal/tcache.h" #include "jemalloc/internal/ticker.h" +/* Cheap field accessors. 
*/ + +static inline unsigned +arena_ind_get(const arena_t *arena) { + return arena->ind; +} + +static inline void +arena_internal_add(arena_t *arena, size_t size) { + atomic_fetch_add_zu(&arena->stats.internal, size, ATOMIC_RELAXED); +} + +static inline void +arena_internal_sub(arena_t *arena, size_t size) { + atomic_fetch_sub_zu(&arena->stats.internal, size, ATOMIC_RELAXED); +} + +static inline size_t +arena_internal_get(const arena_t *arena) { + return atomic_load_zu(&arena->stats.internal, ATOMIC_RELAXED); +} + +static inline bool +arena_is_auto(const arena_t *arena) { + assert(narenas_auto > 0); + + return (arena_ind_get(arena) < manual_arena_base); +} + static inline arena_t * arena_get_from_edata(const edata_t *edata) { return (arena_t *)atomic_load_p( &arenas[edata_arena_ind_get(edata)], ATOMIC_RELAXED); } +/* Arena selection and migration. */ + +static inline void +thread_migrate_arena(tsd_t *tsd, arena_t *oldarena, arena_t *newarena) { + assert(oldarena != NULL); + assert(newarena != NULL); + + arena_migrate(tsd, oldarena, newarena); + if (tcache_available(tsd)) { + tcache_arena_reassociate(tsd_tsdn(tsd), + tsd_tcache_slowp_get(tsd), newarena); + } +} + +static inline void +percpu_arena_update(tsd_t *tsd, unsigned cpu) { + assert(have_percpu_arena); + arena_t *oldarena = tsd_arena_get(tsd); + assert(oldarena != NULL); + unsigned oldind = arena_ind_get(oldarena); + + if (oldind != cpu) { + unsigned newind = cpu; + arena_t *newarena = arena_get(tsd_tsdn(tsd), newind, true); + assert(newarena != NULL); + + thread_migrate_arena(tsd, oldarena, newarena); + } +} + +/* Choose an arena based on a per-thread value. */ +static inline arena_t * +arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { + arena_t *ret; + + if (arena != NULL) { + return arena; + } + + /* During reentrancy, arena 0 is the safest bet. */ + if (unlikely(tsd_reentrancy_level_get(tsd) > 0)) { + return arena_get(tsd_tsdn(tsd), 0, true); + } + + ret = internal ? 
tsd_iarena_get(tsd) : tsd_arena_get(tsd); + if (unlikely(ret == NULL)) { + ret = arena_choose_hard(tsd, internal); + assert(ret); + if (tcache_available(tsd)) { + tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); + if (tcache_slow->arena != NULL) { + /* See comments in tsd_tcache_data_init().*/ + assert(tcache_slow->arena + == arena_get(tsd_tsdn(tsd), 0, false)); + if (tcache_slow->arena != ret) { + tcache_arena_reassociate(tsd_tsdn(tsd), + tcache_slow, ret); + } + } else { + tcache_arena_associate( + tsd_tsdn(tsd), tcache_slow, ret); + } + } + } + + /* + * Note that for percpu arena, if the current arena is outside of the + * auto percpu arena range, (i.e. thread is assigned to a manually + * managed arena), then percpu arena is skipped. + */ + if (have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena) + && !internal + && (arena_ind_get(ret) < percpu_arena_ind_limit(opt_percpu_arena)) + && (ret->last_thd != tsd_tsdn(tsd))) { + unsigned ind = percpu_arena_choose(); + if (arena_ind_get(ret) != ind) { + percpu_arena_update(tsd, ind); + ret = tsd_arena_get(tsd); + } + ret->last_thd = tsd_tsdn(tsd); + } + + return ret; +} + +static inline arena_t * +arena_choose(tsd_t *tsd, arena_t *arena) { + return arena_choose_impl(tsd, arena, false); +} + +static inline arena_t * +arena_ichoose(tsd_t *tsd, arena_t *arena) { + return arena_choose_impl(tsd, arena, true); +} + JEMALLOC_ALWAYS_INLINE arena_t * arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { if (arena != NULL) { @@ -288,4 +396,4 @@ arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) { return shard0 + binshard; } -#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */ +#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_H */ diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h deleted file mode 100644 index d8ecd496..00000000 --- a/include/jemalloc/internal/arena_inlines_a.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef 
JEMALLOC_INTERNAL_ARENA_INLINES_A_H -#define JEMALLOC_INTERNAL_ARENA_INLINES_A_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/arena.h" - -static inline unsigned -arena_ind_get(const arena_t *arena) { - return arena->ind; -} - -static inline void -arena_internal_add(arena_t *arena, size_t size) { - atomic_fetch_add_zu(&arena->stats.internal, size, ATOMIC_RELAXED); -} - -static inline void -arena_internal_sub(arena_t *arena, size_t size) { - atomic_fetch_sub_zu(&arena->stats.internal, size, ATOMIC_RELAXED); -} - -static inline size_t -arena_internal_get(const arena_t *arena) { - return atomic_load_zu(&arena->stats.internal, ATOMIC_RELAXED); -} - -#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_A_H */ diff --git a/include/jemalloc/internal/background_thread_inlines.h b/include/jemalloc/internal/background_thread_inlines.h index 67517626..ba6e6644 100644 --- a/include/jemalloc/internal/background_thread_inlines.h +++ b/include/jemalloc/internal/background_thread_inlines.h @@ -2,7 +2,7 @@ #define JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/arena_inlines_a.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/background_thread.h" diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h deleted file mode 100644 index 2c91cb77..00000000 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ /dev/null @@ -1,110 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_INLINES_B_H -#define JEMALLOC_INTERNAL_INLINES_B_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/arena_inlines_a.h" -#include "jemalloc/internal/arenas_management.h" -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/jemalloc_internal_inlines_a.h" - -static inline void -thread_migrate_arena(tsd_t *tsd, arena_t *oldarena, arena_t 
*newarena) { - assert(oldarena != NULL); - assert(newarena != NULL); - - arena_migrate(tsd, oldarena, newarena); - if (tcache_available(tsd)) { - tcache_arena_reassociate(tsd_tsdn(tsd), - tsd_tcache_slowp_get(tsd), newarena); - } -} - -static inline void -percpu_arena_update(tsd_t *tsd, unsigned cpu) { - assert(have_percpu_arena); - arena_t *oldarena = tsd_arena_get(tsd); - assert(oldarena != NULL); - unsigned oldind = arena_ind_get(oldarena); - - if (oldind != cpu) { - unsigned newind = cpu; - arena_t *newarena = arena_get(tsd_tsdn(tsd), newind, true); - assert(newarena != NULL); - - thread_migrate_arena(tsd, oldarena, newarena); - } -} - -/* Choose an arena based on a per-thread value. */ -static inline arena_t * -arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { - arena_t *ret; - - if (arena != NULL) { - return arena; - } - - /* During reentrancy, arena 0 is the safest bet. */ - if (unlikely(tsd_reentrancy_level_get(tsd) > 0)) { - return arena_get(tsd_tsdn(tsd), 0, true); - } - - ret = internal ? tsd_iarena_get(tsd) : tsd_arena_get(tsd); - if (unlikely(ret == NULL)) { - ret = arena_choose_hard(tsd, internal); - assert(ret); - if (tcache_available(tsd)) { - tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); - if (tcache_slow->arena != NULL) { - /* See comments in tsd_tcache_data_init().*/ - assert(tcache_slow->arena - == arena_get(tsd_tsdn(tsd), 0, false)); - if (tcache_slow->arena != ret) { - tcache_arena_reassociate(tsd_tsdn(tsd), - tcache_slow, ret); - } - } else { - tcache_arena_associate( - tsd_tsdn(tsd), tcache_slow, ret); - } - } - } - - /* - * Note that for percpu arena, if the current arena is outside of the - * auto percpu arena range, (i.e. thread is assigned to a manually - * managed arena), then percpu arena is skipped. 
- */ - if (have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena) - && !internal - && (arena_ind_get(ret) < percpu_arena_ind_limit(opt_percpu_arena)) - && (ret->last_thd != tsd_tsdn(tsd))) { - unsigned ind = percpu_arena_choose(); - if (arena_ind_get(ret) != ind) { - percpu_arena_update(tsd, ind); - ret = tsd_arena_get(tsd); - } - ret->last_thd = tsd_tsdn(tsd); - } - - return ret; -} - -static inline arena_t * -arena_choose(tsd_t *tsd, arena_t *arena) { - return arena_choose_impl(tsd, arena, false); -} - -static inline arena_t * -arena_ichoose(tsd_t *tsd, arena_t *arena) { - return arena_choose_impl(tsd, arena, true); -} - -static inline bool -arena_is_auto(const arena_t *arena) { - assert(narenas_auto > 0); - - return (arena_ind_get(arena) < manual_arena_base); -} - -#endif /* JEMALLOC_INTERNAL_INLINES_B_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 389cf6cf..6bec6d6f 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -3,7 +3,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena.h" -#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/emap.h" #include "jemalloc/internal/jemalloc_init.h" #include "jemalloc/internal/jemalloc_internal_types.h" diff --git a/include/jemalloc/internal/malloc_dispatch_inlines.h b/include/jemalloc/internal/malloc_dispatch_inlines.h index 79405120..b6145c85 100644 --- a/include/jemalloc/internal/malloc_dispatch_inlines.h +++ b/include/jemalloc/internal/malloc_dispatch_inlines.h @@ -3,11 +3,10 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena.h" -#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/bin.h" #include "jemalloc/internal/div.h" #include 
"jemalloc/internal/emap.h" -#include "jemalloc/internal/jemalloc_internal_inlines_b.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/large.h" #include "jemalloc/internal/malloc_dispatch_externs.h" diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index 5adf0c4e..e3e63f25 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -2,7 +2,7 @@ #define JEMALLOC_INTERNAL_PROF_INLINES_H #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/prof.h" #include "jemalloc/internal/safety_check.h" diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 2b767391..7abcbb40 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -4,7 +4,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/bin.h" -#include "jemalloc/internal/jemalloc_internal_inlines_b.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/large.h" #include "jemalloc/internal/san.h" diff --git a/src/arena.c b/src/arena.c index d17841bb..c4a31f63 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1,8 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena.h" -#include "jemalloc/internal/arena_inlines_a.h" -#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/background_thread.h" #include "jemalloc/internal/background_thread_inlines.h" @@ -12,7 +11,6 @@ #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" 
#include "jemalloc/internal/jemalloc_internal_inlines_a.h" -#include "jemalloc/internal/jemalloc_internal_inlines_b.h" #include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/large.h" #include "jemalloc/internal/mutex.h" diff --git a/src/arenas_management.c b/src/arenas_management.c index e4d2aa75..394303a3 100644 --- a/src/arenas_management.c +++ b/src/arenas_management.c @@ -6,7 +6,7 @@ #include "jemalloc/internal/background_thread_inlines.h" #include "jemalloc/internal/jemalloc_init.h" #include "jemalloc/internal/jemalloc_internal_inlines_a.h" -#include "jemalloc/internal/jemalloc_internal_inlines_b.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" diff --git a/src/background_thread.c b/src/background_thread.c index f25dc403..dcda912f 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena.h" -#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/arenas_management.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/background_thread.h" @@ -9,7 +9,6 @@ #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/jemalloc_init.h" #include "jemalloc/internal/jemalloc_internal_inlines_a.h" -#include "jemalloc/internal/jemalloc_internal_inlines_b.h" #include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" diff --git a/src/bin.c b/src/bin.c index ac17f16a..694579b9 100644 --- a/src/bin.c +++ b/src/bin.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena.h" -#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/assert.h" 
#include "jemalloc/internal/bin.h" #include "jemalloc/internal/sc.h" diff --git a/src/ctl.c b/src/ctl.c index ef5e57af..3d628429 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena.h" -#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/arenas_management.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/background_thread.h" @@ -11,7 +11,6 @@ #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/inspect.h" #include "jemalloc/internal/jemalloc_internal_inlines_a.h" -#include "jemalloc/internal/jemalloc_internal_inlines_b.h" #include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" diff --git a/src/extent.c b/src/extent.c index 708a2807..2e2977f3 100644 --- a/src/extent.c +++ b/src/extent.c @@ -10,7 +10,7 @@ #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/jemalloc_internal_inlines_a.h" -#include "jemalloc/internal/jemalloc_internal_inlines_b.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/pac.h" #include "jemalloc/internal/ph.h" diff --git a/src/extent_dss.c b/src/extent_dss.c index 16faa0a2..16dbe8a2 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena.h" -#include "jemalloc/internal/arena_inlines_a.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/edata_cache.h" #include "jemalloc/internal/extent.h" diff --git a/src/inspect.c b/src/inspect.c index b2a961b2..587d9236 100644 --- a/src/inspect.c +++ b/src/inspect.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena.h" 
-#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/bin_info.h" #include "jemalloc/internal/edata.h" #include "jemalloc/internal/emap.h" diff --git a/src/jemalloc.c b/src/jemalloc.c index e7468724..12fc5f6e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena.h" -#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/arenas_management.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/atomic.h" @@ -15,7 +15,6 @@ #include "jemalloc/internal/fxp.h" #include "jemalloc/internal/jemalloc_init.h" #include "jemalloc/internal/jemalloc_internal_inlines_a.h" -#include "jemalloc/internal/jemalloc_internal_inlines_b.h" #include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/large.h" diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 9561ea4f..193b536b 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -9,10 +9,9 @@ extern "C" { #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena.h" -#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/jemalloc_internal_externs.h" #include "jemalloc/internal/jemalloc_internal_inlines_a.h" -#include "jemalloc/internal/jemalloc_internal_inlines_b.h" #include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/prof.h" diff --git a/src/jemalloc_fork.c b/src/jemalloc_fork.c index 3fc89c68..6d23c47e 100644 --- a/src/jemalloc_fork.c +++ b/src/jemalloc_fork.c @@ -7,7 +7,7 @@ #include "jemalloc/internal/jemalloc_fork.h" #include "jemalloc/internal/jemalloc_init.h" #include "jemalloc/internal/jemalloc_internal_inlines_a.h" -#include 
"jemalloc/internal/jemalloc_internal_inlines_b.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/prof.h" #include "jemalloc/internal/tcache.h" diff --git a/src/jemalloc_init.c b/src/jemalloc_init.c index b62f10a3..88ae41c3 100644 --- a/src/jemalloc_init.c +++ b/src/jemalloc_init.c @@ -1,12 +1,11 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena.h" -#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/arenas_management.h" #include "jemalloc/internal/background_thread.h" #include "jemalloc/internal/background_thread_inlines.h" #include "jemalloc/internal/jemalloc_internal_inlines_a.h" -#include "jemalloc/internal/jemalloc_internal_inlines_b.h" #include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/prof.h" #include "jemalloc/internal/prof_inlines.h" diff --git a/src/large.c b/src/large.c index 197cb0ba..f5894cfa 100644 --- a/src/large.c +++ b/src/large.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena.h" -#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/emap.h" #include "jemalloc/internal/extent.h" diff --git a/src/prof.c b/src/prof.c index 4fb5ba5f..eff0fc76 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena.h" -#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/counter.h" #include "jemalloc/internal/ctl.h" diff --git a/src/prof_data.c b/src/prof_data.c index f3651356..d11f7907 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include 
"jemalloc/internal/arena.h" -#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/hash.h" diff --git a/src/prof_log.c b/src/prof_log.c index a5dbe18b..a1f8dfa0 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena.h" -#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/buf_writer.h" #include "jemalloc/internal/jemalloc_internal_inlines_a.h" diff --git a/src/prof_recent.c b/src/prof_recent.c index dbc0fe4a..23146ec4 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena.h" -#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/buf_writer.h" #include "jemalloc/internal/emitter.h" diff --git a/src/prof_sys.c b/src/prof_sys.c index 0acb9797..7067b152 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena.h" -#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/buf_writer.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/jemalloc_internal_inlines_a.h" diff --git a/src/stats.c b/src/stats.c index 7e30ce8b..bf016d7c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1,14 +1,13 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena.h" -#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/background_thread.h" #include 
"jemalloc/internal/ctl.h" #include "jemalloc/internal/emitter.h" #include "jemalloc/internal/fxp.h" #include "jemalloc/internal/jemalloc_internal_inlines_a.h" -#include "jemalloc/internal/jemalloc_internal_inlines_b.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex_prof.h" #include "jemalloc/internal/prof.h" diff --git a/src/tcache.c b/src/tcache.c index 16e5e4fd..012579bd 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -1,14 +1,13 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena.h" -#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/background_thread.h" #include "jemalloc/internal/background_thread_inlines.h" #include "jemalloc/internal/base.h" #include "jemalloc/internal/emap.h" #include "jemalloc/internal/jemalloc_internal_inlines_a.h" -#include "jemalloc/internal/jemalloc_internal_inlines_b.h" #include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/large.h" #include "jemalloc/internal/mutex.h" diff --git a/src/tsd.c b/src/tsd.c index b639aac5..814a4e70 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -1,13 +1,12 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena.h" -#include "jemalloc/internal/arena_inlines_a.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/arenas_management.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/background_thread.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/jemalloc_internal_inlines_a.h" -#include "jemalloc/internal/jemalloc_internal_inlines_b.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/prof.h" diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 12cedc57..c263c32a 100644 --- a/test/include/test/jemalloc_test.h.in +++ 
b/test/include/test/jemalloc_test.h.in @@ -54,10 +54,8 @@ extern "C" { # include "jemalloc/internal/prof.h" # include "jemalloc/internal/background_thread.h" # include "jemalloc/internal/jemalloc_internal_inlines_a.h" -# include "jemalloc/internal/arena_inlines_a.h" -# include "jemalloc/internal/jemalloc_internal_inlines_b.h" +# include "jemalloc/internal/arena_inlines.h" # include "jemalloc/internal/tcache_inlines.h" -# include "jemalloc/internal/arena_inlines_b.h" # include "jemalloc/internal/jemalloc_internal_inlines_c.h" # include "jemalloc/internal/prof_inlines.h" # include "jemalloc/internal/background_thread_inlines.h" @@ -112,10 +110,8 @@ extern "C" { # include "jemalloc/internal/prof.h" # include "jemalloc/internal/background_thread.h" # include "jemalloc/internal/jemalloc_internal_inlines_a.h" -# include "jemalloc/internal/arena_inlines_a.h" -# include "jemalloc/internal/jemalloc_internal_inlines_b.h" +# include "jemalloc/internal/arena_inlines.h" # include "jemalloc/internal/tcache_inlines.h" -# include "jemalloc/internal/arena_inlines_b.h" # include "jemalloc/internal/jemalloc_internal_inlines_c.h" # include "jemalloc/internal/prof_inlines.h" # include "jemalloc/internal/background_thread_inlines.h"