diff --git a/Makefile.in b/Makefile.in index f939350f..13bc5a24 100644 --- a/Makefile.in +++ b/Makefile.in @@ -132,6 +132,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/inspect.c \ $(srcroot)src/large.c \ $(srcroot)src/log.c \ + $(srcroot)src/malloc_dispatch.c \ $(srcroot)src/malloc_io.c \ $(srcroot)src/conf.c \ $(srcroot)src/mutex.c \ diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena.h similarity index 52% rename from include/jemalloc/internal/arena_externs.h rename to include/jemalloc/internal/arena.h index 06189d56..a9f4cc02 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena.h @@ -1,12 +1,183 @@ -#ifndef JEMALLOC_INTERNAL_ARENA_EXTERNS_H -#define JEMALLOC_INTERNAL_ARENA_EXTERNS_H +#ifndef JEMALLOC_INTERNAL_ARENA_H +#define JEMALLOC_INTERNAL_ARENA_H #include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena_decay_constants.h" +#include "jemalloc/internal/sc.h" + +/******************************************************************************/ +/* TYPES */ +/******************************************************************************/ + +/* Default decay times in milliseconds. */ +#define DIRTY_DECAY_MS_DEFAULT ZD(10 * 1000) +#define MUZZY_DECAY_MS_DEFAULT (0) +/* Maximum length of the arena name. */ +#define ARENA_NAME_LEN 32 + +typedef struct arena_s arena_t; + +typedef enum { + percpu_arena_mode_names_base = 0, /* Used for options processing. */ + + /* + * *_uninit are used only during bootstrapping, and must correspond + * to initialized variant plus percpu_arena_mode_enabled_base. + */ + percpu_arena_uninit = 0, + per_phycpu_arena_uninit = 1, + + /* All non-disabled modes must come after percpu_arena_disabled. */ + percpu_arena_disabled = 2, + + percpu_arena_mode_names_limit = 3, /* Used for options processing. */ + percpu_arena_mode_enabled_base = 3, + + percpu_arena = 3, + per_phycpu_arena = 4 /* Hyper threads share arena. 
*/ +} percpu_arena_mode_t; + +#define PERCPU_ARENA_ENABLED(m) ((m) >= percpu_arena_mode_enabled_base) +#define PERCPU_ARENA_DEFAULT percpu_arena_disabled + +/* + * When allocation_size >= oversize_threshold, use the dedicated huge arena + * (unless an arena index is explicitly specified). 0 disables the feature. + */ +#define OVERSIZE_THRESHOLD_DEFAULT (8 << 20) + +struct arena_config_s { + /* extent hooks to be used for the arena */ + extent_hooks_t *extent_hooks; + + /* + * Use extent hooks for metadata (base) allocations when true. + */ + bool metadata_use_hooks; +}; + +typedef struct arena_config_s arena_config_t; + +extern const arena_config_t arena_config_default; + +/******************************************************************************/ +/* STRUCTS */ +/******************************************************************************/ + #include "jemalloc/internal/arena_stats.h" +#include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bin.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/counter.h" +#include "jemalloc/internal/ecache.h" +#include "jemalloc/internal/edata_cache.h" +#include "jemalloc/internal/extent_dss.h" +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/pa.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/ticker.h" + +struct arena_s { + /* + * Number of threads currently assigned to this arena. Each thread has + * two distinct assignments, one for application-serving allocation, and + * the other for internal metadata allocation. Internal metadata must + * not be allocated from arenas explicitly created via the arenas.create + * mallctl, because the arena.<i>.reset mallctl indiscriminately + * discards all allocations for the affected arena. + * + * 0: Application allocation. + * 1: Internal metadata allocation. + * + * Synchronization: atomic. 
+ */ + atomic_u_t nthreads[2]; + + /* Next bin shard for binding new threads. Synchronization: atomic. */ + atomic_u_t binshard_next; + + /* + * When percpu_arena is enabled, to amortize the cost of reading / + * updating the current CPU id, track the most recent thread accessing + * this arena, and only read CPU if there is a mismatch. + */ + tsdn_t *last_thd; + + /* Synchronization: internal. */ + arena_stats_t stats; + + /* + * List of cache_bin_array_descriptors for extant threads associated + * with this arena. Stats from these are merged incrementally, and at + * exit if opt_stats_print is enabled. + * + * Synchronization: cache_bin_array_descriptor_ql_mtx. + */ + ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql; + malloc_mutex_t cache_bin_array_descriptor_ql_mtx; + + /* + * Represents a dss_prec_t, but atomically. + * + * Synchronization: atomic. + */ + atomic_u_t dss_prec; + + /* + * Extant large allocations. + * + * Synchronization: large_mtx. + */ + edata_list_active_t large; + /* Synchronizes all large allocation/update/deallocation. */ + malloc_mutex_t large_mtx; + + /* The page-level allocator shard this arena uses. */ + pa_shard_t pa_shard; + + /* + * A cached copy of base->ind. This can get accessed on hot paths; + * looking it up in base requires an extra pointer hop / cache miss. + */ + unsigned ind; + + /* + * Base allocator, from which arena metadata are allocated. + * + * Synchronization: internal. + */ + base_t *base; + /* Used to determine uptime. Read-only after initialization. */ + nstime_t create_time; + + /* The name of the arena. */ + char name[ARENA_NAME_LEN]; + + /* + * The arena is allocated alongside its bins; really this is a + * dynamically sized array determined by the binshard settings. + * Enforcing cacheline-alignment to minimize the number of cachelines + * touched on the hot paths. + */ + JEMALLOC_WARN_ON_USAGE( + "Do not use this field directly. 
" + "Use `arena_get_bin` instead.") + JEMALLOC_ALIGNED(CACHELINE) +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + bin_t all_bins[]; +#else + bin_t all_bins[0]; +#endif +}; + +/******************************************************************************/ +/* EXTERNS */ +/******************************************************************************/ + #include "jemalloc/internal/div.h" #include "jemalloc/internal/emap.h" -#include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/pages.h" #include "jemalloc/internal/stats.h" @@ -70,12 +241,9 @@ cache_bin_sz_t arena_ptr_array_fill_small(tsdn_t *tsdn, arena_t *arena, void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, bool slab); -void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, - bool zero, bool slab, tcache_t *tcache); void arena_prof_promote( tsdn_t *tsdn, void *ptr, size_t usize, size_t bumped_usize); -void arena_dalloc_promoted( - tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); +size_t arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr); void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab); void arena_dalloc_small(tsdn_t *tsdn, void *ptr); @@ -84,8 +252,6 @@ void arena_ptr_array_flush(tsd_t *tsd, szind_t binind, arena_t *stats_arena, cache_bin_stats_t merge_stats); bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero, size_t *newsize); -void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, - size_t size, size_t alignment, bool zero, bool slab, tcache_t *tcache); dss_prec_t arena_dss_prec_get(const arena_t *arena); ehooks_t *arena_get_ehooks(const arena_t *arena); extent_hooks_t *arena_set_extent_hooks( @@ -128,4 +294,4 @@ void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); void arena_postfork_child(tsdn_t *tsdn, arena_t *arena, cache_bin_array_descriptor_t *surviving_desc); -#endif /* 
JEMALLOC_INTERNAL_ARENA_EXTERNS_H */ +#endif /* JEMALLOC_INTERNAL_ARENA_H */ diff --git a/include/jemalloc/internal/arena_decay_constants.h b/include/jemalloc/internal/arena_decay_constants.h new file mode 100644 index 00000000..e98b9624 --- /dev/null +++ b/include/jemalloc/internal/arena_decay_constants.h @@ -0,0 +1,13 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_DECAY_CONSTANTS_H +#define JEMALLOC_INTERNAL_ARENA_DECAY_CONSTANTS_H + +/* + * Minimal header so both arena.h and tsd_internals.h can share decay-related + * constants without dragging the full arena types into the tsd parse chain + * (which is loaded long before arena.h via ckh.h -> tsd.h). + */ + +/* Number of event ticks between time checks. */ +#define ARENA_DECAY_NTICKS_PER_UPDATE 1000 + +#endif /* JEMALLOC_INTERNAL_ARENA_DECAY_CONSTANTS_H */ diff --git a/include/jemalloc/internal/arena_inlines.h b/include/jemalloc/internal/arena_inlines.h new file mode 100644 index 00000000..33e91e41 --- /dev/null +++ b/include/jemalloc/internal/arena_inlines.h @@ -0,0 +1,399 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_INLINES_H +#define JEMALLOC_INTERNAL_ARENA_INLINES_H + +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arenas_management.h" +#include "jemalloc/internal/div.h" +#include "jemalloc/internal/emap.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/large.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/safety_check.h" +#include "jemalloc/internal/sc.h" +#include "jemalloc/internal/sz.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/ticker.h" + +/* Cheap field accessors. 
*/ + +static inline unsigned +arena_ind_get(const arena_t *arena) { + return arena->ind; +} + +static inline void +arena_internal_add(arena_t *arena, size_t size) { + atomic_fetch_add_zu(&arena->stats.internal, size, ATOMIC_RELAXED); +} + +static inline void +arena_internal_sub(arena_t *arena, size_t size) { + atomic_fetch_sub_zu(&arena->stats.internal, size, ATOMIC_RELAXED); +} + +static inline size_t +arena_internal_get(const arena_t *arena) { + return atomic_load_zu(&arena->stats.internal, ATOMIC_RELAXED); +} + +static inline bool +arena_is_auto(const arena_t *arena) { + assert(narenas_auto > 0); + + return (arena_ind_get(arena) < manual_arena_base); +} + +static inline arena_t * +arena_get_from_edata(const edata_t *edata) { + return (arena_t *)atomic_load_p( + &arenas[edata_arena_ind_get(edata)], ATOMIC_RELAXED); +} + +/* Arena selection and migration. */ + +static inline void +thread_migrate_arena(tsd_t *tsd, arena_t *oldarena, arena_t *newarena) { + assert(oldarena != NULL); + assert(newarena != NULL); + + arena_migrate(tsd, oldarena, newarena); + if (tcache_available(tsd)) { + tcache_arena_reassociate(tsd_tsdn(tsd), + tsd_tcache_slowp_get(tsd), newarena); + } +} + +static inline void +percpu_arena_update(tsd_t *tsd, unsigned cpu) { + assert(have_percpu_arena); + arena_t *oldarena = tsd_arena_get(tsd); + assert(oldarena != NULL); + unsigned oldind = arena_ind_get(oldarena); + + if (oldind != cpu) { + unsigned newind = cpu; + arena_t *newarena = arena_get(tsd_tsdn(tsd), newind, true); + assert(newarena != NULL); + + thread_migrate_arena(tsd, oldarena, newarena); + } +} + +/* Choose an arena based on a per-thread value. */ +static inline arena_t * +arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { + arena_t *ret; + + if (arena != NULL) { + return arena; + } + + /* During reentrancy, arena 0 is the safest bet. */ + if (unlikely(tsd_reentrancy_level_get(tsd) > 0)) { + return arena_get(tsd_tsdn(tsd), 0, true); + } + + ret = internal ? 
tsd_iarena_get(tsd) : tsd_arena_get(tsd); + if (unlikely(ret == NULL)) { + ret = arena_choose_hard(tsd, internal); + assert(ret); + if (tcache_available(tsd)) { + tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); + if (tcache_slow->arena != NULL) { + /* See comments in tsd_tcache_data_init().*/ + assert(tcache_slow->arena + == arena_get(tsd_tsdn(tsd), 0, false)); + if (tcache_slow->arena != ret) { + tcache_arena_reassociate(tsd_tsdn(tsd), + tcache_slow, ret); + } + } else { + tcache_arena_associate( + tsd_tsdn(tsd), tcache_slow, ret); + } + } + } + + /* + * Note that for percpu arena, if the current arena is outside of the + * auto percpu arena range, (i.e. thread is assigned to a manually + * managed arena), then percpu arena is skipped. + */ + if (have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena) + && !internal + && (arena_ind_get(ret) < percpu_arena_ind_limit(opt_percpu_arena)) + && (ret->last_thd != tsd_tsdn(tsd))) { + unsigned ind = percpu_arena_choose(); + if (arena_ind_get(ret) != ind) { + percpu_arena_update(tsd, ind); + ret = tsd_arena_get(tsd); + } + ret->last_thd = tsd_tsdn(tsd); + } + + return ret; +} + +static inline arena_t * +arena_choose(tsd_t *tsd, arena_t *arena) { + return arena_choose_impl(tsd, arena, false); +} + +static inline arena_t * +arena_ichoose(tsd_t *tsd, arena_t *arena) { + return arena_choose_impl(tsd, arena, true); +} + +JEMALLOC_ALWAYS_INLINE arena_t * +arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { + if (arena != NULL) { + return arena; + } + + /* + * For huge allocations, use the dedicated huge arena if both are true: + * 1) is using auto arena selection (i.e. arena == NULL), and 2) the + * thread is not assigned to a manual arena. 
+ */ + arena_t *tsd_arena = tsd_arena_get(tsd); + if (tsd_arena == NULL) { + tsd_arena = arena_choose(tsd, NULL); + } + + size_t threshold = atomic_load_zu( + &tsd_arena->pa_shard.pac.oversize_threshold, ATOMIC_RELAXED); + if (unlikely(size >= threshold) && arena_is_auto(tsd_arena)) { + return arena_choose_huge(tsd); + } + + return tsd_arena; +} + +JEMALLOC_ALWAYS_INLINE bool +large_dalloc_safety_checks(edata_t *edata, const void *ptr, size_t input_size) { + if (!config_opt_safety_checks) { + return false; + } + + /* + * Eagerly detect double free and sized dealloc bugs for large sizes. + * The cost is low enough (as edata will be accessed anyway) to be + * enabled all the time. + */ + if (unlikely(edata == NULL + || edata_state_get(edata) != extent_state_active)) { + safety_check_fail( + "Invalid deallocation detected: " + "pages being freed (%p) not currently active, " + "possibly caused by double free bugs.", + ptr); + return true; + } + if (unlikely(input_size != edata_usize_get(edata) + || input_size > SC_LARGE_MAXCLASS)) { + safety_check_fail_sized_dealloc(/* current_dealloc */ true, ptr, + /* true_size */ edata_usize_get(edata), input_size); + return true; + } + + return false; +} + +JEMALLOC_ALWAYS_INLINE void +arena_prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, + prof_info_t *prof_info, bool reset_recent) { + cassert(config_prof); + assert(ptr != NULL); + assert(prof_info != NULL); + + edata_t *edata = NULL; + bool is_slab; + + /* Static check. */ + if (alloc_ctx == NULL) { + edata = emap_edata_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr); + is_slab = edata_slab_get(edata); + } else if (unlikely(!(is_slab = alloc_ctx->slab))) { + edata = emap_edata_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr); + } + + if (unlikely(!is_slab)) { + /* edata must have been initialized at this point. */ + assert(edata != NULL); + size_t usize = (alloc_ctx == NULL) + ? 
edata_usize_get(edata) + : emap_alloc_ctx_usize_get(alloc_ctx); + if (reset_recent + && large_dalloc_safety_checks(edata, ptr, usize)) { + prof_info->alloc_tctx = PROF_TCTX_SENTINEL; + return; + } + large_prof_info_get(tsd, edata, prof_info, reset_recent); + } else { + prof_info->alloc_tctx = PROF_TCTX_SENTINEL; + /* + * No need to set other fields in prof_info; they will never be + * accessed if alloc_tctx == PROF_TCTX_SENTINEL. + */ + } +} + +JEMALLOC_ALWAYS_INLINE void +arena_prof_tctx_reset( + tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { + cassert(config_prof); + assert(ptr != NULL); + + /* Static check. */ + if (alloc_ctx == NULL) { + edata_t *edata = emap_edata_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr); + if (unlikely(!edata_slab_get(edata))) { + large_prof_tctx_reset(edata); + } + } else { + if (unlikely(!alloc_ctx->slab)) { + edata_t *edata = emap_edata_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr); + large_prof_tctx_reset(edata); + } + } +} + +JEMALLOC_ALWAYS_INLINE void +arena_prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { + cassert(config_prof); + assert(ptr != NULL); + + edata_t *edata = emap_edata_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr); + assert(!edata_slab_get(edata)); + + large_prof_tctx_reset(edata); +} + +JEMALLOC_ALWAYS_INLINE void +arena_prof_info_set( + tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx, size_t size) { + cassert(config_prof); + + assert(!edata_slab_get(edata)); + large_prof_info_set(edata, tctx, size); +} + +JEMALLOC_ALWAYS_INLINE void +arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { + if (unlikely(tsdn_null(tsdn))) { + return; + } + tsd_t *tsd = tsdn_tsd(tsdn); + /* + * We use the ticker_geom_t to avoid having per-arena state in the tsd. 
+ * Instead of having a countdown-until-decay timer running for every + * arena in every thread, we flip a coin once per tick, whose + * probability of coming up heads is 1/nticks; this is effectively the + * operation of the ticker_geom_t. Each arena has the same chance of a + * coinflip coming up heads (1/ARENA_DECAY_NTICKS_PER_UPDATE), so we can + * use a single ticker for all of them. + */ + ticker_geom_t *decay_ticker = tsd_arena_decay_tickerp_get(tsd); + uint64_t *prng_state = tsd_prng_statep_get(tsd); + if (unlikely(ticker_geom_ticks(decay_ticker, prng_state, nticks, + tsd_reentrancy_level_get(tsd) > 0))) { + arena_decay(tsdn, arena, false, false); + } +} + +JEMALLOC_ALWAYS_INLINE void +arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { + arena_decay_ticks(tsdn, arena, 1); +} + +JEMALLOC_ALWAYS_INLINE arena_t * +arena_aalloc(tsdn_t *tsdn, const void *ptr) { + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); + unsigned arena_ind = edata_arena_ind_get(edata); + return (arena_t *)atomic_load_p(&arenas[arena_ind], ATOMIC_RELAXED); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_salloc(tsdn_t *tsdn, const void *ptr) { + assert(ptr != NULL); + emap_alloc_ctx_t alloc_ctx; + emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx); + assert(alloc_ctx.szind != SC_NSIZES); + + return emap_alloc_ctx_usize_get(&alloc_ctx); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_vsalloc(tsdn_t *tsdn, const void *ptr) { + /* + * Return 0 if ptr is not within an extent managed by jemalloc. This + * function has two extra costs relative to isalloc(): + * - The rtree calls cannot claim to be dependent lookups, which induces + * rtree lookup load dependencies. + * - The lookup may fail, so there is an extra branch to check for + * failure. 
+ */ + + emap_full_alloc_ctx_t full_alloc_ctx; + bool missing = emap_full_alloc_ctx_try_lookup( + tsdn, &arena_emap_global, ptr, &full_alloc_ctx); + if (missing) { + return 0; + } + + if (full_alloc_ctx.edata == NULL) { + return 0; + } + assert(edata_state_get(full_alloc_ctx.edata) == extent_state_active); + /* Only slab members should be looked up via interior pointers. */ + assert(edata_addr_get(full_alloc_ctx.edata) == ptr + || edata_slab_get(full_alloc_ctx.edata)); + + assert(full_alloc_ctx.szind != SC_NSIZES); + + return edata_usize_get(full_alloc_ctx.edata); +} + +static inline void +arena_cache_oblivious_randomize( + tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t alignment) { + assert(edata_base_get(edata) == edata_addr_get(edata)); + + if (alignment < PAGE) { + unsigned lg_range = LG_PAGE + - lg_floor(CACHELINE_CEILING(alignment)); + size_t r; + if (!tsdn_null(tsdn)) { + tsd_t *tsd = tsdn_tsd(tsdn); + r = (size_t)prng_lg_range_u64( + tsd_prng_statep_get(tsd), lg_range); + } else { + uint64_t stack_value = (uint64_t)(uintptr_t)&r; + r = (size_t)prng_lg_range_u64(&stack_value, lg_range); + } + uintptr_t random_offset = ((uintptr_t)r) + << (LG_PAGE - lg_range); + edata->e_addr = (void *)((byte_t *)edata->e_addr + + random_offset); + assert(ALIGNMENT_ADDR2BASE(edata->e_addr, alignment) + == edata->e_addr); + } +} + +static inline bin_t * +arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) { + bin_t *shard0 = (bin_t *)((byte_t *)arena + arena_bin_offsets[binind]); + return shard0 + binshard; +} + +#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_H */ diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h deleted file mode 100644 index a899928c..00000000 --- a/include/jemalloc/internal/arena_inlines_a.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_ARENA_INLINES_A_H -#define JEMALLOC_INTERNAL_ARENA_INLINES_A_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include 
"jemalloc/internal/arena_structs.h" - -static inline unsigned -arena_ind_get(const arena_t *arena) { - return arena->ind; -} - -static inline void -arena_internal_add(arena_t *arena, size_t size) { - atomic_fetch_add_zu(&arena->stats.internal, size, ATOMIC_RELAXED); -} - -static inline void -arena_internal_sub(arena_t *arena, size_t size) { - atomic_fetch_sub_zu(&arena->stats.internal, size, ATOMIC_RELAXED); -} - -static inline size_t -arena_internal_get(const arena_t *arena) { - return atomic_load_zu(&arena->stats.internal, ATOMIC_RELAXED); -} - -#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_A_H */ diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h deleted file mode 100644 index 64957c7b..00000000 --- a/include/jemalloc/internal/arena_inlines_b.h +++ /dev/null @@ -1,534 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_ARENA_INLINES_B_H -#define JEMALLOC_INTERNAL_ARENA_INLINES_B_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/arena_externs.h" -#include "jemalloc/internal/arena_structs.h" -#include "jemalloc/internal/bin_inlines.h" -#include "jemalloc/internal/div.h" -#include "jemalloc/internal/emap.h" -#include "jemalloc/internal/jemalloc_internal_inlines_b.h" -#include "jemalloc/internal/jemalloc_internal_types.h" -#include "jemalloc/internal/large_externs.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/prof_externs.h" -#include "jemalloc/internal/prof_structs.h" -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/safety_check.h" -#include "jemalloc/internal/sc.h" -#include "jemalloc/internal/sz.h" -#include "jemalloc/internal/tcache_inlines.h" -#include "jemalloc/internal/ticker.h" - -static inline arena_t * -arena_get_from_edata(const edata_t *edata) { - return (arena_t *)atomic_load_p( - &arenas[edata_arena_ind_get(edata)], ATOMIC_RELAXED); -} - -JEMALLOC_ALWAYS_INLINE arena_t * -arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { - if 
(arena != NULL) { - return arena; - } - - /* - * For huge allocations, use the dedicated huge arena if both are true: - * 1) is using auto arena selection (i.e. arena == NULL), and 2) the - * thread is not assigned to a manual arena. - */ - arena_t *tsd_arena = tsd_arena_get(tsd); - if (tsd_arena == NULL) { - tsd_arena = arena_choose(tsd, NULL); - } - - size_t threshold = atomic_load_zu( - &tsd_arena->pa_shard.pac.oversize_threshold, ATOMIC_RELAXED); - if (unlikely(size >= threshold) && arena_is_auto(tsd_arena)) { - return arena_choose_huge(tsd); - } - - return tsd_arena; -} - -JEMALLOC_ALWAYS_INLINE bool -large_dalloc_safety_checks(edata_t *edata, const void *ptr, size_t input_size) { - if (!config_opt_safety_checks) { - return false; - } - - /* - * Eagerly detect double free and sized dealloc bugs for large sizes. - * The cost is low enough (as edata will be accessed anyway) to be - * enabled all the time. - */ - if (unlikely(edata == NULL - || edata_state_get(edata) != extent_state_active)) { - safety_check_fail( - "Invalid deallocation detected: " - "pages being freed (%p) not currently active, " - "possibly caused by double free bugs.", - ptr); - return true; - } - if (unlikely(input_size != edata_usize_get(edata) - || input_size > SC_LARGE_MAXCLASS)) { - safety_check_fail_sized_dealloc(/* current_dealloc */ true, ptr, - /* true_size */ edata_usize_get(edata), input_size); - return true; - } - - return false; -} - -JEMALLOC_ALWAYS_INLINE void -arena_prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, - prof_info_t *prof_info, bool reset_recent) { - cassert(config_prof); - assert(ptr != NULL); - assert(prof_info != NULL); - - edata_t *edata = NULL; - bool is_slab; - - /* Static check. 
*/ - if (alloc_ctx == NULL) { - edata = emap_edata_lookup( - tsd_tsdn(tsd), &arena_emap_global, ptr); - is_slab = edata_slab_get(edata); - } else if (unlikely(!(is_slab = alloc_ctx->slab))) { - edata = emap_edata_lookup( - tsd_tsdn(tsd), &arena_emap_global, ptr); - } - - if (unlikely(!is_slab)) { - /* edata must have been initialized at this point. */ - assert(edata != NULL); - size_t usize = (alloc_ctx == NULL) - ? edata_usize_get(edata) - : emap_alloc_ctx_usize_get(alloc_ctx); - if (reset_recent - && large_dalloc_safety_checks(edata, ptr, usize)) { - prof_info->alloc_tctx = PROF_TCTX_SENTINEL; - return; - } - large_prof_info_get(tsd, edata, prof_info, reset_recent); - } else { - prof_info->alloc_tctx = PROF_TCTX_SENTINEL; - /* - * No need to set other fields in prof_info; they will never be - * accessed if alloc_tctx == PROF_TCTX_SENTINEL. - */ - } -} - -JEMALLOC_ALWAYS_INLINE void -arena_prof_tctx_reset( - tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { - cassert(config_prof); - assert(ptr != NULL); - - /* Static check. 
*/ - if (alloc_ctx == NULL) { - edata_t *edata = emap_edata_lookup( - tsd_tsdn(tsd), &arena_emap_global, ptr); - if (unlikely(!edata_slab_get(edata))) { - large_prof_tctx_reset(edata); - } - } else { - if (unlikely(!alloc_ctx->slab)) { - edata_t *edata = emap_edata_lookup( - tsd_tsdn(tsd), &arena_emap_global, ptr); - large_prof_tctx_reset(edata); - } - } -} - -JEMALLOC_ALWAYS_INLINE void -arena_prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { - cassert(config_prof); - assert(ptr != NULL); - - edata_t *edata = emap_edata_lookup( - tsd_tsdn(tsd), &arena_emap_global, ptr); - assert(!edata_slab_get(edata)); - - large_prof_tctx_reset(edata); -} - -JEMALLOC_ALWAYS_INLINE void -arena_prof_info_set( - tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx, size_t size) { - cassert(config_prof); - - assert(!edata_slab_get(edata)); - large_prof_info_set(edata, tctx, size); -} - -JEMALLOC_ALWAYS_INLINE void -arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { - if (unlikely(tsdn_null(tsdn))) { - return; - } - tsd_t *tsd = tsdn_tsd(tsdn); - /* - * We use the ticker_geom_t to avoid having per-arena state in the tsd. - * Instead of having a countdown-until-decay timer running for every - * arena in every thread, we flip a coin once per tick, whose - * probability of coming up heads is 1/nticks; this is effectively the - * operation of the ticker_geom_t. Each arena has the same chance of a - * coinflip coming up heads (1/ARENA_DECAY_NTICKS_PER_UPDATE), so we can - * use a single ticker for all of them. 
- */ - ticker_geom_t *decay_ticker = tsd_arena_decay_tickerp_get(tsd); - uint64_t *prng_state = tsd_prng_statep_get(tsd); - if (unlikely(ticker_geom_ticks(decay_ticker, prng_state, nticks, - tsd_reentrancy_level_get(tsd) > 0))) { - arena_decay(tsdn, arena, false, false); - } -} - -JEMALLOC_ALWAYS_INLINE void -arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { - arena_decay_ticks(tsdn, arena, 1); -} - -JEMALLOC_ALWAYS_INLINE void * -arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, - bool slab, tcache_t *tcache, bool slow_path) { - assert(!tsdn_null(tsdn) || tcache == NULL); - - if (likely(tcache != NULL)) { - if (likely(slab)) { - assert(sz_can_use_slab(size)); - return tcache_alloc_small(tsdn_tsd(tsdn), arena, tcache, - size, ind, zero, slow_path); - } else if (likely(tcache_can_cache_large(tcache, ind))) { - return tcache_alloc_large(tsdn_tsd(tsdn), arena, tcache, - size, ind, zero, slow_path); - } - /* (size > tcache_max) case falls through. */ - } - - return arena_malloc_hard(tsdn, arena, size, ind, zero, slab); -} - -JEMALLOC_ALWAYS_INLINE arena_t * -arena_aalloc(tsdn_t *tsdn, const void *ptr) { - edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); - unsigned arena_ind = edata_arena_ind_get(edata); - return (arena_t *)atomic_load_p(&arenas[arena_ind], ATOMIC_RELAXED); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_salloc(tsdn_t *tsdn, const void *ptr) { - assert(ptr != NULL); - emap_alloc_ctx_t alloc_ctx; - emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx); - assert(alloc_ctx.szind != SC_NSIZES); - - return emap_alloc_ctx_usize_get(&alloc_ctx); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_vsalloc(tsdn_t *tsdn, const void *ptr) { - /* - * Return 0 if ptr is not within an extent managed by jemalloc. This - * function has two extra costs relative to isalloc(): - * - The rtree calls cannot claim to be dependent lookups, which induces - * rtree lookup load dependencies. 
- * - The lookup may fail, so there is an extra branch to check for - * failure. - */ - - emap_full_alloc_ctx_t full_alloc_ctx; - bool missing = emap_full_alloc_ctx_try_lookup( - tsdn, &arena_emap_global, ptr, &full_alloc_ctx); - if (missing) { - return 0; - } - - if (full_alloc_ctx.edata == NULL) { - return 0; - } - assert(edata_state_get(full_alloc_ctx.edata) == extent_state_active); - /* Only slab members should be looked up via interior pointers. */ - assert(edata_addr_get(full_alloc_ctx.edata) == ptr - || edata_slab_get(full_alloc_ctx.edata)); - - assert(full_alloc_ctx.szind != SC_NSIZES); - - return edata_usize_get(full_alloc_ctx.edata); -} - -static inline void -arena_dalloc_large_no_tcache( - tsdn_t *tsdn, void *ptr, szind_t szind, size_t usize) { - /* - * szind is still needed in this function mainly becuase - * szind < SC_NBINS determines not only if this is a small alloc, - * but also if szind is valid (an inactive extent would have - * szind == SC_NSIZES). - */ - if (config_prof && unlikely(szind < SC_NBINS)) { - arena_dalloc_promoted(tsdn, ptr, NULL, true); - } else { - edata_t *edata = emap_edata_lookup( - tsdn, &arena_emap_global, ptr); - if (large_dalloc_safety_checks(edata, ptr, usize)) { - /* See the comment in isfree. */ - return; - } - large_dalloc(tsdn, edata); - } -} - -static inline void -arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { - assert(ptr != NULL); - - emap_alloc_ctx_t alloc_ctx; - emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx); - - if (config_debug) { - edata_t *edata = emap_edata_lookup( - tsdn, &arena_emap_global, ptr); - assert(alloc_ctx.szind == edata_szind_get(edata)); - assert(alloc_ctx.szind < SC_NSIZES); - assert(alloc_ctx.slab == edata_slab_get(edata)); - assert(emap_alloc_ctx_usize_get(&alloc_ctx) - == edata_usize_get(edata)); - } - - if (likely(alloc_ctx.slab)) { - /* Small allocation. 
*/ - arena_dalloc_small(tsdn, ptr); - } else { - arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind, - emap_alloc_ctx_usize_get(&alloc_ctx)); - } -} - -JEMALLOC_ALWAYS_INLINE void -arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, - size_t usize, bool slow_path) { - assert(!tsdn_null(tsdn) && tcache != NULL); - bool is_sample_promoted = config_prof && szind < SC_NBINS; - if (unlikely(is_sample_promoted)) { - arena_dalloc_promoted(tsdn, ptr, tcache, slow_path); - } else { - if (tcache_can_cache_large(tcache, szind)) { - tcache_dalloc_large( - tsdn_tsd(tsdn), tcache, ptr, szind, slow_path); - } else { - edata_t *edata = emap_edata_lookup( - tsdn, &arena_emap_global, ptr); - if (large_dalloc_safety_checks(edata, ptr, usize)) { - /* See the comment in isfree. */ - return; - } - large_dalloc(tsdn, edata); - } - } -} - -JEMALLOC_ALWAYS_INLINE bool -arena_tcache_dalloc_small_safety_check(tsdn_t *tsdn, void *ptr) { - if (!config_debug) { - return false; - } - edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); - szind_t binind = edata_szind_get(edata); - div_info_t div_info = arena_binind_div_info[binind]; - /* - * Calls the internal function bin_slab_regind_impl because the - * safety check does not require a lock. 
- */ - size_t regind = bin_slab_regind_impl(&div_info, binind, edata, ptr); - slab_data_t *slab_data = edata_slab_data_get(edata); - const bin_info_t *bin_info = &bin_infos[binind]; - assert(edata_nfree_get(edata) < bin_info->nregs); - if (unlikely(!bitmap_get( - slab_data->bitmap, &bin_info->bitmap_info, regind))) { - safety_check_fail( - "Invalid deallocation detected: the pointer being freed (%p) not " - "currently active, possibly caused by double free bugs.\n", - ptr); - return true; - } - return false; -} - -JEMALLOC_ALWAYS_INLINE void -arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, - emap_alloc_ctx_t *caller_alloc_ctx, bool slow_path) { - assert(!tsdn_null(tsdn) || tcache == NULL); - assert(ptr != NULL); - - if (unlikely(tcache == NULL)) { - arena_dalloc_no_tcache(tsdn, ptr); - return; - } - - emap_alloc_ctx_t alloc_ctx; - if (caller_alloc_ctx != NULL) { - alloc_ctx = *caller_alloc_ctx; - } else { - util_assume(tsdn != NULL); - emap_alloc_ctx_lookup( - tsdn, &arena_emap_global, ptr, &alloc_ctx); - } - - if (config_debug) { - edata_t *edata = emap_edata_lookup( - tsdn, &arena_emap_global, ptr); - assert(alloc_ctx.szind == edata_szind_get(edata)); - assert(alloc_ctx.szind < SC_NSIZES); - assert(alloc_ctx.slab == edata_slab_get(edata)); - assert(emap_alloc_ctx_usize_get(&alloc_ctx) - == edata_usize_get(edata)); - } - - if (likely(alloc_ctx.slab)) { - /* Small allocation. 
*/ - if (arena_tcache_dalloc_small_safety_check(tsdn, ptr)) { - return; - } - tcache_dalloc_small( - tsdn_tsd(tsdn), tcache, ptr, alloc_ctx.szind, slow_path); - } else { - arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind, - emap_alloc_ctx_usize_get(&alloc_ctx), slow_path); - } -} - -static inline void -arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { - assert(ptr != NULL); - assert(size <= SC_LARGE_MAXCLASS); - - emap_alloc_ctx_t alloc_ctx; - if (!config_prof || !opt_prof) { - /* - * There is no risk of being confused by a promoted sampled - * object, so base szind and slab on the given size. - */ - szind_t szind = sz_size2index(size); - emap_alloc_ctx_init( - &alloc_ctx, szind, (szind < SC_NBINS), size); - } - - if ((config_prof && opt_prof) || config_debug) { - emap_alloc_ctx_lookup( - tsdn, &arena_emap_global, ptr, &alloc_ctx); - - assert(alloc_ctx.szind == sz_size2index(size)); - assert((config_prof && opt_prof) - || alloc_ctx.slab == (alloc_ctx.szind < SC_NBINS)); - - if (config_debug) { - edata_t *edata = emap_edata_lookup( - tsdn, &arena_emap_global, ptr); - assert(alloc_ctx.szind == edata_szind_get(edata)); - assert(alloc_ctx.slab == edata_slab_get(edata)); - } - } - - if (likely(alloc_ctx.slab)) { - /* Small allocation. */ - arena_dalloc_small(tsdn, ptr); - } else { - arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind, - emap_alloc_ctx_usize_get(&alloc_ctx)); - } -} - -JEMALLOC_ALWAYS_INLINE void -arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - emap_alloc_ctx_t *caller_alloc_ctx, bool slow_path) { - assert(!tsdn_null(tsdn) || tcache == NULL); - assert(ptr != NULL); - assert(size <= SC_LARGE_MAXCLASS); - - if (unlikely(tcache == NULL)) { - arena_sdalloc_no_tcache(tsdn, ptr, size); - return; - } - - emap_alloc_ctx_t alloc_ctx; - if (config_prof && opt_prof) { - if (caller_alloc_ctx == NULL) { - /* Uncommon case and should be a static check. 
*/ - emap_alloc_ctx_lookup( - tsdn, &arena_emap_global, ptr, &alloc_ctx); - assert(alloc_ctx.szind == sz_size2index(size)); - assert(emap_alloc_ctx_usize_get(&alloc_ctx) == size); - } else { - alloc_ctx = *caller_alloc_ctx; - } - } else { - /* - * There is no risk of being confused by a promoted sampled - * object, so base szind and slab on the given size. - */ - alloc_ctx.szind = sz_size2index(size); - alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); - } - - if (config_debug) { - edata_t *edata = emap_edata_lookup( - tsdn, &arena_emap_global, ptr); - assert(alloc_ctx.szind == edata_szind_get(edata)); - assert(alloc_ctx.slab == edata_slab_get(edata)); - emap_alloc_ctx_init( - &alloc_ctx, alloc_ctx.szind, alloc_ctx.slab, sz_s2u(size)); - assert(emap_alloc_ctx_usize_get(&alloc_ctx) - == edata_usize_get(edata)); - } - - if (likely(alloc_ctx.slab)) { - /* Small allocation. */ - if (arena_tcache_dalloc_small_safety_check(tsdn, ptr)) { - return; - } - tcache_dalloc_small( - tsdn_tsd(tsdn), tcache, ptr, alloc_ctx.szind, slow_path); - } else { - arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind, - sz_s2u(size), slow_path); - } -} - -static inline void -arena_cache_oblivious_randomize( - tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t alignment) { - assert(edata_base_get(edata) == edata_addr_get(edata)); - - if (alignment < PAGE) { - unsigned lg_range = LG_PAGE - - lg_floor(CACHELINE_CEILING(alignment)); - size_t r; - if (!tsdn_null(tsdn)) { - tsd_t *tsd = tsdn_tsd(tsdn); - r = (size_t)prng_lg_range_u64( - tsd_prng_statep_get(tsd), lg_range); - } else { - uint64_t stack_value = (uint64_t)(uintptr_t)&r; - r = (size_t)prng_lg_range_u64(&stack_value, lg_range); - } - uintptr_t random_offset = ((uintptr_t)r) - << (LG_PAGE - lg_range); - edata->e_addr = (void *)((byte_t *)edata->e_addr - + random_offset); - assert(ALIGNMENT_ADDR2BASE(edata->e_addr, alignment) - == edata->e_addr); - } -} - -static inline bin_t * -arena_get_bin(arena_t *arena, szind_t binind, unsigned 
binshard) { - bin_t *shard0 = (bin_t *)((byte_t *)arena + arena_bin_offsets[binind]); - return shard0 + binshard; -} - -#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */ diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h deleted file mode 100644 index ccab0a17..00000000 --- a/include/jemalloc/internal/arena_structs.h +++ /dev/null @@ -1,114 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_H -#define JEMALLOC_INTERNAL_ARENA_STRUCTS_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/arena_stats.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/bin.h" -#include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/counter.h" -#include "jemalloc/internal/ecache.h" -#include "jemalloc/internal/edata_cache.h" -#include "jemalloc/internal/extent_dss.h" -#include "jemalloc/internal/jemalloc_internal_types.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/nstime.h" -#include "jemalloc/internal/pa.h" -#include "jemalloc/internal/ql.h" -#include "jemalloc/internal/sc.h" -#include "jemalloc/internal/ticker.h" - -struct arena_s { - /* - * Number of threads currently assigned to this arena. Each thread has - * two distinct assignments, one for application-serving allocation, and - * the other for internal metadata allocation. Internal metadata must - * not be allocated from arenas explicitly created via the arenas.create - * mallctl, because the arena..reset mallctl indiscriminately - * discards all allocations for the affected arena. - * - * 0: Application allocation. - * 1: Internal metadata allocation. - * - * Synchronization: atomic. - */ - atomic_u_t nthreads[2]; - - /* Next bin shard for binding new threads. Synchronization: atomic. 
*/ - atomic_u_t binshard_next; - - /* - * When percpu_arena is enabled, to amortize the cost of reading / - * updating the current CPU id, track the most recent thread accessing - * this arena, and only read CPU if there is a mismatch. - */ - tsdn_t *last_thd; - - /* Synchronization: internal. */ - arena_stats_t stats; - - /* - * List of cache_bin_array_descriptors for extant threads associated - * with this arena. Stats from these are merged incrementally, and at - * exit if opt_stats_print is enabled. - * - * Synchronization: cache_bin_array_descriptor_ql_mtx. - */ - ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql; - malloc_mutex_t cache_bin_array_descriptor_ql_mtx; - - /* - * Represents a dss_prec_t, but atomically. - * - * Synchronization: atomic. - */ - atomic_u_t dss_prec; - - /* - * Extant large allocations. - * - * Synchronization: large_mtx. - */ - edata_list_active_t large; - /* Synchronizes all large allocation/update/deallocation. */ - malloc_mutex_t large_mtx; - - /* The page-level allocator shard this arena uses. */ - pa_shard_t pa_shard; - - /* - * A cached copy of base->ind. This can get accessed on hot paths; - * looking it up in base requires an extra pointer hop / cache miss. - */ - unsigned ind; - - /* - * Base allocator, from which arena metadata are allocated. - * - * Synchronization: internal. - */ - base_t *base; - /* Used to determine uptime. Read-only after initialization. */ - nstime_t create_time; - - /* The name of the arena. */ - char name[ARENA_NAME_LEN]; - - /* - * The arena is allocated alongside its bins; really this is a - * dynamically sized array determined by the binshard settings. - * Enforcing cacheline-alignment to minimize the number of cachelines - * touched on the hot paths. - */ - JEMALLOC_WARN_ON_USAGE( - "Do not use this field directly. 
" - "Use `arena_get_bin` instead.") - JEMALLOC_ALIGNED(CACHELINE) -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - bin_t all_bins[]; -#else - bin_t all_bins[0]; -#endif -}; - -#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_H */ diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h deleted file mode 100644 index c586164f..00000000 --- a/include/jemalloc/internal/arena_types.h +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_ARENA_TYPES_H -#define JEMALLOC_INTERNAL_ARENA_TYPES_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/sc.h" - -/* Default decay times in milliseconds. */ -#define DIRTY_DECAY_MS_DEFAULT ZD(10 * 1000) -#define MUZZY_DECAY_MS_DEFAULT (0) -/* Number of event ticks between time checks. */ -#define ARENA_DECAY_NTICKS_PER_UPDATE 1000 -/* Maximum length of the arena name. */ -#define ARENA_NAME_LEN 32 - -typedef struct arena_s arena_t; - -typedef enum { - percpu_arena_mode_names_base = 0, /* Used for options processing. */ - - /* - * *_uninit are used only during bootstrapping, and must correspond - * to initialized variant plus percpu_arena_mode_enabled_base. - */ - percpu_arena_uninit = 0, - per_phycpu_arena_uninit = 1, - - /* All non-disabled modes must come after percpu_arena_disabled. */ - percpu_arena_disabled = 2, - - percpu_arena_mode_names_limit = 3, /* Used for options processing. */ - percpu_arena_mode_enabled_base = 3, - - percpu_arena = 3, - per_phycpu_arena = 4 /* Hyper threads share arena. */ -} percpu_arena_mode_t; - -#define PERCPU_ARENA_ENABLED(m) ((m) >= percpu_arena_mode_enabled_base) -#define PERCPU_ARENA_DEFAULT percpu_arena_disabled - -/* - * When allocation_size >= oversize_threshold, use the dedicated huge arena - * (unless have explicitly spicified arena index). 0 disables the feature. 
- */ -#define OVERSIZE_THRESHOLD_DEFAULT (8 << 20) - -struct arena_config_s { - /* extent hooks to be used for the arena */ - extent_hooks_t *extent_hooks; - - /* - * Use extent hooks for metadata (base) allocations when true. - */ - bool metadata_use_hooks; -}; - -typedef struct arena_config_s arena_config_t; - -extern const arena_config_t arena_config_default; - -#endif /* JEMALLOC_INTERNAL_ARENA_TYPES_H */ diff --git a/include/jemalloc/internal/arenas_management.h b/include/jemalloc/internal/arenas_management.h index 58d944d6..d990a278 100644 --- a/include/jemalloc/internal/arenas_management.h +++ b/include/jemalloc/internal/arenas_management.h @@ -1,7 +1,7 @@ #ifndef JEMALLOC_INTERNAL_ARENAS_MANAGEMENT_H #define JEMALLOC_INTERNAL_ARENAS_MANAGEMENT_H -#include "jemalloc/internal/arena_types.h" +#include "jemalloc/internal/arena.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/tsd_types.h" diff --git a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread.h similarity index 58% rename from include/jemalloc/internal/background_thread_structs.h rename to include/jemalloc/internal/background_thread.h index d56673da..db26f1b8 100644 --- a/include/jemalloc/internal/background_thread_structs.h +++ b/include/jemalloc/internal/background_thread.h @@ -1,11 +1,10 @@ -#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H -#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H +#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_H +#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_H #include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/base.h" #include "jemalloc/internal/mutex.h" -/* This file really combines "structs" and "types", but only transitionally. 
*/ - #if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK) # define JEMALLOC_PTHREAD_CREATE_WRAPPER #endif @@ -66,4 +65,33 @@ struct background_thread_stats_s { }; typedef struct background_thread_stats_s background_thread_stats_t; -#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H */ +extern bool opt_background_thread; +extern size_t opt_max_background_threads; +extern malloc_mutex_t background_thread_lock; +extern atomic_b_t background_thread_enabled_state; +extern size_t n_background_threads; +extern size_t max_background_threads; +extern background_thread_info_t *background_thread_info; + +bool background_thread_create(tsd_t *tsd, unsigned arena_ind); +bool background_threads_enable(tsd_t *tsd); +bool background_threads_disable(tsd_t *tsd); +bool background_thread_is_started(background_thread_info_t *info); +void background_thread_wakeup_early( + background_thread_info_t *info, nstime_t *remaining_sleep); +void background_thread_prefork0(tsdn_t *tsdn); +void background_thread_prefork1(tsdn_t *tsdn); +void background_thread_postfork_parent(tsdn_t *tsdn); +void background_thread_postfork_child(tsdn_t *tsdn); +bool background_thread_stats_read( + tsdn_t *tsdn, background_thread_stats_t *stats); +void background_thread_ctl_init(tsdn_t *tsdn); + +#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER +extern int pthread_create_wrapper(pthread_t *__restrict, const pthread_attr_t *, + void *(*)(void *), void *__restrict); +#endif +bool background_thread_boot0(void); +bool background_thread_boot1(tsdn_t *tsdn, base_t *base); + +#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_H */ diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h deleted file mode 100644 index efc0aaa4..00000000 --- a/include/jemalloc/internal/background_thread_externs.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H -#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H - -#include 
"jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/background_thread_structs.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/mutex.h" - -extern bool opt_background_thread; -extern size_t opt_max_background_threads; -extern malloc_mutex_t background_thread_lock; -extern atomic_b_t background_thread_enabled_state; -extern size_t n_background_threads; -extern size_t max_background_threads; -extern background_thread_info_t *background_thread_info; - -bool background_thread_create(tsd_t *tsd, unsigned arena_ind); -bool background_threads_enable(tsd_t *tsd); -bool background_threads_disable(tsd_t *tsd); -bool background_thread_is_started(background_thread_info_t *info); -void background_thread_wakeup_early( - background_thread_info_t *info, nstime_t *remaining_sleep); -void background_thread_prefork0(tsdn_t *tsdn); -void background_thread_prefork1(tsdn_t *tsdn); -void background_thread_postfork_parent(tsdn_t *tsdn); -void background_thread_postfork_child(tsdn_t *tsdn); -bool background_thread_stats_read( - tsdn_t *tsdn, background_thread_stats_t *stats); -void background_thread_ctl_init(tsdn_t *tsdn); - -#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER -extern int pthread_create_wrapper(pthread_t *__restrict, const pthread_attr_t *, - void *(*)(void *), void *__restrict); -#endif -bool background_thread_boot0(void); -bool background_thread_boot1(tsdn_t *tsdn, base_t *base); - -#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H */ diff --git a/include/jemalloc/internal/background_thread_inlines.h b/include/jemalloc/internal/background_thread_inlines.h index e822a3f7..ba6e6644 100644 --- a/include/jemalloc/internal/background_thread_inlines.h +++ b/include/jemalloc/internal/background_thread_inlines.h @@ -2,9 +2,9 @@ #define JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/arena_inlines_a.h" +#include "jemalloc/internal/arena_inlines.h" #include 
"jemalloc/internal/atomic.h" -#include "jemalloc/internal/background_thread_externs.h" +#include "jemalloc/internal/background_thread.h" JEMALLOC_ALWAYS_INLINE bool background_thread_enabled(void) { diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 45c6c5dd..626a459c 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -4,11 +4,15 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/bin_info.h" #include "jemalloc/internal/bin_stats.h" -#include "jemalloc/internal/bin_types.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/div.h" #include "jemalloc/internal/edata.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/sc.h" +#define BIN_SHARDS_MAX (1 << EDATA_BITS_BINSHARD_WIDTH) +#define N_BIN_SHARDS_DEFAULT 1 + /* * A bin contains a set of extents that are currently being used for slab * allocations. @@ -128,4 +132,96 @@ bin_stats_merge(tsdn_t *tsdn, bin_stats_data_t *dst_bin_stats, bin_t *bin) { malloc_mutex_unlock(tsdn, &bin->lock); } +/* + * The dalloc bin info contains just the information that the common paths need + * during tcache flushes. By force-inlining these paths, and using local copies + * of data (so that the compiler knows it's constant), we avoid a whole bunch of + * redundant loads and stores by leaving this information in registers. + */ +typedef struct bin_dalloc_locked_info_s bin_dalloc_locked_info_t; +struct bin_dalloc_locked_info_s { + div_info_t div_info; + uint32_t nregs; + uint64_t ndalloc; +}; + +/* Find the region index of a pointer within a slab. */ +JEMALLOC_ALWAYS_INLINE size_t +bin_slab_regind_impl(const div_info_t *div_info, szind_t binind, + const edata_t *slab, const void *ptr) { + size_t diff, regind; + + /* Freeing a pointer outside the slab can cause assertion failure. 
*/ + assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab)); + assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab)); + /* Freeing an interior pointer can cause assertion failure. */ + assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) + % (uintptr_t)bin_infos[binind].reg_size + == 0); + + diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)); + + /* Avoid doing division with a variable divisor. */ + regind = div_compute(div_info, diff); + assert(regind < bin_infos[binind].nregs); + return regind; +} + +JEMALLOC_ALWAYS_INLINE size_t +bin_slab_regind(const bin_dalloc_locked_info_t *info, szind_t binind, + const edata_t *slab, const void *ptr) { + size_t regind = bin_slab_regind_impl( + &info->div_info, binind, slab, ptr); + return regind; +} + +/* + * Does the deallocation work associated with freeing a single pointer (a + * "step") in between a bin_dalloc_locked begin and end call. + * + * Returns true if arena_slab_dalloc must be called on slab. Doesn't do + * stats updates, which happen during finish (this lets running counts get left + * in a register). + */ +JEMALLOC_ALWAYS_INLINE bool +bin_dalloc_locked_step(tsdn_t *tsdn, bool is_auto, bin_t *bin, + bin_dalloc_locked_info_t *info, szind_t binind, edata_t *slab, + void *ptr) { + const bin_info_t *bin_info = &bin_infos[binind]; + size_t regind = bin_slab_regind(info, binind, slab, ptr); + slab_data_t *slab_data = edata_slab_data_get(slab); + + assert(edata_nfree_get(slab) < bin_info->nregs); + /* Freeing an unallocated pointer can cause assertion failure. 
*/ + assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind)); + + bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind); + edata_nfree_inc(slab); + + if (config_stats) { + info->ndalloc++; + } + + unsigned nfree = edata_nfree_get(slab); + if (nfree == bin_info->nregs) { + bin_dalloc_locked_handle_newly_empty( + tsdn, is_auto, slab, bin); + return true; + } else if (nfree == 1 && slab != bin->slabcur) { + bin_dalloc_locked_handle_newly_nonempty( + tsdn, is_auto, slab, bin); + } + return false; +} + +JEMALLOC_ALWAYS_INLINE void +bin_dalloc_locked_finish(tsdn_t *tsdn, bin_t *bin, + bin_dalloc_locked_info_t *info) { + if (config_stats) { + bin->stats.ndalloc += info->ndalloc; + assert(bin->stats.curregs >= (size_t)info->ndalloc); + bin->stats.curregs -= (size_t)info->ndalloc; + } +} + #endif /* JEMALLOC_INTERNAL_BIN_H */ diff --git a/include/jemalloc/internal/bin_inlines.h b/include/jemalloc/internal/bin_inlines.h deleted file mode 100644 index 31fe4818..00000000 --- a/include/jemalloc/internal/bin_inlines.h +++ /dev/null @@ -1,112 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_BIN_INLINES_H -#define JEMALLOC_INTERNAL_BIN_INLINES_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/bin.h" -#include "jemalloc/internal/bin_info.h" -#include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/div.h" -#include "jemalloc/internal/edata.h" -#include "jemalloc/internal/sc.h" - -/* - * The dalloc bin info contains just the information that the common paths need - * during tcache flushes. By force-inlining these paths, and using local copies - * of data (so that the compiler knows it's constant), we avoid a whole bunch of - * redundant loads and stores by leaving this information in registers. - */ -typedef struct bin_dalloc_locked_info_s bin_dalloc_locked_info_t; -struct bin_dalloc_locked_info_s { - div_info_t div_info; - uint32_t nregs; - uint64_t ndalloc; -}; - -/* Find the region index of a pointer within a slab. 
*/ -JEMALLOC_ALWAYS_INLINE size_t -bin_slab_regind_impl(const div_info_t *div_info, szind_t binind, - const edata_t *slab, const void *ptr) { - size_t diff, regind; - - /* Freeing a pointer outside the slab can cause assertion failure. */ - assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab)); - assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab)); - /* Freeing an interior pointer can cause assertion failure. */ - assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) - % (uintptr_t)bin_infos[binind].reg_size - == 0); - - diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)); - - /* Avoid doing division with a variable divisor. */ - regind = div_compute(div_info, diff); - assert(regind < bin_infos[binind].nregs); - return regind; -} - -JEMALLOC_ALWAYS_INLINE size_t -bin_slab_regind(const bin_dalloc_locked_info_t *info, szind_t binind, - const edata_t *slab, const void *ptr) { - size_t regind = bin_slab_regind_impl( - &info->div_info, binind, slab, ptr); - return regind; -} - -JEMALLOC_ALWAYS_INLINE void -bin_dalloc_locked_begin( - bin_dalloc_locked_info_t *info, szind_t binind) { - info->div_info = arena_binind_div_info[binind]; - info->nregs = bin_infos[binind].nregs; - info->ndalloc = 0; -} - -/* - * Does the deallocation work associated with freeing a single pointer (a - * "step") in between a bin_dalloc_locked begin and end call. - * - * Returns true if arena_slab_dalloc must be called on slab. Doesn't do - * stats updates, which happen during finish (this lets running counts get left - * in a register). 
- */ -JEMALLOC_ALWAYS_INLINE bool -bin_dalloc_locked_step(tsdn_t *tsdn, bool is_auto, bin_t *bin, - bin_dalloc_locked_info_t *info, szind_t binind, edata_t *slab, - void *ptr) { - const bin_info_t *bin_info = &bin_infos[binind]; - size_t regind = bin_slab_regind(info, binind, slab, ptr); - slab_data_t *slab_data = edata_slab_data_get(slab); - - assert(edata_nfree_get(slab) < bin_info->nregs); - /* Freeing an unallocated pointer can cause assertion failure. */ - assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind)); - - bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind); - edata_nfree_inc(slab); - - if (config_stats) { - info->ndalloc++; - } - - unsigned nfree = edata_nfree_get(slab); - if (nfree == bin_info->nregs) { - bin_dalloc_locked_handle_newly_empty( - tsdn, is_auto, slab, bin); - return true; - } else if (nfree == 1 && slab != bin->slabcur) { - bin_dalloc_locked_handle_newly_nonempty( - tsdn, is_auto, slab, bin); - } - return false; -} - -JEMALLOC_ALWAYS_INLINE void -bin_dalloc_locked_finish(tsdn_t *tsdn, bin_t *bin, - bin_dalloc_locked_info_t *info) { - if (config_stats) { - bin->stats.ndalloc += info->ndalloc; - assert(bin->stats.curregs >= (size_t)info->ndalloc); - bin->stats.curregs -= (size_t)info->ndalloc; - } -} - -#endif /* JEMALLOC_INTERNAL_BIN_INLINES_H */ diff --git a/include/jemalloc/internal/bin_types.h b/include/jemalloc/internal/bin_types.h deleted file mode 100644 index b6bad37e..00000000 --- a/include/jemalloc/internal/bin_types.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_BIN_TYPES_H -#define JEMALLOC_INTERNAL_BIN_TYPES_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/sc.h" - -#define BIN_SHARDS_MAX (1 << EDATA_BITS_BINSHARD_WIDTH) -#define N_BIN_SHARDS_DEFAULT 1 - -/* Used in TSD static initializer only. Real init in arena_bind(). 
*/ -#define TSD_BINSHARDS_ZERO_INITIALIZER \ - { \ - { UINT8_MAX } \ - } - -typedef struct tsd_binshards_s tsd_binshards_t; -struct tsd_binshards_s { - uint8_t binshard[SC_NBINS]; -}; - -#endif /* JEMALLOC_INTERNAL_BIN_TYPES_H */ diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index e7a8221c..09f1825c 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -3,7 +3,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena_stats.h" -#include "jemalloc/internal/background_thread_structs.h" +#include "jemalloc/internal/background_thread.h" #include "jemalloc/internal/bin_stats.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/malloc_io.h" diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 217232f7..198d55e1 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -8,13 +8,16 @@ #include "jemalloc/internal/hpdata.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/ph.h" -#include "jemalloc/internal/prof_types.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/slab_data.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/typed_list.h" +/* Opaque to edata; only stored as pointers in e_prof_info_t. */ +typedef struct prof_tctx_s prof_tctx_t; +typedef struct prof_recent_s prof_recent_t; + /* * sizeof(edata_t) is 128 bytes on 64-bit architectures. Ensure the alignment * to free up the low bits in the rtree leaf. 
diff --git a/include/jemalloc/internal/extent_dss.h b/include/jemalloc/internal/extent_dss.h index c84f1799..e9aa247a 100644 --- a/include/jemalloc/internal/extent_dss.h +++ b/include/jemalloc/internal/extent_dss.h @@ -2,9 +2,11 @@ #define JEMALLOC_INTERNAL_EXTENT_DSS_H #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/tsd_types.h" +/* Forward decl; arena.h includes us, so we can't include arena.h back. */ +typedef struct arena_s arena_t; + typedef enum { dss_prec_disabled = 0, dss_prec_primary = 1, diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index b5b12e91..b0876603 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -1,7 +1,6 @@ #ifndef JEMALLOC_INTERNAL_EXTERNS_H #define JEMALLOC_INTERNAL_EXTERNS_H -#include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/fxp.h" #include "jemalloc/internal/hpa_opts.h" diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h deleted file mode 100644 index 751c112f..00000000 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ /dev/null @@ -1,84 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_INCLUDES_H -#define JEMALLOC_INTERNAL_INCLUDES_H - -/* - * jemalloc can conceptually be broken into components (arena, tcache, etc.), - * but there are circular dependencies that cannot be broken without - * substantial performance degradation. - * - * Historically, we dealt with this by each header into four sections (types, - * structs, externs, and inlines), and included each header file multiple times - * in this file, picking out the portion we want on each pass using the - * following #defines: - * JEMALLOC_H_TYPES : Preprocessor-defined constants and pseudo-opaque data - * types. 
- * JEMALLOC_H_STRUCTS : Data structures. - * JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes. - * JEMALLOC_H_INLINES : Inline functions. - * - * We're moving toward a world in which the dependencies are explicit; each file - * will #include the headers it depends on (rather than relying on them being - * implicitly available via this file including every header file in the - * project). - * - * We're now in an intermediate state: we've broken up the header files to avoid - * having to include each one multiple times, but have not yet moved the - * dependency information into the header files (i.e. we still rely on the - * ordering in this file to ensure all a header's dependencies are available in - * its translation unit). Each component is now broken up into multiple header - * files, corresponding to the sections above (e.g. instead of "foo.h", we now - * have "foo_types.h", "foo_structs.h", "foo_externs.h", "foo_inlines.h"). - * - * Those files which have been converted to explicitly include their - * inter-component dependencies are now in the initial HERMETIC HEADERS - * section. All headers may still rely on jemalloc_preamble.h (which, by fiat, - * must be included first in every translation unit) for system headers and - * global jemalloc definitions, however. 
- */ - -/******************************************************************************/ -/* TYPES */ -/******************************************************************************/ - -#include "jemalloc/internal/arena_types.h" -#include "jemalloc/internal/tcache_types.h" -#include "jemalloc/internal/prof_types.h" - -/******************************************************************************/ -/* STRUCTS */ -/******************************************************************************/ - -#include "jemalloc/internal/prof_structs.h" -#include "jemalloc/internal/arena_structs.h" -#include "jemalloc/internal/tcache_structs.h" -#include "jemalloc/internal/background_thread_structs.h" - -/******************************************************************************/ -/* EXTERNS */ -/******************************************************************************/ - -#include "jemalloc/internal/jemalloc_internal_externs.h" -#include "jemalloc/internal/arena_externs.h" -#include "jemalloc/internal/large_externs.h" -#include "jemalloc/internal/tcache_externs.h" -#include "jemalloc/internal/prof_externs.h" -#include "jemalloc/internal/background_thread_externs.h" - -/******************************************************************************/ -/* INLINES */ -/******************************************************************************/ - -#include "jemalloc/internal/jemalloc_internal_inlines_a.h" -/* - * Include portions of arena code interleaved with tcache code in order to - * resolve circular dependencies. 
- */ -#include "jemalloc/internal/arena_inlines_a.h" -#include "jemalloc/internal/jemalloc_internal_inlines_b.h" -#include "jemalloc/internal/tcache_inlines.h" -#include "jemalloc/internal/arena_inlines_b.h" -#include "jemalloc/internal/jemalloc_internal_inlines_c.h" -#include "jemalloc/internal/prof_inlines.h" -#include "jemalloc/internal/background_thread_inlines.h" - -#endif /* JEMALLOC_INTERNAL_INCLUDES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 646ec5be..9912eff4 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -2,14 +2,13 @@ #define JEMALLOC_INTERNAL_INLINES_A_H #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/arena_externs.h" -#include "jemalloc/internal/arena_types.h" +#include "jemalloc/internal/arena.h" #include "jemalloc/internal/arenas_management.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/sc.h" -#include "jemalloc/internal/tcache_externs.h" +#include "jemalloc/internal/tcache.h" #include "jemalloc/internal/ticker.h" JEMALLOC_ALWAYS_INLINE malloc_cpuid_t diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h deleted file mode 100644 index 2c91cb77..00000000 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ /dev/null @@ -1,110 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_INLINES_B_H -#define JEMALLOC_INTERNAL_INLINES_B_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/arena_inlines_a.h" -#include "jemalloc/internal/arenas_management.h" -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/jemalloc_internal_inlines_a.h" - -static inline void -thread_migrate_arena(tsd_t *tsd, arena_t *oldarena, arena_t 
*newarena) { - assert(oldarena != NULL); - assert(newarena != NULL); - - arena_migrate(tsd, oldarena, newarena); - if (tcache_available(tsd)) { - tcache_arena_reassociate(tsd_tsdn(tsd), - tsd_tcache_slowp_get(tsd), newarena); - } -} - -static inline void -percpu_arena_update(tsd_t *tsd, unsigned cpu) { - assert(have_percpu_arena); - arena_t *oldarena = tsd_arena_get(tsd); - assert(oldarena != NULL); - unsigned oldind = arena_ind_get(oldarena); - - if (oldind != cpu) { - unsigned newind = cpu; - arena_t *newarena = arena_get(tsd_tsdn(tsd), newind, true); - assert(newarena != NULL); - - thread_migrate_arena(tsd, oldarena, newarena); - } -} - -/* Choose an arena based on a per-thread value. */ -static inline arena_t * -arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { - arena_t *ret; - - if (arena != NULL) { - return arena; - } - - /* During reentrancy, arena 0 is the safest bet. */ - if (unlikely(tsd_reentrancy_level_get(tsd) > 0)) { - return arena_get(tsd_tsdn(tsd), 0, true); - } - - ret = internal ? tsd_iarena_get(tsd) : tsd_arena_get(tsd); - if (unlikely(ret == NULL)) { - ret = arena_choose_hard(tsd, internal); - assert(ret); - if (tcache_available(tsd)) { - tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); - if (tcache_slow->arena != NULL) { - /* See comments in tsd_tcache_data_init().*/ - assert(tcache_slow->arena - == arena_get(tsd_tsdn(tsd), 0, false)); - if (tcache_slow->arena != ret) { - tcache_arena_reassociate(tsd_tsdn(tsd), - tcache_slow, ret); - } - } else { - tcache_arena_associate( - tsd_tsdn(tsd), tcache_slow, ret); - } - } - } - - /* - * Note that for percpu arena, if the current arena is outside of the - * auto percpu arena range, (i.e. thread is assigned to a manually - * managed arena), then percpu arena is skipped. 
- */ - if (have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena) - && !internal - && (arena_ind_get(ret) < percpu_arena_ind_limit(opt_percpu_arena)) - && (ret->last_thd != tsd_tsdn(tsd))) { - unsigned ind = percpu_arena_choose(); - if (arena_ind_get(ret) != ind) { - percpu_arena_update(tsd, ind); - ret = tsd_arena_get(tsd); - } - ret->last_thd = tsd_tsdn(tsd); - } - - return ret; -} - -static inline arena_t * -arena_choose(tsd_t *tsd, arena_t *arena) { - return arena_choose_impl(tsd, arena, false); -} - -static inline arena_t * -arena_ichoose(tsd_t *tsd, arena_t *arena) { - return arena_choose_impl(tsd, arena, true); -} - -static inline bool -arena_is_auto(const arena_t *arena) { - assert(narenas_auto > 0); - - return (arena_ind_get(arena) < manual_arena_base); -} - -#endif /* JEMALLOC_INTERNAL_INLINES_B_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 541821ae..6bec6d6f 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -2,12 +2,13 @@ #define JEMALLOC_INTERNAL_INLINES_C_H #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/arena_externs.h" -#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/emap.h" #include "jemalloc/internal/jemalloc_init.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/log.h" +#include "jemalloc/internal/malloc_dispatch_inlines.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/thread_event.h" #include "jemalloc/internal/witness.h" @@ -67,7 +68,7 @@ iallocztm_explicit_slab(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); } - ret = arena_malloc( + ret = malloc_dispatch_malloc( tsdn, arena, size, ind, zero, slab, tcache, slow_path); if 
(config_stats && is_internal && likely(ret != NULL)) { arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret)); @@ -102,7 +103,8 @@ ipallocztm_explicit_slab(tsdn_t *tsdn, size_t usize, size_t alignment, witness_assert_depth_to_rank( tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - ret = arena_palloc(tsdn, arena, usize, alignment, zero, slab, tcache); + ret = malloc_dispatch_palloc( + tsdn, arena, usize, alignment, zero, slab, tcache); assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); if (config_stats && is_internal && likely(ret != NULL)) { arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret)); @@ -156,7 +158,7 @@ idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) { assert(tcache == NULL); } - arena_dalloc(tsdn, ptr, tcache, alloc_ctx, slow_path); + malloc_dispatch_dalloc(tsdn, ptr, tcache, alloc_ctx, slow_path); } JEMALLOC_ALWAYS_INLINE void @@ -169,7 +171,7 @@ isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, emap_alloc_ctx_t *alloc_ctx, bool slow_path) { witness_assert_depth_to_rank( tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - arena_sdalloc(tsdn, ptr, size, tcache, alloc_ctx, slow_path); + malloc_dispatch_sdalloc(tsdn, ptr, size, tcache, alloc_ctx, slow_path); } JEMALLOC_ALWAYS_INLINE void * @@ -217,8 +219,8 @@ iralloct_explicit_slab(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, zero, slab, tcache, arena); } - return arena_ralloc(tsdn, arena, ptr, oldsize, size, alignment, zero, - slab, tcache); + return malloc_dispatch_ralloc( + tsdn, arena, ptr, oldsize, size, alignment, zero, slab, tcache); } JEMALLOC_ALWAYS_INLINE void * diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large.h similarity index 78% rename from include/jemalloc/internal/large_externs.h rename to include/jemalloc/internal/large.h index 82abd7de..bf63bc3d 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large.h @@ -1,9 +1,13 
@@ -#ifndef JEMALLOC_INTERNAL_LARGE_EXTERNS_H -#define JEMALLOC_INTERNAL_LARGE_EXTERNS_H +#ifndef JEMALLOC_INTERNAL_LARGE_H +#define JEMALLOC_INTERNAL_LARGE_H #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/edata.h" +/* Forward decls; only used as pointer types below. */ +typedef struct arena_s arena_t; +typedef struct prof_info_s prof_info_t; + void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); void *large_palloc( tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero); @@ -20,4 +24,4 @@ void large_prof_info_get( void large_prof_tctx_reset(edata_t *edata); void large_prof_info_set(edata_t *edata, prof_tctx_t *tctx, size_t size); -#endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */ +#endif /* JEMALLOC_INTERNAL_LARGE_H */ diff --git a/include/jemalloc/internal/malloc_dispatch_externs.h b/include/jemalloc/internal/malloc_dispatch_externs.h new file mode 100644 index 00000000..197b5d6e --- /dev/null +++ b/include/jemalloc/internal/malloc_dispatch_externs.h @@ -0,0 +1,19 @@ +#ifndef JEMALLOC_INTERNAL_MALLOC_DISPATCH_EXTERNS_H +#define JEMALLOC_INTERNAL_MALLOC_DISPATCH_EXTERNS_H + +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/tsd_types.h" + +/* Forward decls; only used as pointer types below. 
*/ +typedef struct arena_s arena_t; +typedef struct tcache_s tcache_t; + +void *malloc_dispatch_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, + size_t alignment, bool zero, bool slab, tcache_t *tcache); +void malloc_dispatch_dalloc_promoted( + tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); +void *malloc_dispatch_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, + size_t oldsize, size_t size, size_t alignment, bool zero, bool slab, + tcache_t *tcache); + +#endif /* JEMALLOC_INTERNAL_MALLOC_DISPATCH_EXTERNS_H */ diff --git a/include/jemalloc/internal/malloc_dispatch_inlines.h b/include/jemalloc/internal/malloc_dispatch_inlines.h new file mode 100644 index 00000000..b6145c85 --- /dev/null +++ b/include/jemalloc/internal/malloc_dispatch_inlines.h @@ -0,0 +1,278 @@ +#ifndef JEMALLOC_INTERNAL_MALLOC_DISPATCH_INLINES_H +#define JEMALLOC_INTERNAL_MALLOC_DISPATCH_INLINES_H + +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines.h" +#include "jemalloc/internal/bin.h" +#include "jemalloc/internal/div.h" +#include "jemalloc/internal/emap.h" +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/large.h" +#include "jemalloc/internal/malloc_dispatch_externs.h" +#include "jemalloc/internal/safety_check.h" +#include "jemalloc/internal/sc.h" +#include "jemalloc/internal/sz.h" +#include "jemalloc/internal/tcache_inlines.h" + +JEMALLOC_ALWAYS_INLINE void * +malloc_dispatch_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, + bool zero, bool slab, tcache_t *tcache, bool slow_path) { + assert(!tsdn_null(tsdn) || tcache == NULL); + + if (likely(tcache != NULL)) { + if (likely(slab)) { + assert(sz_can_use_slab(size)); + return tcache_alloc_small(tsdn_tsd(tsdn), arena, tcache, + size, ind, zero, slow_path); + } else if (likely(tcache_can_cache_large(tcache, ind))) { + return tcache_alloc_large(tsdn_tsd(tsdn), arena, tcache, + size, ind, zero, slow_path); + 
} + /* (size > tcache_max) case falls through. */ + } + + return arena_malloc_hard(tsdn, arena, size, ind, zero, slab); +} + +static inline void +malloc_dispatch_dalloc_large_no_tcache( + tsdn_t *tsdn, void *ptr, szind_t szind, size_t usize) { + /* + * szind is still needed in this function mainly because + * szind < SC_NBINS determines not only if this is a small alloc, + * but also if szind is valid (an inactive extent would have + * szind == SC_NSIZES). + */ + if (config_prof && unlikely(szind < SC_NBINS)) { + malloc_dispatch_dalloc_promoted(tsdn, ptr, NULL, true); + } else { + edata_t *edata = emap_edata_lookup( + tsdn, &arena_emap_global, ptr); + if (large_dalloc_safety_checks(edata, ptr, usize)) { + /* See the comment in isfree. */ + return; + } + large_dalloc(tsdn, edata); + } +} + +static inline void +malloc_dispatch_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { + assert(ptr != NULL); + + emap_alloc_ctx_t alloc_ctx; + emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx); + + if (config_debug) { + edata_t *edata = emap_edata_lookup( + tsdn, &arena_emap_global, ptr); + assert(alloc_ctx.szind == edata_szind_get(edata)); + assert(alloc_ctx.szind < SC_NSIZES); + assert(alloc_ctx.slab == edata_slab_get(edata)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) + == edata_usize_get(edata)); + } + + if (likely(alloc_ctx.slab)) { + /* Small allocation. 
*/ + arena_dalloc_small(tsdn, ptr); + } else { + malloc_dispatch_dalloc_large_no_tcache( + tsdn, ptr, alloc_ctx.szind, + emap_alloc_ctx_usize_get(&alloc_ctx)); + } +} + +JEMALLOC_ALWAYS_INLINE void +malloc_dispatch_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, + szind_t szind, size_t usize, bool slow_path) { + assert(!tsdn_null(tsdn) && tcache != NULL); + bool is_sample_promoted = config_prof && szind < SC_NBINS; + if (unlikely(is_sample_promoted)) { + malloc_dispatch_dalloc_promoted(tsdn, ptr, tcache, slow_path); + } else { + if (tcache_can_cache_large(tcache, szind)) { + tcache_dalloc_large( + tsdn_tsd(tsdn), tcache, ptr, szind, slow_path); + } else { + edata_t *edata = emap_edata_lookup( + tsdn, &arena_emap_global, ptr); + if (large_dalloc_safety_checks(edata, ptr, usize)) { + /* See the comment in isfree. */ + return; + } + large_dalloc(tsdn, edata); + } + } +} + +JEMALLOC_ALWAYS_INLINE bool +malloc_dispatch_dalloc_small_safety_check(tsdn_t *tsdn, void *ptr) { + if (!config_debug) { + return false; + } + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); + szind_t binind = edata_szind_get(edata); + div_info_t div_info = arena_binind_div_info[binind]; + /* + * Calls the internal function bin_slab_regind_impl because the + * safety check does not require a lock. 
+ */ + size_t regind = bin_slab_regind_impl(&div_info, binind, edata, ptr); + slab_data_t *slab_data = edata_slab_data_get(edata); + const bin_info_t *bin_info = &bin_infos[binind]; + assert(edata_nfree_get(edata) < bin_info->nregs); + if (unlikely(!bitmap_get( + slab_data->bitmap, &bin_info->bitmap_info, regind))) { + safety_check_fail( + "Invalid deallocation detected: the pointer being freed (%p) not " + "currently active, possibly caused by double free bugs.\n", + ptr); + return true; + } + return false; +} + +JEMALLOC_ALWAYS_INLINE void +malloc_dispatch_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, + emap_alloc_ctx_t *caller_alloc_ctx, bool slow_path) { + assert(!tsdn_null(tsdn) || tcache == NULL); + assert(ptr != NULL); + + if (unlikely(tcache == NULL)) { + malloc_dispatch_dalloc_no_tcache(tsdn, ptr); + return; + } + + emap_alloc_ctx_t alloc_ctx; + if (caller_alloc_ctx != NULL) { + alloc_ctx = *caller_alloc_ctx; + } else { + util_assume(tsdn != NULL); + emap_alloc_ctx_lookup( + tsdn, &arena_emap_global, ptr, &alloc_ctx); + } + + if (config_debug) { + edata_t *edata = emap_edata_lookup( + tsdn, &arena_emap_global, ptr); + assert(alloc_ctx.szind == edata_szind_get(edata)); + assert(alloc_ctx.szind < SC_NSIZES); + assert(alloc_ctx.slab == edata_slab_get(edata)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) + == edata_usize_get(edata)); + } + + if (likely(alloc_ctx.slab)) { + /* Small allocation. 
*/ + if (malloc_dispatch_dalloc_small_safety_check(tsdn, ptr)) { + return; + } + tcache_dalloc_small( + tsdn_tsd(tsdn), tcache, ptr, alloc_ctx.szind, slow_path); + } else { + malloc_dispatch_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind, + emap_alloc_ctx_usize_get(&alloc_ctx), slow_path); + } +} + +static inline void +malloc_dispatch_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { + assert(ptr != NULL); + assert(size <= SC_LARGE_MAXCLASS); + + emap_alloc_ctx_t alloc_ctx; + if (!config_prof || !opt_prof) { + /* + * There is no risk of being confused by a promoted sampled + * object, so base szind and slab on the given size. + */ + szind_t szind = sz_size2index(size); + emap_alloc_ctx_init( + &alloc_ctx, szind, (szind < SC_NBINS), size); + } + + if ((config_prof && opt_prof) || config_debug) { + emap_alloc_ctx_lookup( + tsdn, &arena_emap_global, ptr, &alloc_ctx); + + assert(alloc_ctx.szind == sz_size2index(size)); + assert((config_prof && opt_prof) + || alloc_ctx.slab == (alloc_ctx.szind < SC_NBINS)); + + if (config_debug) { + edata_t *edata = emap_edata_lookup( + tsdn, &arena_emap_global, ptr); + assert(alloc_ctx.szind == edata_szind_get(edata)); + assert(alloc_ctx.slab == edata_slab_get(edata)); + } + } + + if (likely(alloc_ctx.slab)) { + /* Small allocation. */ + arena_dalloc_small(tsdn, ptr); + } else { + malloc_dispatch_dalloc_large_no_tcache( + tsdn, ptr, alloc_ctx.szind, + emap_alloc_ctx_usize_get(&alloc_ctx)); + } +} + +JEMALLOC_ALWAYS_INLINE void +malloc_dispatch_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, + emap_alloc_ctx_t *caller_alloc_ctx, bool slow_path) { + assert(!tsdn_null(tsdn) || tcache == NULL); + assert(ptr != NULL); + assert(size <= SC_LARGE_MAXCLASS); + + if (unlikely(tcache == NULL)) { + malloc_dispatch_sdalloc_no_tcache(tsdn, ptr, size); + return; + } + + emap_alloc_ctx_t alloc_ctx; + if (config_prof && opt_prof) { + if (caller_alloc_ctx == NULL) { + /* Uncommon case and should be a static check. 
*/ + emap_alloc_ctx_lookup( + tsdn, &arena_emap_global, ptr, &alloc_ctx); + assert(alloc_ctx.szind == sz_size2index(size)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) == size); + } else { + alloc_ctx = *caller_alloc_ctx; + } + } else { + /* + * There is no risk of being confused by a promoted sampled + * object, so base szind and slab on the given size. + */ + alloc_ctx.szind = sz_size2index(size); + alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); + } + + if (config_debug) { + edata_t *edata = emap_edata_lookup( + tsdn, &arena_emap_global, ptr); + assert(alloc_ctx.szind == edata_szind_get(edata)); + assert(alloc_ctx.slab == edata_slab_get(edata)); + emap_alloc_ctx_init( + &alloc_ctx, alloc_ctx.szind, alloc_ctx.slab, sz_s2u(size)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) + == edata_usize_get(edata)); + } + + if (likely(alloc_ctx.slab)) { + /* Small allocation. */ + if (malloc_dispatch_dalloc_small_safety_check(tsdn, ptr)) { + return; + } + tcache_dalloc_small( + tsdn_tsd(tsdn), tcache, ptr, alloc_ctx.szind, slow_path); + } else { + malloc_dispatch_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind, + sz_s2u(size), slow_path); + } +} + +#endif /* JEMALLOC_INTERNAL_MALLOC_DISPATCH_INLINES_H */ diff --git a/include/jemalloc/internal/peak_event.h b/include/jemalloc/internal/peak_event.h index 0d1f1627..67e7a71b 100644 --- a/include/jemalloc/internal/peak_event.h +++ b/include/jemalloc/internal/peak_event.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_PEAK_EVENT_H #include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/thread_event_registry.h" #include "jemalloc/internal/tsd_types.h" /* diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h new file mode 100644 index 00000000..b0757886 --- /dev/null +++ b/include/jemalloc/internal/prof.h @@ -0,0 +1,427 @@ +#ifndef JEMALLOC_INTERNAL_PROF_H +#define JEMALLOC_INTERNAL_PROF_H + +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/ckh.h" +#include 
"jemalloc/internal/mutex.h" +#include "jemalloc/internal/prng.h" +#include "jemalloc/internal/prof_hook.h" +#include "jemalloc/internal/rb.h" +#include "jemalloc/internal/thread_event_registry.h" + +/* Forward decl; only base_t * is used as a pointer arg below. */ +typedef struct base_s base_t; + +/******************************************************************************/ +/* TYPES */ +/******************************************************************************/ + +typedef struct prof_bt_s prof_bt_t; +typedef struct prof_cnt_s prof_cnt_t; +typedef struct prof_tctx_s prof_tctx_t; +typedef struct prof_info_s prof_info_t; +typedef struct prof_gctx_s prof_gctx_t; +typedef struct prof_tdata_s prof_tdata_t; +typedef struct prof_recent_s prof_recent_t; + +/* Option defaults. */ +#ifdef JEMALLOC_PROF +# define PROF_PREFIX_DEFAULT "jeprof" +#else +# define PROF_PREFIX_DEFAULT "" +#endif +#define LG_PROF_SAMPLE_DEFAULT 19 +#define LG_PROF_INTERVAL_DEFAULT -1 + +/* + * Hard limit on stack backtrace depth. The version of prof_backtrace() that + * is based on __builtin_return_address() necessarily has a hard-coded number + * of backtrace frame handlers, and should be kept in sync with this setting. + */ +#ifdef JEMALLOC_PROF_GCC +# define PROF_BT_MAX_LIMIT 256 +#else +# define PROF_BT_MAX_LIMIT UINT_MAX +#endif +#define PROF_BT_MAX_DEFAULT 128 + +/* Initial hash table size. */ +#define PROF_CKH_MINITEMS 64 + +/* Size of memory buffer to use when writing dump files. */ +#ifndef JEMALLOC_PROF +/* Minimize memory bloat for non-prof builds. */ +# define PROF_DUMP_BUFSIZE 1 +#elif defined(JEMALLOC_DEBUG) +/* Use a small buffer size in debug build, mainly to facilitate testing. */ +# define PROF_DUMP_BUFSIZE 16 +#else +# define PROF_DUMP_BUFSIZE 65536 +#endif + +/* Size of size class related tables */ +#ifdef JEMALLOC_PROF +# define PROF_SC_NSIZES SC_NSIZES +#else +/* Minimize memory bloat for non-prof builds. 
*/ +# define PROF_SC_NSIZES 1 +#endif + +/* Size of stack-allocated buffer used by prof_printf(). */ +#define PROF_PRINTF_BUFSIZE 128 + +/* + * Number of mutexes shared among all gctx's. No space is allocated for these + * unless profiling is enabled, so it's okay to over-provision. + */ +#define PROF_NCTX_LOCKS 1024 + +/* + * Number of mutexes shared among all tdata's. No space is allocated for these + * unless profiling is enabled, so it's okay to over-provision. + */ +#define PROF_NTDATA_LOCKS 256 + +/* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF +# define PROF_DUMP_FILENAME_LEN (PATH_MAX + 1) +#else +# define PROF_DUMP_FILENAME_LEN 1 +#endif + +/* Default number of recent allocations to record. */ +#define PROF_RECENT_ALLOC_MAX_DEFAULT 0 + +/* Thread name storage size limit. */ +#define PROF_THREAD_NAME_MAX_LEN 16 + +/* + * Minimum required alignment for sampled allocations. Over-aligning sampled + * allocations allows us to quickly identify them on the dalloc path without + * resorting to metadata lookup. + */ +#define PROF_SAMPLE_ALIGNMENT PAGE +#define PROF_SAMPLE_ALIGNMENT_MASK PAGE_MASK + +/* NOLINTNEXTLINE(performance-no-int-to-ptr) */ +#define PROF_TCTX_SENTINEL ((prof_tctx_t *)((uintptr_t)1U)) + +/******************************************************************************/ +/* STRUCTS */ +/******************************************************************************/ + +struct prof_bt_s { + /* Backtrace, stored as len program counters. */ + void **vec; + unsigned len; +}; + +#ifdef JEMALLOC_PROF_LIBGCC +/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ +typedef struct { + void **vec; + unsigned *len; + unsigned max; +} prof_unwind_data_t; +#endif + +struct prof_cnt_s { + /* Profiling counters. 
*/ + uint64_t curobjs; + uint64_t curobjs_shifted_unbiased; + uint64_t curbytes; + uint64_t curbytes_unbiased; + uint64_t accumobjs; + uint64_t accumobjs_shifted_unbiased; + uint64_t accumbytes; + uint64_t accumbytes_unbiased; +}; + +typedef enum { + prof_tctx_state_initializing, + prof_tctx_state_nominal, + prof_tctx_state_dumping, + prof_tctx_state_purgatory /* Dumper must finish destroying. */ +} prof_tctx_state_t; + +struct prof_tctx_s { + /* Thread data for thread that performed the allocation. */ + prof_tdata_t *tdata; + + /* + * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be + * defunct during teardown. + */ + uint64_t thr_uid; + uint64_t thr_discrim; + + /* + * Reference count of how many times this tctx object is referenced in + * recent allocation / deallocation records, protected by tdata->lock. + */ + uint64_t recent_count; + + /* Profiling counters, protected by tdata->lock. */ + prof_cnt_t cnts; + + /* Associated global context. */ + prof_gctx_t *gctx; + + /* + * UID that distinguishes multiple tctx's created by the same thread, + * but coexisting in gctx->tctxs. There are two ways that such + * coexistence can occur: + * - A dumper thread can cause a tctx to be retained in the purgatory + * state. + * - Although a single "producer" thread must create all tctx's which + * share the same thr_uid, multiple "consumers" can each concurrently + * execute portions of prof_tctx_destroy(). prof_tctx_destroy() only + * gets called once each time cnts.cur{objs,bytes} drop to 0, but this + * threshold can be hit again before the first consumer finishes + * executing prof_tctx_destroy(). + */ + uint64_t tctx_uid; + + /* Linkage into gctx's tctxs. */ + rb_node(prof_tctx_t) tctx_link; + + /* + * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents + * sample vs destroy race. + */ + bool prepared; + + /* Current dump-related state, protected by gctx->lock. 
*/ + prof_tctx_state_t state; + + /* + * Copy of cnts snapshotted during early dump phase, protected by + * dump_mtx. + */ + prof_cnt_t dump_cnts; +}; +typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; + +struct prof_info_s { + /* Time when the allocation was made. */ + nstime_t alloc_time; + /* Points to the prof_tctx_t corresponding to the allocation. */ + prof_tctx_t *alloc_tctx; + /* Allocation request size. */ + size_t alloc_size; +}; + +struct prof_gctx_s { + /* Protects nlimbo, cnt_summed, and tctxs. */ + malloc_mutex_t *lock; + + /* + * Number of threads that currently cause this gctx to be in a state of + * limbo due to one of: + * - Initializing this gctx. + * - Initializing per thread counters associated with this gctx. + * - Preparing to destroy this gctx. + * - Dumping a heap profile that includes this gctx. + * nlimbo must be 1 (single destroyer) in order to safely destroy the + * gctx. + */ + unsigned nlimbo; + + /* + * Tree of profile counters, one for each thread that has allocated in + * this context. + */ + prof_tctx_tree_t tctxs; + + /* Linkage for tree of contexts to be dumped. */ + rb_node(prof_gctx_t) dump_link; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + + /* Associated backtrace. */ + prof_bt_t bt; + + /* Backtrace vector, variable size, referred to by bt. */ + void *vec[1]; +}; +typedef rb_tree(prof_gctx_t) prof_gctx_tree_t; + +struct prof_tdata_s { + malloc_mutex_t *lock; + + /* Monotonically increasing unique thread identifier. */ + uint64_t thr_uid; + + /* + * Monotonically increasing discriminator among tdata structures + * associated with the same thr_uid. + */ + uint64_t thr_discrim; + + rb_node(prof_tdata_t) tdata_link; + + /* + * Counter used to initialize prof_tctx_t's tctx_uid. No locking is + * necessary when incrementing this field, because only one thread ever + * does so. + */ + uint64_t tctx_uid_next; + + /* + * Hash of (prof_bt_t *)-->(prof_tctx_t *). 
Each thread tracks + * backtraces for which it has non-zero allocation/deallocation counters + * associated with thread-specific prof_tctx_t objects. Other threads + * may write to prof_tctx_t contents when freeing associated objects. + */ + ckh_t bt2tctx; + + /* Included in heap profile dumps if has content. */ + char thread_name[PROF_THREAD_NAME_MAX_LEN]; + + /* State used to avoid dumping while operating on prof internals. */ + bool enq; + bool enq_idump; + bool enq_gdump; + + /* + * Set to true during an early dump phase for tdata's which are + * currently being dumped. New threads' tdata's have this initialized + * to false so that they aren't accidentally included in later dump + * phases. + */ + bool dumping; + + /* + * True if profiling is active for this tdata's thread + * (thread.prof.active mallctl). + */ + bool active; + + bool attached; + bool expired; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + + /* Backtrace vector, used for calls to prof_backtrace(). */ + void **vec; +}; +typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; + +struct prof_recent_s { + nstime_t alloc_time; + nstime_t dalloc_time; + + ql_elm(prof_recent_t) link; + size_t size; + size_t usize; + atomic_p_t alloc_edata; /* NULL means allocation has been freed. */ + prof_tctx_t *alloc_tctx; + prof_tctx_t *dalloc_tctx; +}; + +/******************************************************************************/ +/* EXTERNS */ +/******************************************************************************/ + +extern bool opt_prof; +extern bool opt_prof_active; +extern bool opt_prof_thread_active_init; +extern unsigned opt_prof_bt_max; +extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ +extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ +extern bool opt_prof_gdump; /* High-water memory dumping. */ +extern bool opt_prof_final; /* Final profile dumping. */ +extern bool opt_prof_leak; /* Dump leak summary at exit. 
*/ +extern bool opt_prof_leak_error; /* Exit with error code if memory leaked */ +extern bool opt_prof_accum; /* Report cumulative bytes. */ +extern bool opt_prof_log; /* Turn logging on at boot. */ +extern char opt_prof_prefix[ +/* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PATH_MAX + +#endif + 1]; +extern bool opt_prof_unbias; + +/* Include pid namespace in profile file names. */ +extern bool opt_prof_pid_namespace; + +/* For recording recent allocations */ +extern ssize_t opt_prof_recent_alloc_max; + +/* Whether to use thread name provided by the system or by mallctl. */ +extern bool opt_prof_sys_thread_name; + +/* Whether to record per size class counts and request size totals. */ +extern bool opt_prof_stats; + +/* Accessed via prof_active_[gs]et{_unlocked,}(). */ +extern bool prof_active_state; + +/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */ +extern bool prof_gdump_val; + +/* Profile dump interval, measured in bytes allocated. */ +extern uint64_t prof_interval; + +/* + * Initialized as opt_lg_prof_sample, and potentially modified during profiling + * resets. 
+ */ +extern size_t lg_prof_sample; + +extern bool prof_booted; + +void prof_backtrace_hook_set(prof_backtrace_hook_t hook); +prof_backtrace_hook_t prof_backtrace_hook_get(void); + +void prof_dump_hook_set(prof_dump_hook_t hook); +prof_dump_hook_t prof_dump_hook_get(void); + +void prof_sample_hook_set(prof_sample_hook_t hook); +prof_sample_hook_t prof_sample_hook_get(void); + +void prof_sample_free_hook_set(prof_sample_free_hook_t hook); +prof_sample_free_hook_t prof_sample_free_hook_get(void); + +/* Functions only accessed in prof_inlines.h */ +prof_tdata_t *prof_tdata_init(tsd_t *tsd); +prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); + +void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx); +void prof_malloc_sample_object( + tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx); +void prof_free_sampled_object( + tsd_t *tsd, const void *ptr, size_t usize, prof_info_t *prof_info); +prof_tctx_t *prof_tctx_create(tsd_t *tsd); +void prof_idump(tsdn_t *tsdn); +bool prof_mdump(tsd_t *tsd, const char *filename); +void prof_gdump(tsdn_t *tsdn); + +void prof_tdata_cleanup(tsd_t *tsd); +bool prof_active_get(tsdn_t *tsdn); +bool prof_active_set(tsdn_t *tsdn, bool active); +const char *prof_thread_name_get(tsd_t *tsd); +int prof_thread_name_set(tsd_t *tsd, const char *thread_name); +bool prof_thread_active_get(tsd_t *tsd); +bool prof_thread_active_set(tsd_t *tsd, bool active); +bool prof_thread_active_init_get(tsdn_t *tsdn); +bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init); +bool prof_gdump_get(tsdn_t *tsdn); +bool prof_gdump_set(tsdn_t *tsdn, bool active); +void prof_boot0(void); +void prof_boot1(void); +bool prof_boot2(tsd_t *tsd, base_t *base); +void prof_prefork0(tsdn_t *tsdn); +void prof_prefork1(tsdn_t *tsdn); +void prof_postfork_parent(tsdn_t *tsdn); +void prof_postfork_child(tsdn_t *tsdn); + +uint64_t prof_sample_new_event_wait(tsd_t *tsd); +uint64_t tsd_prof_sample_event_wait_get(tsd_t *tsd); + +extern 
te_base_cb_t prof_sample_te_handler; + +#endif /* JEMALLOC_INTERNAL_PROF_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h deleted file mode 100644 index cfb28988..00000000 --- a/include/jemalloc/internal/prof_externs.h +++ /dev/null @@ -1,132 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_PROF_EXTERNS_H -#define JEMALLOC_INTERNAL_PROF_EXTERNS_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/prof_hook.h" -#include "jemalloc/internal/thread_event_registry.h" - -extern bool opt_prof; -extern bool opt_prof_active; -extern bool opt_prof_thread_active_init; -extern unsigned opt_prof_bt_max; -extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ -extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ -extern bool opt_prof_gdump; /* High-water memory dumping. */ -extern bool opt_prof_final; /* Final profile dumping. */ -extern bool opt_prof_leak; /* Dump leak summary at exit. */ -extern bool opt_prof_leak_error; /* Exit with error code if memory leaked */ -extern bool opt_prof_accum; /* Report cumulative bytes. */ -extern bool opt_prof_log; /* Turn logging on at boot. */ -extern char opt_prof_prefix[ -/* Minimize memory bloat for non-prof builds. */ -#ifdef JEMALLOC_PROF - PATH_MAX + -#endif - 1]; -extern bool opt_prof_unbias; - -/* Include pid namespace in profile file names. */ -extern bool opt_prof_pid_namespace; - -/* For recording recent allocations */ -extern ssize_t opt_prof_recent_alloc_max; - -/* Whether to use thread name provided by the system or by mallctl. */ -extern bool opt_prof_sys_thread_name; - -/* Whether to record per size class counts and request size totals. */ -extern bool opt_prof_stats; - -/* Accessed via prof_active_[gs]et{_unlocked,}(). */ -extern bool prof_active_state; - -/* Accessed via prof_gdump_[gs]et{_unlocked,}(). 
*/ -extern bool prof_gdump_val; - -/* Profile dump interval, measured in bytes allocated. */ -extern uint64_t prof_interval; - -/* - * Initialized as opt_lg_prof_sample, and potentially modified during profiling - * resets. - */ -extern size_t lg_prof_sample; - -extern bool prof_booted; - -void prof_backtrace_hook_set(prof_backtrace_hook_t hook); -prof_backtrace_hook_t prof_backtrace_hook_get(void); - -void prof_dump_hook_set(prof_dump_hook_t hook); -prof_dump_hook_t prof_dump_hook_get(void); - -void prof_sample_hook_set(prof_sample_hook_t hook); -prof_sample_hook_t prof_sample_hook_get(void); - -void prof_sample_free_hook_set(prof_sample_free_hook_t hook); -prof_sample_free_hook_t prof_sample_free_hook_get(void); - -/* Functions only accessed in prof_inlines.h */ -prof_tdata_t *prof_tdata_init(tsd_t *tsd); -prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); - -void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx); -void prof_malloc_sample_object( - tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx); -void prof_free_sampled_object( - tsd_t *tsd, const void *ptr, size_t usize, prof_info_t *prof_info); -prof_tctx_t *prof_tctx_create(tsd_t *tsd); -void prof_idump(tsdn_t *tsdn); -bool prof_mdump(tsd_t *tsd, const char *filename); -void prof_gdump(tsdn_t *tsdn); - -void prof_tdata_cleanup(tsd_t *tsd); -bool prof_active_get(tsdn_t *tsdn); -bool prof_active_set(tsdn_t *tsdn, bool active); -const char *prof_thread_name_get(tsd_t *tsd); -int prof_thread_name_set(tsd_t *tsd, const char *thread_name); -bool prof_thread_active_get(tsd_t *tsd); -bool prof_thread_active_set(tsd_t *tsd, bool active); -bool prof_thread_active_init_get(tsdn_t *tsdn); -bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init); -bool prof_gdump_get(tsdn_t *tsdn); -bool prof_gdump_set(tsdn_t *tsdn, bool active); -void prof_boot0(void); -void prof_boot1(void); -bool prof_boot2(tsd_t *tsd, base_t *base); -void prof_prefork0(tsdn_t *tsdn); -void 
prof_prefork1(tsdn_t *tsdn); -void prof_postfork_parent(tsdn_t *tsdn); -void prof_postfork_child(tsdn_t *tsdn); - -uint64_t prof_sample_new_event_wait(tsd_t *tsd); -uint64_t tsd_prof_sample_event_wait_get(tsd_t *tsd); - -/* - * The lookahead functionality facilitates events to be able to lookahead, i.e. - * without touching the event counters, to determine whether an event would be - * triggered. The event counters are not advanced until the end of the - * allocation / deallocation calls, so the lookahead can be useful if some - * preparation work for some event must be done early in the allocation / - * deallocation calls. - * - * Currently only the profiling sampling event needs the lookahead - * functionality, so we don't yet define general purpose lookahead functions. - */ - -JEMALLOC_ALWAYS_INLINE bool -te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) { - if (unlikely(!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0)) { - return false; - } - /* The subtraction is intentionally susceptible to underflow. 
*/ - uint64_t accumbytes = tsd_thread_allocated_get(tsd) + usize - - tsd_thread_allocated_last_event_get(tsd); - return accumbytes >= tsd_prof_sample_event_wait_get(tsd); -} - -extern te_base_cb_t prof_sample_te_handler; - -#endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index 4a36bd7a..e3e63f25 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -2,10 +2,9 @@ #define JEMALLOC_INTERNAL_PROF_INLINES_H #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/jemalloc_internal_inlines_c.h" -#include "jemalloc/internal/prof_externs.h" -#include "jemalloc/internal/prof_structs.h" +#include "jemalloc/internal/prof.h" #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/thread_event.h" diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h deleted file mode 100644 index d38b15ea..00000000 --- a/include/jemalloc/internal/prof_structs.h +++ /dev/null @@ -1,222 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_PROF_STRUCTS_H -#define JEMALLOC_INTERNAL_PROF_STRUCTS_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/edata.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/prng.h" -#include "jemalloc/internal/rb.h" - -struct prof_bt_s { - /* Backtrace, stored as len program counters. */ - void **vec; - unsigned len; -}; - -#ifdef JEMALLOC_PROF_LIBGCC -/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ -typedef struct { - void **vec; - unsigned *len; - unsigned max; -} prof_unwind_data_t; -#endif - -struct prof_cnt_s { - /* Profiling counters. 
*/ - uint64_t curobjs; - uint64_t curobjs_shifted_unbiased; - uint64_t curbytes; - uint64_t curbytes_unbiased; - uint64_t accumobjs; - uint64_t accumobjs_shifted_unbiased; - uint64_t accumbytes; - uint64_t accumbytes_unbiased; -}; - -typedef enum { - prof_tctx_state_initializing, - prof_tctx_state_nominal, - prof_tctx_state_dumping, - prof_tctx_state_purgatory /* Dumper must finish destroying. */ -} prof_tctx_state_t; - -struct prof_tctx_s { - /* Thread data for thread that performed the allocation. */ - prof_tdata_t *tdata; - - /* - * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be - * defunct during teardown. - */ - uint64_t thr_uid; - uint64_t thr_discrim; - - /* - * Reference count of how many times this tctx object is referenced in - * recent allocation / deallocation records, protected by tdata->lock. - */ - uint64_t recent_count; - - /* Profiling counters, protected by tdata->lock. */ - prof_cnt_t cnts; - - /* Associated global context. */ - prof_gctx_t *gctx; - - /* - * UID that distinguishes multiple tctx's created by the same thread, - * but coexisting in gctx->tctxs. There are two ways that such - * coexistence can occur: - * - A dumper thread can cause a tctx to be retained in the purgatory - * state. - * - Although a single "producer" thread must create all tctx's which - * share the same thr_uid, multiple "consumers" can each concurrently - * execute portions of prof_tctx_destroy(). prof_tctx_destroy() only - * gets called once each time cnts.cur{objs,bytes} drop to 0, but this - * threshold can be hit again before the first consumer finishes - * executing prof_tctx_destroy(). - */ - uint64_t tctx_uid; - - /* Linkage into gctx's tctxs. */ - rb_node(prof_tctx_t) tctx_link; - - /* - * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents - * sample vs destroy race. - */ - bool prepared; - - /* Current dump-related state, protected by gctx->lock. 
*/ - prof_tctx_state_t state; - - /* - * Copy of cnts snapshotted during early dump phase, protected by - * dump_mtx. - */ - prof_cnt_t dump_cnts; -}; -typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; - -struct prof_info_s { - /* Time when the allocation was made. */ - nstime_t alloc_time; - /* Points to the prof_tctx_t corresponding to the allocation. */ - prof_tctx_t *alloc_tctx; - /* Allocation request size. */ - size_t alloc_size; -}; - -struct prof_gctx_s { - /* Protects nlimbo, cnt_summed, and tctxs. */ - malloc_mutex_t *lock; - - /* - * Number of threads that currently cause this gctx to be in a state of - * limbo due to one of: - * - Initializing this gctx. - * - Initializing per thread counters associated with this gctx. - * - Preparing to destroy this gctx. - * - Dumping a heap profile that includes this gctx. - * nlimbo must be 1 (single destroyer) in order to safely destroy the - * gctx. - */ - unsigned nlimbo; - - /* - * Tree of profile counters, one for each thread that has allocated in - * this context. - */ - prof_tctx_tree_t tctxs; - - /* Linkage for tree of contexts to be dumped. */ - rb_node(prof_gctx_t) dump_link; - - /* Temporary storage for summation during dump. */ - prof_cnt_t cnt_summed; - - /* Associated backtrace. */ - prof_bt_t bt; - - /* Backtrace vector, variable size, referred to by bt. */ - void *vec[1]; -}; -typedef rb_tree(prof_gctx_t) prof_gctx_tree_t; - -struct prof_tdata_s { - malloc_mutex_t *lock; - - /* Monotonically increasing unique thread identifier. */ - uint64_t thr_uid; - - /* - * Monotonically increasing discriminator among tdata structures - * associated with the same thr_uid. - */ - uint64_t thr_discrim; - - rb_node(prof_tdata_t) tdata_link; - - /* - * Counter used to initialize prof_tctx_t's tctx_uid. No locking is - * necessary when incrementing this field, because only one thread ever - * does so. - */ - uint64_t tctx_uid_next; - - /* - * Hash of (prof_bt_t *)-->(prof_tctx_t *). 
Each thread tracks - * backtraces for which it has non-zero allocation/deallocation counters - * associated with thread-specific prof_tctx_t objects. Other threads - * may write to prof_tctx_t contents when freeing associated objects. - */ - ckh_t bt2tctx; - - /* Included in heap profile dumps if has content. */ - char thread_name[PROF_THREAD_NAME_MAX_LEN]; - - /* State used to avoid dumping while operating on prof internals. */ - bool enq; - bool enq_idump; - bool enq_gdump; - - /* - * Set to true during an early dump phase for tdata's which are - * currently being dumped. New threads' tdata's have this initialized - * to false so that they aren't accidentally included in later dump - * phases. - */ - bool dumping; - - /* - * True if profiling is active for this tdata's thread - * (thread.prof.active mallctl). - */ - bool active; - - bool attached; - bool expired; - - /* Temporary storage for summation during dump. */ - prof_cnt_t cnt_summed; - - /* Backtrace vector, used for calls to prof_backtrace(). */ - void **vec; -}; -typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; - -struct prof_recent_s { - nstime_t alloc_time; - nstime_t dalloc_time; - - ql_elm(prof_recent_t) link; - size_t size; - size_t usize; - atomic_p_t alloc_edata; /* NULL means allocation has been freed. 
*/ - prof_tctx_t *alloc_tctx; - prof_tctx_t *dalloc_tctx; -}; - -#endif /* JEMALLOC_INTERNAL_PROF_STRUCTS_H */ diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h index 0745b991..e671a47c 100644 --- a/include/jemalloc/internal/prof_sys.h +++ b/include/jemalloc/internal/prof_sys.h @@ -4,6 +4,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/base.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof.h" extern malloc_mutex_t prof_dump_filename_mtx; extern base_t *prof_base; diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h deleted file mode 100644 index 7468885e..00000000 --- a/include/jemalloc/internal/prof_types.h +++ /dev/null @@ -1,94 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_PROF_TYPES_H -#define JEMALLOC_INTERNAL_PROF_TYPES_H - -typedef struct prof_bt_s prof_bt_t; -typedef struct prof_cnt_s prof_cnt_t; -typedef struct prof_tctx_s prof_tctx_t; -typedef struct prof_info_s prof_info_t; -typedef struct prof_gctx_s prof_gctx_t; -typedef struct prof_tdata_s prof_tdata_t; -typedef struct prof_recent_s prof_recent_t; - -/* Option defaults. */ -#ifdef JEMALLOC_PROF -# define PROF_PREFIX_DEFAULT "jeprof" -#else -# define PROF_PREFIX_DEFAULT "" -#endif -#define LG_PROF_SAMPLE_DEFAULT 19 -#define LG_PROF_INTERVAL_DEFAULT -1 - -/* - * Hard limit on stack backtrace depth. The version of prof_backtrace() that - * is based on __builtin_return_address() necessarily has a hard-coded number - * of backtrace frame handlers, and should be kept in sync with this setting. - */ -#ifdef JEMALLOC_PROF_GCC -# define PROF_BT_MAX_LIMIT 256 -#else -# define PROF_BT_MAX_LIMIT UINT_MAX -#endif -#define PROF_BT_MAX_DEFAULT 128 - -/* Initial hash table size. */ -#define PROF_CKH_MINITEMS 64 - -/* Size of memory buffer to use when writing dump files. */ -#ifndef JEMALLOC_PROF -/* Minimize memory bloat for non-prof builds. 
*/ -# define PROF_DUMP_BUFSIZE 1 -#elif defined(JEMALLOC_DEBUG) -/* Use a small buffer size in debug build, mainly to facilitate testing. */ -# define PROF_DUMP_BUFSIZE 16 -#else -# define PROF_DUMP_BUFSIZE 65536 -#endif - -/* Size of size class related tables */ -#ifdef JEMALLOC_PROF -# define PROF_SC_NSIZES SC_NSIZES -#else -/* Minimize memory bloat for non-prof builds. */ -# define PROF_SC_NSIZES 1 -#endif - -/* Size of stack-allocated buffer used by prof_printf(). */ -#define PROF_PRINTF_BUFSIZE 128 - -/* - * Number of mutexes shared among all gctx's. No space is allocated for these - * unless profiling is enabled, so it's okay to over-provision. - */ -#define PROF_NCTX_LOCKS 1024 - -/* - * Number of mutexes shared among all tdata's. No space is allocated for these - * unless profiling is enabled, so it's okay to over-provision. - */ -#define PROF_NTDATA_LOCKS 256 - -/* Minimize memory bloat for non-prof builds. */ -#ifdef JEMALLOC_PROF -# define PROF_DUMP_FILENAME_LEN (PATH_MAX + 1) -#else -# define PROF_DUMP_FILENAME_LEN 1 -#endif - -/* Default number of recent allocations to record. */ -#define PROF_RECENT_ALLOC_MAX_DEFAULT 0 - -/* Thread name storage size limit. */ -#define PROF_THREAD_NAME_MAX_LEN 16 - -/* - * Minimum required alignment for sampled allocations. Over-aligning sampled - * allocations allows us to quickly identify them on the dalloc path without - * resorting to metadata lookup. 
- */ -#define PROF_SAMPLE_ALIGNMENT PAGE -#define PROF_SAMPLE_ALIGNMENT_MASK PAGE_MASK - -/* NOLINTNEXTLINE(performance-no-int-to-ptr) */ -#define PROF_TCTX_SENTINEL ((prof_tctx_t *)((uintptr_t)1U)) - -#endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */ diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 3ee8a6b3..0e7b029e 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/bit_util.h" +#include "jemalloc/internal/jemalloc_internal_externs.h" #include "jemalloc/internal/pages.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/util.h" diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h new file mode 100644 index 00000000..e9ea5ac1 --- /dev/null +++ b/include/jemalloc/internal/tcache.h @@ -0,0 +1,199 @@ +#ifndef JEMALLOC_INTERNAL_TCACHE_H +#define JEMALLOC_INTERNAL_TCACHE_H + +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/cache_bin.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/sc.h" +#include "jemalloc/internal/sz.h" +#include "jemalloc/internal/thread_event_registry.h" +#include "jemalloc/internal/ticker.h" + +/* Forward decls; only used as pointer types below. */ +typedef struct arena_s arena_t; +typedef struct base_s base_t; + +/******************************************************************************/ +/* TYPES */ +/******************************************************************************/ + +typedef struct tcache_slow_s tcache_slow_t; +typedef struct tcache_s tcache_t; +typedef struct tcaches_s tcaches_t; + +/* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */ +#define TCACHE_ZERO_INITIALIZER \ + { 0 } +#define TCACHE_SLOW_ZERO_INITIALIZER \ + { \ + { 0 } \ + } + +/* Used in TSD static initializer only. Will be initialized to opt_tcache. 
*/ +#define TCACHE_ENABLED_ZERO_INITIALIZER false + +/* Used for explicit tcache only. Means flushed but not destroyed. */ +/* NOLINTNEXTLINE(performance-no-int-to-ptr) */ +#define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1) + +#define TCACHE_LG_MAXCLASS_LIMIT LG_USIZE_GROW_SLOW_THRESHOLD +#define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT) +#define TCACHE_NBINS_MAX \ + (SC_NBINS \ + + SC_NGROUP * (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) \ + + 1) +#define TCACHE_GC_NEIGHBOR_LIMIT ((uintptr_t)1 << 21) /* 2M */ +#define TCACHE_GC_INTERVAL_NS ((uint64_t)10 * KQU(1000000)) /* 10ms */ +#define TCACHE_GC_SMALL_NBINS_MAX ((SC_NBINS > 8) ? (SC_NBINS >> 3) : 1) +#define TCACHE_GC_LARGE_NBINS_MAX 1 + +/******************************************************************************/ +/* STRUCTS */ +/******************************************************************************/ + +/* + * The tcache state is split into the slow and hot path data. Each has a + * pointer to the other, and the data always comes in pairs. The layout of each + * of them varies in practice; tcache_slow lives in the TSD for the automatic + * tcache, and as part of a dynamic allocation for manual allocations. Keeping + * a pointer to tcache_slow lets us treat these cases uniformly, rather than + * splitting up the tcache [de]allocation code into those paths called with the + * TSD tcache and those called with a manual tcache. + */ + +struct tcache_slow_s { + /* + * The descriptor lets the arena find our cache bins without seeing the + * tcache definition. This enables arenas to aggregate stats across + * tcaches without having a tcache dependency. + */ + cache_bin_array_descriptor_t cache_bin_array_descriptor; + + /* The arena this tcache is associated with. */ + arena_t *arena; + /* The number of bins activated in the tcache. */ + unsigned tcache_nbins; + /* Last time GC has been performed. */ + nstime_t last_gc_time; + /* Next bin to GC. 
*/ + szind_t next_gc_bin; + szind_t next_gc_bin_small; + szind_t next_gc_bin_large; + /* For small bins, help determine how many items to fill at a time. */ + cache_bin_fill_ctl_t bin_fill_ctl_do_not_access_directly[SC_NBINS]; + /* For small bins, whether the bin was refilled since the last GC. */ + bool bin_refilled[SC_NBINS]; + /* + * For small bins, the number of items we can pretend to flush before + * actually flushing. + */ + uint8_t bin_flush_delay_items[SC_NBINS]; + /* + * The start of the allocation containing the dynamic allocation for + * either the cache bins alone, or the cache bin memory as well as this + * tcache_slow_t and its associated tcache_t. + */ + void *dyn_alloc; + + /* The associated bins. */ + tcache_t *tcache; +}; + +struct tcache_s { + tcache_slow_t *tcache_slow; + cache_bin_t bins[TCACHE_NBINS_MAX]; +}; + +/* Linkage for list of available (previously used) explicit tcache IDs. */ +struct tcaches_s { + union { + tcache_t *tcache; + tcaches_t *next; + }; +}; + +/******************************************************************************/ +/* EXTERNS */ +/******************************************************************************/ + +extern bool opt_tcache; +extern size_t opt_tcache_max; +extern ssize_t opt_lg_tcache_nslots_mul; +extern unsigned opt_tcache_nslots_small_min; +extern unsigned opt_tcache_nslots_small_max; +extern unsigned opt_tcache_nslots_large; +extern ssize_t opt_lg_tcache_shift; +extern size_t opt_tcache_gc_incr_bytes; +extern size_t opt_tcache_gc_delay_bytes; +extern unsigned opt_lg_tcache_flush_small_div; +extern unsigned opt_lg_tcache_flush_large_div; + +/* + * Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more + * large-object bins. This is only used during thread initialization, and + * changing it has no effect on already-initialized threads. Thus, + * it should not be changed on the fly. To change the number of tcache bins + * in use, refer to tcache_nbins of each tcache. 
+ */ +extern unsigned global_do_not_change_tcache_nbins; + +/* + * Maximum cached size class. Same as above, this is only used during thread + * initialization and should not be changed. To change the maximum cached size + * class, refer to tcache_max of each tcache. + */ +extern size_t global_do_not_change_tcache_maxclass; + +/* + * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and + * usable via the MALLOCX_TCACHE() flag. The automatic per-thread tcaches are + * completely disjoint from this data structure. tcaches starts off as a sparse + * array, so it has no physical memory footprint until individual pages are + * touched. This allows the entire array to be allocated the first time an + * explicit tcache is created without a disproportionate impact on memory usage. + */ +extern tcaches_t *tcaches; + +size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); +void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, bool *tcache_success); + +void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, unsigned rem); +void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, unsigned rem); +void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, bool is_small); +bool tcache_bin_info_default_init( + const char *bin_settings_segment_cur, size_t len_left); +bool tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len); +bool tcache_bin_ncached_max_read( + tsd_t *tsd, size_t bin_size, cache_bin_sz_t *ncached_max); +void tcache_arena_reassociate( + tsdn_t *tsdn, tcache_slow_t *tcache_slow, arena_t *arena); +tcache_t *tcache_create_explicit(tsd_t *tsd); +bool thread_tcache_max_set(tsd_t *tsd, size_t tcache_max); +void tcache_cleanup(tsd_t *tsd); +bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind); +void tcaches_flush(tsd_t *tsd, 
unsigned ind); +void tcaches_destroy(tsd_t *tsd, unsigned ind); +bool tcache_boot(tsdn_t *tsdn, base_t *base); +void tcache_arena_associate( + tsdn_t *tsdn, tcache_slow_t *tcache_slow, arena_t *arena); +cache_bin_array_descriptor_t *tcache_postfork_arena_descriptor( + tsdn_t *tsdn, arena_t *arena); +void tcache_prefork(tsdn_t *tsdn); +void tcache_postfork_parent(tsdn_t *tsdn); +void tcache_postfork_child(tsdn_t *tsdn); +void tcache_flush(tsd_t *tsd); +bool tsd_tcache_enabled_data_init(tsd_t *tsd); +void tcache_enabled_set(tsd_t *tsd, bool enabled); + +extern void *(*JET_MUTABLE tcache_stack_alloc)(tsdn_t *tsdn, size_t size, + size_t alignment); + +void tcache_assert_initialized(tcache_t *tcache); + +extern te_base_cb_t tcache_gc_te_handler; + +#endif /* JEMALLOC_INTERNAL_TCACHE_H */ diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h deleted file mode 100644 index 4dc0bae9..00000000 --- a/include/jemalloc/internal/tcache_externs.h +++ /dev/null @@ -1,91 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_TCACHE_EXTERNS_H -#define JEMALLOC_INTERNAL_TCACHE_EXTERNS_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/cache_bin.h" -#include "jemalloc/internal/sz.h" -#include "jemalloc/internal/tcache_types.h" -#include "jemalloc/internal/thread_event_registry.h" - -extern bool opt_tcache; -extern size_t opt_tcache_max; -extern ssize_t opt_lg_tcache_nslots_mul; -extern unsigned opt_tcache_nslots_small_min; -extern unsigned opt_tcache_nslots_small_max; -extern unsigned opt_tcache_nslots_large; -extern ssize_t opt_lg_tcache_shift; -extern size_t opt_tcache_gc_incr_bytes; -extern size_t opt_tcache_gc_delay_bytes; -extern unsigned opt_lg_tcache_flush_small_div; -extern unsigned opt_lg_tcache_flush_large_div; - -/* - * Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more - * large-object bins. 
This is only used during threads initialization and - * changing it will not reflect on initialized threads as expected. Thus, - * it should not be changed on the fly. To change the number of tcache bins - * in use, refer to tcache_nbins of each tcache. - */ -extern unsigned global_do_not_change_tcache_nbins; - -/* - * Maximum cached size class. Same as above, this is only used during threads - * initialization and should not be changed. To change the maximum cached size - * class, refer to tcache_max of each tcache. - */ -extern size_t global_do_not_change_tcache_maxclass; - -/* - * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and - * usable via the MALLOCX_TCACHE() flag. The automatic per thread tcaches are - * completely disjoint from this data structure. tcaches starts off as a sparse - * array, so it has no physical memory footprint until individual pages are - * touched. This allows the entire array to be allocated the first time an - * explicit tcache is created without a disproportionate impact on memory usage. 
- */ -extern tcaches_t *tcaches; - -size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); -void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - cache_bin_t *cache_bin, szind_t binind, bool *tcache_success); - -void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, - cache_bin_t *cache_bin, szind_t binind, unsigned rem); -void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, - cache_bin_t *cache_bin, szind_t binind, unsigned rem); -void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, - cache_bin_t *cache_bin, szind_t binind, bool is_small); -bool tcache_bin_info_default_init( - const char *bin_settings_segment_cur, size_t len_left); -bool tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len); -bool tcache_bin_ncached_max_read( - tsd_t *tsd, size_t bin_size, cache_bin_sz_t *ncached_max); -void tcache_arena_reassociate( - tsdn_t *tsdn, tcache_slow_t *tcache_slow, arena_t *arena); -tcache_t *tcache_create_explicit(tsd_t *tsd); -bool thread_tcache_max_set(tsd_t *tsd, size_t tcache_max); -void tcache_cleanup(tsd_t *tsd); -bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind); -void tcaches_flush(tsd_t *tsd, unsigned ind); -void tcaches_destroy(tsd_t *tsd, unsigned ind); -bool tcache_boot(tsdn_t *tsdn, base_t *base); -void tcache_arena_associate( - tsdn_t *tsdn, tcache_slow_t *tcache_slow, arena_t *arena); -cache_bin_array_descriptor_t *tcache_postfork_arena_descriptor( - tsdn_t *tsdn, arena_t *arena); -void tcache_prefork(tsdn_t *tsdn); -void tcache_postfork_parent(tsdn_t *tsdn); -void tcache_postfork_child(tsdn_t *tsdn); -void tcache_flush(tsd_t *tsd); -bool tsd_tcache_enabled_data_init(tsd_t *tsd); -void tcache_enabled_set(tsd_t *tsd, bool enabled); - -extern void *(*JET_MUTABLE tcache_stack_alloc)(tsdn_t *tsdn, size_t size, - size_t alignment); - -void tcache_assert_initialized(tcache_t *tcache); - -extern te_base_cb_t tcache_gc_te_handler; - -#endif /* JEMALLOC_INTERNAL_TCACHE_EXTERNS_H */ diff --git 
a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index b3ce81bd..7abcbb40 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -2,15 +2,15 @@ #define JEMALLOC_INTERNAL_TCACHE_INLINES_H #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/arena_externs.h" +#include "jemalloc/internal/arena.h" #include "jemalloc/internal/bin.h" -#include "jemalloc/internal/jemalloc_internal_inlines_b.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/jemalloc_internal_types.h" -#include "jemalloc/internal/large_externs.h" +#include "jemalloc/internal/large.h" #include "jemalloc/internal/san.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/sz.h" -#include "jemalloc/internal/tcache_externs.h" +#include "jemalloc/internal/tcache.h" #include "jemalloc/internal/util.h" static inline bool diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h deleted file mode 100644 index 710286c9..00000000 --- a/include/jemalloc/internal/tcache_structs.h +++ /dev/null @@ -1,72 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_TCACHE_STRUCTS_H -#define JEMALLOC_INTERNAL_TCACHE_STRUCTS_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/cache_bin.h" -#include "jemalloc/internal/ql.h" -#include "jemalloc/internal/sc.h" -#include "jemalloc/internal/tcache_types.h" -#include "jemalloc/internal/ticker.h" - -/* - * The tcache state is split into the slow and hot path data. Each has a - * pointer to the other, and the data always comes in pairs. The layout of each - * of them varies in practice; tcache_slow lives in the TSD for the automatic - * tcache, and as part of a dynamic allocation for manual allocations. 
Keeping - * a pointer to tcache_slow lets us treat these cases uniformly, rather than - * splitting up the tcache [de]allocation code into those paths called with the - * TSD tcache and those called with a manual tcache. - */ - -struct tcache_slow_s { - /* - * The descriptor lets the arena find our cache bins without seeing the - * tcache definition. This enables arenas to aggregate stats across - * tcaches without having a tcache dependency. - */ - cache_bin_array_descriptor_t cache_bin_array_descriptor; - - /* The arena this tcache is associated with. */ - arena_t *arena; - /* The number of bins activated in the tcache. */ - unsigned tcache_nbins; - /* Last time GC has been performed. */ - nstime_t last_gc_time; - /* Next bin to GC. */ - szind_t next_gc_bin; - szind_t next_gc_bin_small; - szind_t next_gc_bin_large; - /* For small bins, help determine how many items to fill at a time. */ - cache_bin_fill_ctl_t bin_fill_ctl_do_not_access_directly[SC_NBINS]; - /* For small bins, whether has been refilled since last GC. */ - bool bin_refilled[SC_NBINS]; - /* - * For small bins, the number of items we can pretend to flush before - * actually flushing. - */ - uint8_t bin_flush_delay_items[SC_NBINS]; - /* - * The start of the allocation containing the dynamic allocation for - * either the cache bins alone, or the cache bin memory as well as this - * tcache_slow_t and its associated tcache_t. - */ - void *dyn_alloc; - - /* The associated bins. */ - tcache_t *tcache; -}; - -struct tcache_s { - tcache_slow_t *tcache_slow; - cache_bin_t bins[TCACHE_NBINS_MAX]; -}; - -/* Linkage for list of available (previously used) explicit tcache IDs. 
*/ -struct tcaches_s { - union { - tcache_t *tcache; - tcaches_t *next; - }; -}; - -#endif /* JEMALLOC_INTERNAL_TCACHE_STRUCTS_H */ diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h deleted file mode 100644 index 27d80d3c..00000000 --- a/include/jemalloc/internal/tcache_types.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_TCACHE_TYPES_H -#define JEMALLOC_INTERNAL_TCACHE_TYPES_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/sc.h" - -typedef struct tcache_slow_s tcache_slow_t; -typedef struct tcache_s tcache_t; -typedef struct tcaches_s tcaches_t; - -/* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */ -#define TCACHE_ZERO_INITIALIZER \ - { 0 } -#define TCACHE_SLOW_ZERO_INITIALIZER \ - { \ - { 0 } \ - } - -/* Used in TSD static initializer only. Will be initialized to opt_tcache. */ -#define TCACHE_ENABLED_ZERO_INITIALIZER false - -/* Used for explicit tcache only. Means flushed but not destroyed. */ -/* NOLINTNEXTLINE(performance-no-int-to-ptr) */ -#define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1) - -#define TCACHE_LG_MAXCLASS_LIMIT LG_USIZE_GROW_SLOW_THRESHOLD -#define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT) -#define TCACHE_NBINS_MAX \ - (SC_NBINS \ - + SC_NGROUP * (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) \ - + 1) -#define TCACHE_GC_NEIGHBOR_LIMIT ((uintptr_t)1 << 21) /* 2M */ -#define TCACHE_GC_INTERVAL_NS ((uint64_t)10 * KQU(1000000)) /* 10ms */ -#define TCACHE_GC_SMALL_NBINS_MAX ((SC_NBINS > 8) ? 
(SC_NBINS >> 3) : 1) -#define TCACHE_GC_LARGE_NBINS_MAX 1 - -#endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */ diff --git a/include/jemalloc/internal/test_hooks.h b/include/jemalloc/internal/test_hooks.h index 35f3a211..dfe8a86e 100644 --- a/include/jemalloc/internal/test_hooks.h +++ b/include/jemalloc/internal/test_hooks.h @@ -1,8 +1,6 @@ #ifndef JEMALLOC_INTERNAL_TEST_HOOKS_H #define JEMALLOC_INTERNAL_TEST_HOOKS_H -#include "jemalloc/internal/jemalloc_preamble.h" - extern JEMALLOC_EXPORT void (*test_hooks_arena_new_hook)(void); extern JEMALLOC_EXPORT void (*test_hooks_libc_hook)(void); diff --git a/include/jemalloc/internal/tsd_binshards.h b/include/jemalloc/internal/tsd_binshards.h new file mode 100644 index 00000000..a4afc496 --- /dev/null +++ b/include/jemalloc/internal/tsd_binshards.h @@ -0,0 +1,24 @@ +#ifndef JEMALLOC_INTERNAL_TSD_BINSHARDS_H +#define JEMALLOC_INTERNAL_TSD_BINSHARDS_H + +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/sc.h" + +/* + * Per-thread cache of bin-shard assignments. This lives in its own header + * (separate from bin.h) so that tsd_internals.h can pull it in for X-macro + * expansion without dragging in mutex.h, which itself depends on TSD machinery + * and would form an include-order dependency cycle. 
+ */ + +#define TSD_BINSHARDS_ZERO_INITIALIZER \ + { \ + { UINT8_MAX } \ + } + +typedef struct tsd_binshards_s tsd_binshards_t; +struct tsd_binshards_s { + uint8_t binshard[SC_NBINS]; +}; + +#endif /* JEMALLOC_INTERNAL_TSD_BINSHARDS_H */ diff --git a/include/jemalloc/internal/tsd_internals.h b/include/jemalloc/internal/tsd_internals.h index 46b4930f..b8970ed0 100644 --- a/include/jemalloc/internal/tsd_internals.h +++ b/include/jemalloc/internal/tsd_internals.h @@ -4,16 +4,24 @@ #define JEMALLOC_INTERNAL_TSD_INTERNALS_H #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/arena_types.h" +#include "jemalloc/internal/arena_decay_constants.h" #include "jemalloc/internal/assert.h" -#include "jemalloc/internal/bin_types.h" +#include "jemalloc/internal/tsd_binshards.h" #include "jemalloc/internal/jemalloc_internal_externs.h" #include "jemalloc/internal/peak.h" -#include "jemalloc/internal/prof_types.h" #include "jemalloc/internal/rtree_tsd.h" -#include "jemalloc/internal/tcache_structs.h" -#include "jemalloc/internal/tcache_types.h" +#include "jemalloc/internal/tcache.h" #include "jemalloc/internal/thread_event_registry.h" + +/* + * Forward decls. tsd_internals.h cannot include arena.h / prof.h directly: + * those headers' STRUCTS-section includes trigger mutex.h -> tsd.h -> + * tsd_generic.h, which would re-enter this file before its body finishes. + * Each consumer here only uses these as pointer types. 
+ */ +typedef struct arena_s arena_t; +typedef struct prof_tdata_s prof_tdata_t; + #include "jemalloc/internal/tsd_types.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/witness.h" diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 1ba81aad..a48ca889 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -73,6 +73,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index c196ce59..e4bbe65a 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -94,6 +94,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 62c36ea5..bc2685c0 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -73,6 +73,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index c196ce59..e4bbe65a 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -94,6 +94,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index ed35784b..dffda081 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -73,6 +73,7 @@ + diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index c196ce59..e4bbe65a 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -94,6 +94,9 @@ Source Files + + 
Source Files + Source Files diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index 7c84196d..c48f9a7b 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -73,6 +73,7 @@ + diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index c196ce59..e4bbe65a 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -94,6 +94,9 @@ Source Files + + Source Files + Source Files diff --git a/scripts/gen_header_dep_graph.sh b/scripts/gen_header_dep_graph.sh new file mode 100755 index 00000000..0b99d5b0 --- /dev/null +++ b/scripts/gen_header_dep_graph.sh @@ -0,0 +1,100 @@ +#!/bin/bash +# Generate header dependency graph + tsort cycle check. +# +# Outputs (under build/): +# header_deps_baseline.txt - sorted, unique header->header (and .c->header) +# edges derived from #include "..." directives. +# header_tsort_order.txt - topological order produced by tsort. +# header_tsort_cycles.txt - stderr from tsort; non-empty if there's a cycle. +# per_tu_deps.txt - per-translation-unit transitive header lists +# (one TU per line; basenames only; sorted). +# +# All headers / sources are reduced to basenames so renames in the cleanup are +# easy to spot in the diff. + +set -euo pipefail + +repo_root="$(cd "$(dirname "$0")/.." && pwd)" +cd "$repo_root" +mkdir -p build + +edges_raw=build/header_edges_raw.txt +edges_baseline=build/header_deps_baseline.txt +tsort_order=build/header_tsort_order.txt +tsort_cycles=build/header_tsort_cycles.txt +per_tu=build/per_tu_deps.txt + +: > "$edges_raw" + +extract_edges() { + local src="$1" + local bn + bn=$(basename "$src") + # Match `#include "..."` (skip `<...>` system includes). 
Strip path, keep + # only the basename of the included file, so the graph collapses + # `../jemalloc.h` and similar relative paths into a single node. + awk ' + /^[[:space:]]*#[[:space:]]*include[[:space:]]+"/ { + match($0, /"[^"]+"/) + inc = substr($0, RSTART + 1, RLENGTH - 2) + n = split(inc, parts, "/") + print parts[n] + } + ' "$src" | while read -r dep; do + printf '%s %s\n' "$bn" "$dep" + done +} + +# Headers (include/jemalloc/internal/) — these are the nodes we care about for +# cycle detection. +for f in include/jemalloc/internal/*.h include/jemalloc/*.h; do + extract_edges "$f" >> "$edges_raw" +done + +# Translation units (src/*.c) — included so that .c files appear as graph +# sources in the baseline. They can't introduce cycles (nothing #includes a +# .c), but the edges are useful when diffing later. +for f in src/*.c; do + extract_edges "$f" >> "$edges_raw" +done + +sort -u "$edges_raw" > "$edges_baseline" + +# tsort consumes "A B" pairs and reports cycles on stderr. +: > "$tsort_cycles" +if ! tsort "$edges_baseline" > "$tsort_order" 2> "$tsort_cycles"; then + echo "tsort exited non-zero; see $tsort_cycles" >&2 +fi + +# Per-translation-unit transitive header lists, harvested from the .d files +# the build already produced (CC_MM=1 in the Makefile). +: > "$per_tu" +for d in src/*.d; do + [ -f "$d" ] || continue + tu=$(basename "${d%.d}.c") + # Strip the make rule prefix ("foo.o: foo.c \"), drop line continuations, + # collapse to basenames, sort+uniq. 
+ deps=$( + tr '\n' ' ' < "$d" \ + | sed -E 's/\\//g' \ + | tr ' ' '\n' \ + | { grep -E '\.h$' || true; } \ + | awk -F/ '{print $NF}' \ + | sort -u \ + | tr '\n' ' ' \ + | sed -E 's/[[:space:]]+$//' + ) # grep is guarded with '|| true': a .d file with no .h entries must not abort the script under set -e/pipefail + printf '%s: %s\n' "$tu" "$deps" >> "$per_tu" +done +sort -o "$per_tu" "$per_tu" + +edge_count=$(wc -l < "$edges_baseline" | tr -d ' ') +cycle_bytes=$(wc -c < "$tsort_cycles" | tr -d ' ') +tu_count=$(wc -l < "$per_tu" | tr -d ' ') +echo "edges: $edge_count" +echo "translation units: $tu_count" +echo "tsort cycle output bytes: $cycle_bytes" +if [ "$cycle_bytes" -gt 0 ]; then + echo "---- tsort cycle report ----" + cat "$tsort_cycles" +fi diff --git a/src/arena.c b/src/arena.c index d8bd7ae7..c4a31f63 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1,16 +1,28 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/background_thread.h" +#include "jemalloc/internal/background_thread_inlines.h" #include "jemalloc/internal/decay.h" #include "jemalloc/internal/ehooks.h" +#include "jemalloc/internal/extent.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" -#include "jemalloc/internal/san.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" +#include "jemalloc/internal/large.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/prof_inlines.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/safety_check.h" +#include "jemalloc/internal/san.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/tcache_inlines.h" #include "jemalloc/internal/util.h" +#include "jemalloc/internal/witness.h" JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS @@ -36,6 +48,14 @@ static pa_central_t arena_pa_central_global; div_info_t 
arena_binind_div_info[SC_NBINS]; +JET_EXTERN void +bin_dalloc_locked_begin( + bin_dalloc_locked_info_t *info, szind_t binind) { + info->div_info = arena_binind_div_info[binind]; + info->nregs = bin_infos[binind].nregs; + info->ndalloc = 0; +} + size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; size_t oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; @@ -694,11 +714,13 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize, size_t bumped_usize) { assert(isalloc(tsdn, ptr) == usize); } -static size_t +size_t arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) { cassert(config_prof); + assert(opt_prof); assert(ptr != NULL); - size_t usize = isalloc(tsdn, ptr); + size_t usize = edata_usize_get(edata); + assert(isalloc(tsdn, ptr) == usize); size_t bumped_usize = sz_sa2u(usize, PROF_SAMPLE_ALIGNMENT); assert(bumped_usize <= SC_LARGE_MINCLASS && PAGE_CEILING(bumped_usize) == bumped_usize); @@ -710,17 +732,6 @@ arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) { assert(isalloc(tsdn, ptr) == bumped_usize); - return bumped_usize; -} - -static void -arena_dalloc_promoted_impl( - tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path, edata_t *edata) { - cassert(config_prof); - assert(opt_prof); - - size_t usize = edata_usize_get(edata); - size_t bumped_usize = arena_prof_demote(tsdn, edata, ptr); if (config_opt_safety_checks && usize < SC_LARGE_MINCLASS) { /* * Currently, we only do redzoning for small sampled @@ -728,21 +739,8 @@ arena_dalloc_promoted_impl( */ safety_check_verify_redzone(ptr, usize, bumped_usize); } - szind_t bumped_ind = sz_size2index(bumped_usize); - if (bumped_usize >= SC_LARGE_MINCLASS && tcache != NULL - && tcache_can_cache_large(tcache, bumped_ind)) { - tcache_dalloc_large( - tsdn_tsd(tsdn), tcache, ptr, bumped_ind, slow_path); - } else { - large_dalloc(tsdn, edata); - } -} -void -arena_dalloc_promoted( - tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) { - edata_t *edata = emap_edata_lookup(tsdn, 
&arena_emap_global, ptr); - arena_dalloc_promoted_impl(tsdn, ptr, tcache, slow_path, edata); + return bumped_usize; } void @@ -784,8 +782,8 @@ arena_reset(tsd_t *tsd, arena_t *arena) { prof_free(tsd, ptr, usize, &alloc_ctx); } if (config_prof && opt_prof && alloc_ctx.szind < SC_NBINS) { - arena_dalloc_promoted_impl(tsd_tsdn(tsd), ptr, - /* tcache */ NULL, /* slow_path */ true, edata); + arena_prof_demote(tsd_tsdn(tsd), edata, ptr); + large_dalloc(tsd_tsdn(tsd), edata); } else { large_dalloc(tsd_tsdn(tsd), edata); } @@ -1154,33 +1152,6 @@ arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, } } -void * -arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, - bool zero, bool slab, tcache_t *tcache) { - if (slab) { - assert(sz_can_use_slab(usize)); - /* Small; alignment doesn't require special slab placement. */ - - /* usize should be a result of sz_sa2u() */ - assert((usize & (alignment - 1)) == 0); - - /* - * Small usize can't come from an alignment larger than a page. 
- */ - assert(alignment <= PAGE); - - return arena_malloc(tsdn, arena, usize, sz_size2index(usize), - zero, slab, tcache, true); - } else { - if (likely(alignment <= CACHELINE)) { - return large_malloc(tsdn, arena, usize, zero); - } else { - return large_palloc( - tsdn, arena, usize, alignment, zero); - } - } -} - static void arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { szind_t binind = edata_szind_get(edata); @@ -1607,64 +1578,6 @@ done: return ret; } -static void * -arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool zero, bool slab, tcache_t *tcache) { - if (alignment == 0) { - return arena_malloc(tsdn, arena, usize, sz_size2index(usize), - zero, slab, tcache, true); - } - usize = sz_sa2u(usize, alignment); - if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { - return NULL; - } - return ipalloct_explicit_slab( - tsdn, usize, alignment, zero, slab, tcache, arena); -} - -void * -arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, - size_t size, size_t alignment, bool zero, bool slab, tcache_t *tcache) { - size_t usize = alignment == 0 ? sz_s2u(size) : sz_sa2u(size, alignment); - if (unlikely(usize == 0 || size > SC_LARGE_MAXCLASS)) { - return NULL; - } - - if (likely(slab)) { - assert(sz_can_use_slab(usize)); - /* Try to avoid moving the allocation. */ - UNUSED size_t newsize; - if (!arena_ralloc_no_move( - tsdn, ptr, oldsize, usize, 0, zero, &newsize)) { - return ptr; - } - } - - if (oldsize >= SC_LARGE_MINCLASS && usize >= SC_LARGE_MINCLASS) { - return large_ralloc(tsdn, arena, ptr, usize, alignment, zero, - tcache); - } - - /* - * size and oldsize are different enough that we need to move the - * object. In that case, fall back to allocating new space and copying. 
- */ - void *ret = arena_ralloc_move_helper( - tsdn, arena, usize, alignment, zero, slab, tcache); - if (ret == NULL) { - return NULL; - } - - /* - * Junk/zero-filling were already done by - * ipalloc()/arena_malloc(). - */ - size_t copysize = (usize < oldsize) ? usize : oldsize; - memcpy(ret, ptr, copysize); - isdalloct(tsdn, ptr, oldsize, tcache, NULL, true); - return ret; -} - ehooks_t * arena_get_ehooks(const arena_t *arena) { return base_ehooks_get(arena->base); diff --git a/src/arenas_management.c b/src/arenas_management.c index 261557b6..394303a3 100644 --- a/src/arenas_management.c +++ b/src/arenas_management.c @@ -1,11 +1,17 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" #include "jemalloc/internal/arenas_management.h" +#include "jemalloc/internal/background_thread.h" +#include "jemalloc/internal/background_thread_inlines.h" #include "jemalloc/internal/jemalloc_init.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/arena_inlines.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/sz.h" +#include "jemalloc/internal/tcache.h" JEMALLOC_ALIGNED(CACHELINE) atomic_p_t arenas[MALLOCX_ARENA_LIMIT]; diff --git a/src/background_thread.c b/src/background_thread.c index 4901856a..dcda912f 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -1,7 +1,20 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines.h" +#include "jemalloc/internal/arenas_management.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/background_thread.h" +#include "jemalloc/internal/background_thread_inlines.h" +#include "jemalloc/internal/ctl.h" +#include 
"jemalloc/internal/jemalloc_init.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/witness.h" JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS diff --git a/src/base.c b/src/base.c index 76227a5e..0ac658b7 100644 --- a/src/base.c +++ b/src/base.c @@ -1,7 +1,9 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/sz.h" diff --git a/src/bin.c b/src/bin.c index 30a78aba..694579b9 100644 --- a/src/bin.c +++ b/src/bin.c @@ -1,6 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/bin.h" #include "jemalloc/internal/sc.h" diff --git a/src/bin_info.c b/src/bin_info.c index e10042fd..0b8e551a 100644 --- a/src/bin_info.c +++ b/src/bin_info.c @@ -1,6 +1,6 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/assert.h" #include "jemalloc/internal/bin_info.h" bin_info_t bin_infos[SC_NBINS]; diff --git a/src/bitmap.c b/src/bitmap.c index 8ac81a67..c399a05c 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/bitmap.h" 
/******************************************************************************/ diff --git a/src/buf_writer.c b/src/buf_writer.c index 3c298502..48b94100 100644 --- a/src/buf_writer.c +++ b/src/buf_writer.c @@ -1,7 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/buf_writer.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/malloc_io.h" static void * diff --git a/src/cache_bin.c b/src/cache_bin.c index ec677948..170e21b0 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -1,6 +1,6 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/base.h" #include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/cache_bin.h" #include "jemalloc/internal/safety_check.h" diff --git a/src/ckh.c b/src/ckh.c index 80688162..1bfcdf2f 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -38,10 +38,11 @@ #include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" - +#include "jemalloc/internal/arena.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/hash.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/util.h" diff --git a/src/conf.c b/src/conf.c index ecef73f5..14a9a048 100644 --- a/src/conf.c +++ b/src/conf.c @@ -1,8 +1,11 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/background_thread.h" +#include "jemalloc/internal/conf.h" +#include "jemalloc/internal/extent.h" #include 
"jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/fxp.h" @@ -10,13 +13,13 @@ #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/prof.h" #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/san.h" #include "jemalloc/internal/sc.h" +#include "jemalloc/internal/tcache.h" #include "jemalloc/internal/util.h" -#include "jemalloc/internal/conf.h" - /* Whether encountered any invalid config options. */ bool had_conf_error; diff --git a/src/counter.c b/src/counter.c index 8257a062..243c41ba 100644 --- a/src/counter.c +++ b/src/counter.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/counter.h" +#include "jemalloc/internal/witness.h" bool counter_accum_init(counter_accum_t *counter, uint64_t interval) { diff --git a/src/ctl.c b/src/ctl.c index e048135a..3d628429 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1,22 +1,32 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines.h" +#include "jemalloc/internal/arenas_management.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/background_thread.h" +#include "jemalloc/internal/background_thread_inlines.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/inspect.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/peak_event.h" +#include "jemalloc/internal/prof.h" #include "jemalloc/internal/prof_data.h" +#include "jemalloc/internal/prof_inlines.h" 
#include "jemalloc/internal/prof_log.h" #include "jemalloc/internal/prof_recent.h" #include "jemalloc/internal/prof_stats.h" #include "jemalloc/internal/prof_sys.h" #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sc.h" +#include "jemalloc/internal/tcache.h" #include "jemalloc/internal/util.h" +#include "jemalloc/internal/witness.h" /******************************************************************************/ /* Data. */ diff --git a/src/decay.c b/src/decay.c index 7bbce2a6..1ed23bcb 100644 --- a/src/decay.c +++ b/src/decay.c @@ -1,5 +1,4 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/decay.h" diff --git a/src/ecache.c b/src/ecache.c index 20fcee9e..e6620a8a 100644 --- a/src/ecache.c +++ b/src/ecache.c @@ -1,7 +1,9 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" -#include "jemalloc/internal/san.h" +#include "jemalloc/internal/ecache.h" +#include "jemalloc/internal/eset.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/witness.h" bool ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state, unsigned ind, diff --git a/src/edata.c b/src/edata.c index d71d1679..575e4c86 100644 --- a/src/edata.c +++ b/src/edata.c @@ -1,5 +1,6 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/edata.h" ph_gen(, edata_avail, edata_t, avail_link, edata_esnead_comp) ph_gen(, edata_heap, edata_t, heap_link, edata_snad_comp) diff --git a/src/edata_cache.c b/src/edata_cache.c index 3ac8273a..68a399da 100644 --- a/src/edata_cache.c +++ b/src/edata_cache.c @@ -1,5 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/edata_cache.h" +#include "jemalloc/internal/witness.h" bool edata_cache_init(edata_cache_t 
*edata_cache, base_t *base) { diff --git a/src/ehooks.c b/src/ehooks.c index d7abb960..bd9a8ac6 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -1,8 +1,14 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/edata.h" #include "jemalloc/internal/ehooks.h" +#include "jemalloc/internal/emap.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" void ehooks_init(ehooks_t *ehooks, extent_hooks_t *extent_hooks, unsigned ind) { diff --git a/src/emap.c b/src/emap.c index c9a371d2..c6936f8a 100644 --- a/src/emap.c +++ b/src/emap.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/emap.h" +#include "jemalloc/internal/extent.h" enum emap_lock_result_e { emap_lock_result_success, diff --git a/src/eset.c b/src/eset.c index bdce1834..4d1f8f04 100644 --- a/src/eset.c +++ b/src/eset.c @@ -1,5 +1,4 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/eset.h" diff --git a/src/exp_grow.c b/src/exp_grow.c index 955823a1..17699561 100644 --- a/src/exp_grow.c +++ b/src/exp_grow.c @@ -1,5 +1,6 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/exp_grow.h" void exp_grow_init(exp_grow_t *exp_grow) { diff --git a/src/extent.c b/src/extent.c index cf935c18..2e2977f3 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1,12 +1,22 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" #include "jemalloc/internal/assert.h" +#include 
"jemalloc/internal/background_thread.h" +#include "jemalloc/internal/background_thread_inlines.h" +#include "jemalloc/internal/edata_cache.h" #include "jemalloc/internal/emap.h" +#include "jemalloc/internal/extent.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" -#include "jemalloc/internal/ph.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/pac.h" +#include "jemalloc/internal/ph.h" +#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/prof_inlines.h" +#include "jemalloc/internal/witness.h" /******************************************************************************/ /* Data. */ diff --git a/src/extent_dss.c b/src/extent_dss.c index 8fac71a7..16dbe8a2 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -1,9 +1,14 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/edata_cache.h" +#include "jemalloc/internal/extent.h" #include "jemalloc/internal/extent_dss.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" #include "jemalloc/internal/spin.h" +#include "jemalloc/internal/tsd.h" /******************************************************************************/ /* Data. */ diff --git a/src/extent_mmap.c b/src/extent_mmap.c index d39bddc6..10574618 100644 --- a/src/extent_mmap.c +++ b/src/extent_mmap.c @@ -1,8 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/extent_mmap.h" +#include "jemalloc/internal/pages.h" /******************************************************************************/ /* Data. 
*/ diff --git a/src/fxp.c b/src/fxp.c index faeab207..ff3de54e 100644 --- a/src/fxp.c +++ b/src/fxp.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/fxp.h" +#include "jemalloc/internal/malloc_io.h" static bool fxp_isdigit(char c) { diff --git a/src/hpa.c b/src/hpa.c index d59b7fc7..a4b3750c 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -1,12 +1,11 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/background_thread.h" +#include "jemalloc/internal/fb.h" #include "jemalloc/internal/hpa.h" #include "jemalloc/internal/hpa_utils.h" - -#include "jemalloc/internal/fb.h" -#include "jemalloc/internal/witness.h" #include "jemalloc/internal/jemalloc_probe.h" +#include "jemalloc/internal/witness.h" static void hpa_dalloc_batch(tsdn_t *tsdn, hpa_shard_t *shard, edata_list_active_t *list, bool *deferred_work_generated); diff --git a/src/hpa_central.c b/src/hpa_central.c index b4f770c2..9d75f501 100644 --- a/src/hpa_central.c +++ b/src/hpa_central.c @@ -1,6 +1,6 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/hpa.h" #include "jemalloc/internal/hpa_central.h" #include "jemalloc/internal/tsd.h" #include "jemalloc/internal/witness.h" diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index 2ec7029d..1c292732 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -1,8 +1,9 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/hpa_hooks.h" #include "jemalloc/internal/jemalloc_probe.h" +#include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/pages.h" static void *hpa_hooks_map(size_t size); static void hpa_hooks_unmap(void *ptr, size_t size); diff --git a/src/hpa_utils.c b/src/hpa_utils.c index 59bb0d1f..02817d83 100644 
--- a/src/hpa_utils.c +++ b/src/hpa_utils.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/hpa_utils.h" +#include "jemalloc/internal/hpdata.h" void hpa_purge_batch(hpa_hooks_t *hooks, hpa_purge_item_t *batch, size_t batch_sz) { diff --git a/src/hpdata.c b/src/hpdata.c index a538a422..1da4ffa0 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -1,5 +1,4 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/hpdata.h" diff --git a/src/inspect.c b/src/inspect.c index 1c0de129..587d9236 100644 --- a/src/inspect.c +++ b/src/inspect.c @@ -1,5 +1,10 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines.h" +#include "jemalloc/internal/bin_info.h" +#include "jemalloc/internal/edata.h" +#include "jemalloc/internal/emap.h" #include "jemalloc/internal/inspect.h" void diff --git a/src/jemalloc.c b/src/jemalloc.c index 6544657d..12fc5f6e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1,32 +1,41 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/arenas_management.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/background_thread.h" #include "jemalloc/internal/buf_writer.h" +#include "jemalloc/internal/conf.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/emap.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/fxp.h" -#include "jemalloc/internal/san.h" #include "jemalloc/internal/jemalloc_init.h" +#include 
"jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/large.h" #include "jemalloc/internal/log.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/prof_inlines.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/safety_check.h" +#include "jemalloc/internal/san.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/spin.h" #include "jemalloc/internal/sz.h" -#include "jemalloc/internal/ticker.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/tcache_inlines.h" #include "jemalloc/internal/thread_event.h" +#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/util.h" - -#include "jemalloc/internal/conf.h" +#include "jemalloc/internal/witness.h" /******************************************************************************/ /* Data. */ @@ -139,6 +148,24 @@ const char *const zero_realloc_mode_names[] = { "abort", }; +/* + * Check whether the next allocation would trip the profiling sampler without + * advancing the event counter (the counter only advances at the end of the + * alloc/dalloc call). Lets the allocation path pre-compute the prof context + * before committing. Lives here -- not in prof_inlines.h -- because jemalloc.c + * is the only production caller. + */ +JEMALLOC_ALWAYS_INLINE bool +prof_sample_lookahead(tsd_t *tsd, size_t usize) { + if (unlikely(!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0)) { + return false; + } + /* The subtraction is intentionally susceptible to underflow. 
*/ + uint64_t accumbytes = tsd_thread_allocated_get(tsd) + usize + - tsd_thread_allocated_last_event_get(tsd); + return accumbytes >= tsd_prof_sample_event_wait_get(tsd); +} + /* * These are the documented values for junk fill debugging facilities -- see the * man page. @@ -600,7 +627,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { /* If profiling is on, get our profiling context. */ if (config_prof && opt_prof) { bool prof_active = prof_active_get_unlocked(); - bool sample_event = te_prof_sample_event_lookahead(tsd, usize); + bool sample_event = prof_sample_lookahead(tsd, usize); prof_tctx_t *tctx = prof_alloc_prep( tsd, prof_active, sample_event); @@ -1402,7 +1429,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, prof_info_t old_prof_info; prof_info_get_and_reset_recent(tsd, old_ptr, alloc_ctx, &old_prof_info); bool prof_active = prof_active_get_unlocked(); - bool sample_event = te_prof_sample_event_lookahead(tsd, usize); + bool sample_event = prof_sample_lookahead(tsd, usize); prof_tctx_t *tctx = prof_alloc_prep(tsd, prof_active, sample_event); void *p; if (unlikely(tctx != PROF_TCTX_SENTINEL)) { @@ -1640,7 +1667,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, usize_max = SC_LARGE_MAXCLASS; } bool prof_active = prof_active_get_unlocked(); - bool sample_event = te_prof_sample_event_lookahead(tsd, usize_max); + bool sample_event = prof_sample_lookahead(tsd, usize_max); prof_tctx_t *tctx = prof_alloc_prep(tsd, prof_active, sample_event); size_t usize; @@ -1675,7 +1702,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, prof_info_get_and_reset_recent( tsd, ptr, &new_alloc_ctx, &prof_info); assert(usize <= usize_max); - sample_event = te_prof_sample_event_lookahead(tsd, usize); + sample_event = prof_sample_lookahead(tsd, usize); prof_realloc(tsd, ptr, size, usize, tctx, prof_active, ptr, old_usize, &prof_info, sample_event); } diff --git a/src/jemalloc_cpp.cpp 
b/src/jemalloc_cpp.cpp index ac109bb2..193b536b 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -7,7 +7,17 @@ extern "C" { #endif #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines.h" +#include "jemalloc/internal/jemalloc_internal_externs.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/prof_inlines.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/tcache_inlines.h" #ifdef __cplusplus } diff --git a/src/jemalloc_fork.c b/src/jemalloc_fork.c index 9bab77e8..6d23c47e 100644 --- a/src/jemalloc_fork.c +++ b/src/jemalloc_fork.c @@ -1,10 +1,17 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" #include "jemalloc/internal/arenas_management.h" +#include "jemalloc/internal/background_thread.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/jemalloc_fork.h" #include "jemalloc/internal/jemalloc_init.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/arena_inlines.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/tsd.h" /******************************************************************************/ /* diff --git a/src/jemalloc_init.c b/src/jemalloc_init.c index 2500a385..88ae41c3 100644 --- a/src/jemalloc_init.c +++ b/src/jemalloc_init.c @@ -1,7 +1,17 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines.h" #include 
"jemalloc/internal/arenas_management.h" +#include "jemalloc/internal/background_thread.h" +#include "jemalloc/internal/background_thread_inlines.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" +#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/prof_inlines.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/tcache_inlines.h" +#include "jemalloc/internal/witness.h" #include "jemalloc/internal/conf.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/emap.h" diff --git a/src/large.c b/src/large.c index 610c9b6c..f5894cfa 100644 --- a/src/large.c +++ b/src/large.c @@ -1,10 +1,16 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/emap.h" +#include "jemalloc/internal/extent.h" #include "jemalloc/internal/extent_mmap.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" +#include "jemalloc/internal/large.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/prof_inlines.h" #include "jemalloc/internal/prof_recent.h" #include "jemalloc/internal/util.h" diff --git a/src/log.c b/src/log.c index 9b1c6261..63054d83 100644 --- a/src/log.c +++ b/src/log.c @@ -1,5 +1,4 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/log.h" diff --git a/src/malloc_dispatch.c b/src/malloc_dispatch.c new file mode 100644 index 00000000..34c31ad8 --- /dev/null +++ b/src/malloc_dispatch.c @@ -0,0 +1,117 @@ +#include "jemalloc/internal/jemalloc_preamble.h" + +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/emap.h" +#include 
"jemalloc/internal/jemalloc_internal_inlines_c.h" +#include "jemalloc/internal/large.h" +#include "jemalloc/internal/malloc_dispatch_externs.h" +#include "jemalloc/internal/malloc_dispatch_inlines.h" +#include "jemalloc/internal/sc.h" +#include "jemalloc/internal/sz.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/tcache_inlines.h" + +/******************************************************************************/ + +void +malloc_dispatch_dalloc_promoted( + tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) { + cassert(config_prof); + assert(opt_prof); + + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); + size_t bumped_usize = arena_prof_demote(tsdn, edata, ptr); + szind_t bumped_ind = sz_size2index(bumped_usize); + if (bumped_usize >= SC_LARGE_MINCLASS && tcache != NULL + && tcache_can_cache_large(tcache, bumped_ind)) { + tcache_dalloc_large( + tsdn_tsd(tsdn), tcache, ptr, bumped_ind, slow_path); + } else { + large_dalloc(tsdn, edata); + } +} + +void * +malloc_dispatch_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, + size_t alignment, bool zero, bool slab, tcache_t *tcache) { + if (slab) { + assert(sz_can_use_slab(usize)); + /* Small; alignment doesn't require special slab placement. */ + + /* usize should be a result of sz_sa2u() */ + assert((usize & (alignment - 1)) == 0); + + /* + * Small usize can't come from an alignment larger than a page. 
+ */ + assert(alignment <= PAGE); + + return malloc_dispatch_malloc(tsdn, arena, usize, + sz_size2index(usize), zero, slab, tcache, true); + } else { + if (likely(alignment <= CACHELINE)) { + return large_malloc(tsdn, arena, usize, zero); + } else { + return large_palloc( + tsdn, arena, usize, alignment, zero); + } + } +} + +static void * +malloc_dispatch_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, + size_t alignment, bool zero, bool slab, tcache_t *tcache) { + if (alignment == 0) { + return malloc_dispatch_malloc(tsdn, arena, usize, + sz_size2index(usize), zero, slab, tcache, true); + } + usize = sz_sa2u(usize, alignment); + if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { + return NULL; + } + return ipalloct_explicit_slab( + tsdn, usize, alignment, zero, slab, tcache, arena); +} + +void * +malloc_dispatch_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, + size_t size, size_t alignment, bool zero, bool slab, tcache_t *tcache) { + size_t usize = alignment == 0 ? sz_s2u(size) : sz_sa2u(size, alignment); + if (unlikely(usize == 0 || size > SC_LARGE_MAXCLASS)) { + return NULL; + } + + if (likely(slab)) { + assert(sz_can_use_slab(usize)); + /* Try to avoid moving the allocation. */ + UNUSED size_t newsize; + if (!arena_ralloc_no_move( + tsdn, ptr, oldsize, usize, 0, zero, &newsize)) { + return ptr; + } + } + + if (oldsize >= SC_LARGE_MINCLASS && usize >= SC_LARGE_MINCLASS) { + return large_ralloc(tsdn, arena, ptr, usize, alignment, zero, + tcache); + } + + /* + * size and oldsize are different enough that we need to move the + * object. In that case, fall back to allocating new space and copying. + */ + void *ret = malloc_dispatch_ralloc_move_helper( + tsdn, arena, usize, alignment, zero, slab, tcache); + if (ret == NULL) { + return NULL; + } + + /* + * Junk/zero-filling were already done by ipalloc() / dispatch alloc. + */ + size_t copysize = (usize < oldsize) ? 
usize : oldsize; + memcpy(ret, ptr, copysize); + isdalloct(tsdn, ptr, oldsize, tcache, NULL, true); + return ret; +} diff --git a/src/malloc_io.c b/src/malloc_io.c index e76a6b73..2b8a6564 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -1,5 +1,4 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/util.h" diff --git a/src/mutex.c b/src/mutex.c index aa2ab665..04a14a24 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -1,8 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/spin.h" #if defined(_WIN32) && !defined(_CRT_SPINCOUNT) diff --git a/src/nstime.c b/src/nstime.c index 0dfbeda1..5517877d 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -1,9 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" - -#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/nstime.h" #define BILLION UINT64_C(1000000000) #define MILLION UINT64_C(1000000) diff --git a/src/pa.c b/src/pa.c index f14fda81..2a560ed9 100644 --- a/src/pa.c +++ b/src/pa.c @@ -1,8 +1,9 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" -#include "jemalloc/internal/san.h" +#include "jemalloc/internal/background_thread.h" #include "jemalloc/internal/hpa.h" +#include "jemalloc/internal/pa.h" +#include "jemalloc/internal/san.h" static void pa_nactive_add(pa_shard_t *shard, size_t add_pages) { diff --git a/src/pa_extra.c b/src/pa_extra.c index 17b4449a..24ff2e6d 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -1,5 +1,6 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include 
"jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/pa.h" /* * This file is logically part of the PA module. While pa.c contains the core diff --git a/src/pac.c b/src/pac.c index aab2bb1e..caf5f9d5 100644 --- a/src/pac.c +++ b/src/pac.c @@ -1,8 +1,11 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/background_thread.h" +#include "jemalloc/internal/extent.h" #include "jemalloc/internal/pac.h" #include "jemalloc/internal/san.h" +#include "jemalloc/internal/witness.h" static inline void pac_decay_data_get(pac_t *pac, extent_state_t state, decay_t **r_decay, diff --git a/src/pages.c b/src/pages.c index 4bca965a..4f316a9d 100644 --- a/src/pages.c +++ b/src/pages.c @@ -1,11 +1,12 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/pages.h" - -#include "jemalloc/internal/jemalloc_internal_includes.h" - #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/jemalloc_internal_externs.h" #include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/pages.h" +#include "jemalloc/internal/sc.h" #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT # include <sys/sysctl.h> diff --git a/src/peak_event.c b/src/peak_event.c index 39f90b70..f42cfc47 100644 --- a/src/peak_event.c +++ b/src/peak_event.c @@ -1,10 +1,10 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" - -#include "jemalloc/internal/peak_event.h" #include "jemalloc/internal/peak.h" +#include "jemalloc/internal/peak_event.h" +#include "jemalloc/internal/thread_event.h" #include "jemalloc/internal/thread_event_registry.h" +#include "jemalloc/internal/tsd.h" /* Update the peak with current tsd state.
*/ void diff --git a/src/prof.c b/src/prof.c index a833fed5..eff0fc76 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1,16 +1,21 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" -#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/assert.h" -#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/counter.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof.h" #include "jemalloc/internal/prof_data.h" +#include "jemalloc/internal/prof_hook.h" +#include "jemalloc/internal/prof_inlines.h" #include "jemalloc/internal/prof_log.h" #include "jemalloc/internal/prof_recent.h" #include "jemalloc/internal/prof_stats.h" #include "jemalloc/internal/prof_sys.h" -#include "jemalloc/internal/prof_hook.h" #include "jemalloc/internal/thread_event.h" #include "jemalloc/internal/thread_event_registry.h" diff --git a/src/prof_data.c b/src/prof_data.c index 7aa047ac..d11f7907 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -1,11 +1,19 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/hash.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof.h" #include "jemalloc/internal/prof_data.h" +#include "jemalloc/internal/prof_inlines.h" +#include "jemalloc/internal/prof_sys.h" +#include "jemalloc/internal/witness.h" /* * This file 
defines and manages the core profiling data structures. diff --git a/src/prof_log.c b/src/prof_log.c index 74f1372f..a1f8dfa0 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -1,8 +1,13 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/buf_writer.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" +#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/prof_inlines.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/emitter.h" #include "jemalloc/internal/hash.h" diff --git a/src/prof_recent.c b/src/prof_recent.c index f7108bee..23146ec4 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -1,10 +1,15 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/buf_writer.h" #include "jemalloc/internal/emitter.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" +#include "jemalloc/internal/prof.h" #include "jemalloc/internal/prof_data.h" +#include "jemalloc/internal/prof_inlines.h" #include "jemalloc/internal/prof_recent.h" ssize_t opt_prof_recent_alloc_max = PROF_RECENT_ALLOC_MAX_DEFAULT; diff --git a/src/prof_stack_range.c b/src/prof_stack_range.c index 8ebcab8e..ef5e8062 100644 --- a/src/prof_stack_range.c +++ b/src/prof_stack_range.c @@ -1,5 +1,4 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/prof_sys.h" diff --git a/src/prof_stats.c b/src/prof_stats.c index 
db248be7..81d5e6f6 100644 --- a/src/prof_stats.c +++ b/src/prof_stats.c @@ -1,7 +1,9 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof.h" #include "jemalloc/internal/prof_stats.h" +#include "jemalloc/internal/tsd.h" bool opt_prof_stats = false; malloc_mutex_t prof_stats_mtx; diff --git a/src/prof_sys.c b/src/prof_sys.c index be50c0be..7067b152 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -1,10 +1,16 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/buf_writer.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof.h" #include "jemalloc/internal/prof_data.h" +#include "jemalloc/internal/prof_inlines.h" #include "jemalloc/internal/prof_sys.h" #ifdef JEMALLOC_PROF_LIBUNWIND diff --git a/src/psset.c b/src/psset.c index 4c6ab255..b9b739ad 100644 --- a/src/psset.c +++ b/src/psset.c @@ -1,9 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" - -#include "jemalloc/internal/psset.h" #include "jemalloc/internal/fb.h" +#include "jemalloc/internal/psset.h" +#include "jemalloc/internal/sz.h" void psset_init(psset_t *psset) { diff --git a/src/rtree.c b/src/rtree.c index ac27f829..a63f2b01 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -1,8 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/rtree.h" /* * Only the most significant bits 
of keys passed to rtree_{read,write}() are diff --git a/src/safety_check.c b/src/safety_check.c index d052718d..f11a263b 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -1,5 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/safety_check.h" static safety_check_abort_hook_t safety_check_abort; diff --git a/src/san.c b/src/san.c index 5448c67f..99a3d783 100644 --- a/src/san.c +++ b/src/san.c @@ -1,8 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/ehooks.h" +#include "jemalloc/internal/emap.h" #include "jemalloc/internal/san.h" #include "jemalloc/internal/tsd.h" diff --git a/src/san_bump.c b/src/san_bump.c index 11031290..30b90b04 100644 --- a/src/san_bump.c +++ b/src/san_bump.c @@ -1,11 +1,12 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" -#include "jemalloc/internal/san_bump.h" +#include "jemalloc/internal/edata_cache.h" +#include "jemalloc/internal/ehooks.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/pac.h" #include "jemalloc/internal/san.h" -#include "jemalloc/internal/ehooks.h" -#include "jemalloc/internal/edata_cache.h" +#include "jemalloc/internal/san_bump.h" static bool san_bump_grow_locked(tsdn_t *tsdn, san_bump_alloc_t *sba, pac_t *pac, ehooks_t *ehooks, size_t size); diff --git a/src/sec.c b/src/sec.c index 493e4629..879fc47e 100644 --- a/src/sec.c +++ b/src/sec.c @@ -1,8 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" -#include "jemalloc/internal/sec.h" #include "jemalloc/internal/jemalloc_probe.h" +#include "jemalloc/internal/sec.h" +#include "jemalloc/internal/witness.h" static bool 
sec_bin_init(sec_bin_t *bin) { diff --git a/src/stats.c b/src/stats.c index 65583393..bf016d7c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1,13 +1,19 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/background_thread.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/emitter.h" #include "jemalloc/internal/fxp.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex_prof.h" +#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/prof_inlines.h" #include "jemalloc/internal/prof_stats.h" +#include "jemalloc/internal/tcache.h" static const char *const global_mutex_names[mutex_prof_num_global_mutexes] = { #define OP(mtx) #mtx, diff --git a/src/sz.c b/src/sz.c index da92f2b4..5cff6f8b 100644 --- a/src/sz.c +++ b/src/sz.c @@ -1,5 +1,5 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" + #include "jemalloc/internal/sz.h" JEMALLOC_ALIGNED(CACHELINE) diff --git a/src/tcache.c b/src/tcache.c index 8c2f6f4c..012579bd 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -1,12 +1,25 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/background_thread.h" +#include "jemalloc/internal/background_thread_inlines.h" #include "jemalloc/internal/base.h" +#include "jemalloc/internal/emap.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" +#include "jemalloc/internal/large.h" #include "jemalloc/internal/mutex.h" +#include 
"jemalloc/internal/prof.h" +#include "jemalloc/internal/prof_inlines.h" #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/san.h" #include "jemalloc/internal/sc.h" +#include "jemalloc/internal/sz.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/tcache_inlines.h" +#include "jemalloc/internal/witness.h" /******************************************************************************/ /* Data. */ diff --git a/src/thread_event.c b/src/thread_event.c index a8c5e2e1..2f2dd711 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -1,9 +1,12 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/peak_event.h" +#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/tcache.h" #include "jemalloc/internal/thread_event.h" #include "jemalloc/internal/thread_event_registry.h" -#include "jemalloc/internal/peak_event.h" +#include "jemalloc/internal/tsd.h" static bool te_ctx_has_active_events(te_ctx_t *ctx) { diff --git a/src/thread_event_registry.c b/src/thread_event_registry.c index b8307df0..a25050e8 100644 --- a/src/thread_event_registry.c +++ b/src/thread_event_registry.c @@ -1,12 +1,15 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/peak_event.h" +#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/tcache.h" #include "jemalloc/internal/thread_event.h" #include "jemalloc/internal/thread_event_registry.h" -#include "jemalloc/internal/tcache_externs.h" -#include "jemalloc/internal/peak_event.h" -#include "jemalloc/internal/prof_externs.h" -#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/tsd.h" +#include 
"jemalloc/internal/witness.h" static malloc_mutex_t uevents_mu; diff --git a/src/ticker.c b/src/ticker.c index 1fd6ac96..b3cac9d1 100644 --- a/src/ticker.c +++ b/src/ticker.c @@ -1,5 +1,6 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/ticker.h" /* * To avoid using floating point math down core paths (still necessary because diff --git a/src/tsd.c b/src/tsd.c index 67200f50..814a4e70 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -1,10 +1,21 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arena_inlines.h" +#include "jemalloc/internal/arenas_management.h" #include "jemalloc/internal/assert.h" -#include "jemalloc/internal/san.h" +#include "jemalloc/internal/background_thread.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof.h" #include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/san.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/thread_event.h" +#include "jemalloc/internal/tsd.h" +#include "jemalloc/internal/witness.h" /******************************************************************************/ /* Data. */ diff --git a/src/util.c b/src/util.c index 1bcf4fee..a23a5c8c 100644 --- a/src/util.c +++ b/src/util.c @@ -1,6 +1,6 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/util.h" /* Reads the next size pair in a multi-sized option. 
*/ diff --git a/src/witness.c b/src/witness.c index 940b1eae..6f1a17e7 100644 --- a/src/witness.c +++ b/src/witness.c @@ -1,8 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/witness.h" void witness_init(witness_t *witness, const char *name, witness_rank_t rank, diff --git a/src/zone.c b/src/zone.c index 62d2eabb..62957f17 100644 --- a/src/zone.c +++ b/src/zone.c @@ -1,8 +1,12 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/arenas_management.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/jemalloc_fork.h" +#include "jemalloc/internal/jemalloc_init.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" #ifndef JEMALLOC_ZONE # error "This source file is for zones on Darwin (OS X)." 
diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 8b139db1..c263c32a 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -47,7 +47,18 @@ extern "C" { # define JEMALLOC_JET # define JEMALLOC_MANGLE # include "jemalloc/internal/jemalloc_preamble.h" -# include "jemalloc/internal/jemalloc_internal_includes.h" +# include "jemalloc/internal/arena.h" +# include "jemalloc/internal/jemalloc_internal_externs.h" +# include "jemalloc/internal/large.h" +# include "jemalloc/internal/tcache.h" +# include "jemalloc/internal/prof.h" +# include "jemalloc/internal/background_thread.h" +# include "jemalloc/internal/jemalloc_internal_inlines_a.h" +# include "jemalloc/internal/arena_inlines.h" +# include "jemalloc/internal/tcache_inlines.h" +# include "jemalloc/internal/jemalloc_internal_inlines_c.h" +# include "jemalloc/internal/prof_inlines.h" +# include "jemalloc/internal/background_thread_inlines.h" /******************************************************************************/ /* @@ -92,7 +103,18 @@ extern "C" { # define JEMALLOC_JET # include "jemalloc/internal/jemalloc_preamble.h" -# include "jemalloc/internal/jemalloc_internal_includes.h" +# include "jemalloc/internal/arena.h" +# include "jemalloc/internal/jemalloc_internal_externs.h" +# include "jemalloc/internal/large.h" +# include "jemalloc/internal/tcache.h" +# include "jemalloc/internal/prof.h" +# include "jemalloc/internal/background_thread.h" +# include "jemalloc/internal/jemalloc_internal_inlines_a.h" +# include "jemalloc/internal/arena_inlines.h" +# include "jemalloc/internal/tcache_inlines.h" +# include "jemalloc/internal/jemalloc_internal_inlines_c.h" +# include "jemalloc/internal/prof_inlines.h" +# include "jemalloc/internal/background_thread_inlines.h" # include "jemalloc/internal/public_unnamespace.h" # undef JEMALLOC_JET diff --git a/test/integration/extent.c b/test/integration/extent.c index c15bf761..36091ac6 100644 --- 
a/test/integration/extent.c +++ b/test/integration/extent.c @@ -2,7 +2,7 @@ #include "test/extent_hooks.h" -#include "jemalloc/internal/arena_types.h" +#include "jemalloc/internal/arena.h" static void test_extent_body(unsigned arena_ind) { diff --git a/test/unit/bin.c b/test/unit/bin.c index 08dd4665..a7091544 100644 --- a/test/unit/bin.c +++ b/test/unit/bin.c @@ -1,5 +1,8 @@ #include "test/jemalloc_test.h" +extern void bin_dalloc_locked_begin( + bin_dalloc_locked_info_t *info, szind_t binind); + #define INVALID_ARENA_IND ((1U << MALLOCX_ARENA_BITS) - 1) /* Create a page-aligned mock slab with all regions free. */ diff --git a/test/unit/san_bump.c b/test/unit/san_bump.c index 54d8583d..423f2be3 100644 --- a/test/unit/san_bump.c +++ b/test/unit/san_bump.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" #include "test/arena_util.h" -#include "jemalloc/internal/arena_structs.h" +#include "jemalloc/internal/arena.h" #include "jemalloc/internal/san_bump.h" static extent_hooks_t *san_bump_default_hooks; diff --git a/test/unit/slab.c b/test/unit/slab.c index d98663e8..e7adafea 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -1,5 +1,8 @@ #include "test/jemalloc_test.h" +extern void bin_dalloc_locked_begin( + bin_dalloc_locked_info_t *info, szind_t binind); + #define INVALID_ARENA_IND ((1U << MALLOCX_ARENA_BITS) - 1) TEST_BEGIN(test_bin_slab_regind) { diff --git a/test/unit/stats.c b/test/unit/stats.c index d2719db2..ee6cc9b2 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -#include "jemalloc/internal/arena_structs.h" +#include "jemalloc/internal/arena.h" #define STRINGIFY_HELPER(x) #x #define STRINGIFY(x) STRINGIFY_HELPER(x)