From ab4c178444f0a305c21cb306b9504f5f6c461f75 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Wed, 27 May 2026 18:07:16 -0400 Subject: [PATCH] Consolidate prof_* and tcache_* header splits Each of these components had a four-way split (_types, _structs, _externs, _inlines) that dates back to the old "include each section multiple times from a master file" pattern. With Step 2's edata <-> prof_types decoupling, merging _types + _structs + _externs into one header per component no longer risks recreating an include cycle. - prof.h replaces prof_types.h + prof_structs.h + prof_externs.h. - tcache.h replaces tcache_types.h + tcache_structs.h + tcache_externs.h. prof_inlines.h and tcache_inlines.h are kept separate: prof_inlines.h sits at the bottom of the dependency layering, and tcache_inlines.h's include of arena_externs.h is the asymmetric cycle-breaker that keeps the arena<->tcache symbol cycle from becoming an include cycle. Two surprises required adjustments beyond a straight concatenation: 1. te_prof_sample_event_lookahead was a JEMALLOC_ALWAYS_INLINE function defined in prof_externs.h, but its body calls tsd_thread_allocated_* accessors that only exist after tsd inlines are loaded. The original layering hid this because prof_externs.h was only included near the bottom of jemalloc_internal_includes.h. After consolidation, tsd_internals.h's includes pull prof.h in earlier, exposing the ordering dependency. Moved the inline to prof_inlines.h (where inline definitions belong anyway) and left only the related extern in prof.h. 2. base.h was included from prof_externs.h and tcache_externs.h purely for base_t * pointer arguments on a couple of declarations. Carrying that include into the merged prof.h / tcache.h would pull ehooks.h (-> tsd.h) into tsd_internals.h before tsd_internals.h finishes declaring its tsd accessors. Replaced with a forward declaration of base_t in each merged file. Similarly, tsd_internals.h's prior #include of prof_types.h becomes a forward decl of prof_tdata_t (the only prof symbol it references, and only as a pointer), and large.h needs a forward decl of prof_info_t because large.h is loaded before prof.h in the new master ordering. No inline / static qualifiers are dropped; only the one inline moves files. #ifdef blocks (JEMALLOC_PROF, JEMALLOC_PROF_LIBGCC, JEMALLOC_PROF_GCC, JEMALLOC_DEBUG) are kept intact. --- include/jemalloc/internal/arena_inlines_b.h | 3 +- .../internal/jemalloc_internal_includes.h | 8 +- .../internal/jemalloc_internal_inlines_a.h | 2 +- include/jemalloc/internal/large.h | 3 + include/jemalloc/internal/prof.h | 427 ++++++++++++++++++ include/jemalloc/internal/prof_externs.h | 132 ------ include/jemalloc/internal/prof_inlines.h | 29 +- include/jemalloc/internal/prof_structs.h | 221 --------- include/jemalloc/internal/prof_types.h | 94 ---- include/jemalloc/internal/tcache.h | 198 ++++++++ include/jemalloc/internal/tcache_externs.h | 91 ---- include/jemalloc/internal/tcache_inlines.h | 2 +- include/jemalloc/internal/tcache_structs.h | 72 --- include/jemalloc/internal/tcache_types.h | 37 -- include/jemalloc/internal/tsd_internals.h | 7 +- src/thread_event_registry.c | 4 +- 16 files changed, 666 insertions(+), 664 deletions(-) create mode 100644 include/jemalloc/internal/prof.h delete mode 100644 include/jemalloc/internal/prof_externs.h delete mode 100644 include/jemalloc/internal/prof_structs.h delete mode 100644 include/jemalloc/internal/prof_types.h create mode 100644 include/jemalloc/internal/tcache.h delete mode 100644 include/jemalloc/internal/tcache_externs.h delete mode 100644 include/jemalloc/internal/tcache_structs.h delete mode 100644 include/jemalloc/internal/tcache_types.h diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 5f0420c9..f790834c 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -10,8 +10,7 @@ #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/large.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/prof_externs.h" -#include "jemalloc/internal/prof_structs.h" +#include "jemalloc/internal/prof.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sc.h" diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 86e2aea1..87ef4c82 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -41,16 +41,12 @@ /******************************************************************************/ #include "jemalloc/internal/arena_types.h" -#include "jemalloc/internal/tcache_types.h" -#include "jemalloc/internal/prof_types.h" /******************************************************************************/ /* STRUCTS */ /******************************************************************************/ -#include "jemalloc/internal/prof_structs.h" #include "jemalloc/internal/arena_structs.h" -#include "jemalloc/internal/tcache_structs.h" /******************************************************************************/ /* EXTERNS */ @@ -59,9 +55,9 @@ #include "jemalloc/internal/jemalloc_internal_externs.h" #include "jemalloc/internal/arena_externs.h" #include "jemalloc/internal/large.h" -#include "jemalloc/internal/tcache_externs.h" +#include "jemalloc/internal/tcache.h" #include "jemalloc/internal/malloc_dispatch_externs.h" -#include "jemalloc/internal/prof_externs.h" +#include "jemalloc/internal/prof.h" #include "jemalloc/internal/background_thread.h" /******************************************************************************/ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 646ec5be..01771d7a 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -9,7 +9,7 @@ #include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/sc.h" -#include "jemalloc/internal/tcache_externs.h" +#include "jemalloc/internal/tcache.h" #include "jemalloc/internal/ticker.h" JEMALLOC_ALWAYS_INLINE malloc_cpuid_t diff --git a/include/jemalloc/internal/large.h b/include/jemalloc/internal/large.h index e28841c1..8e7bdae0 100644 --- a/include/jemalloc/internal/large.h +++ b/include/jemalloc/internal/large.h @@ -4,6 +4,9 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/edata.h" +/* Forward decl; only prof_info_t * is used as a pointer arg below. */ +typedef struct prof_info_s prof_info_t; + void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); void *large_palloc( tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero); diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h new file mode 100644 index 00000000..b0757886 --- /dev/null +++ b/include/jemalloc/internal/prof.h @@ -0,0 +1,427 @@ +#ifndef JEMALLOC_INTERNAL_PROF_H +#define JEMALLOC_INTERNAL_PROF_H + +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prng.h" +#include "jemalloc/internal/prof_hook.h" +#include "jemalloc/internal/rb.h" +#include "jemalloc/internal/thread_event_registry.h" + +/* Forward decl; only base_t * is used as a pointer arg below. */ +typedef struct base_s base_t; + +/******************************************************************************/ +/* TYPES */ +/******************************************************************************/ + +typedef struct prof_bt_s prof_bt_t; +typedef struct prof_cnt_s prof_cnt_t; +typedef struct prof_tctx_s prof_tctx_t; +typedef struct prof_info_s prof_info_t; +typedef struct prof_gctx_s prof_gctx_t; +typedef struct prof_tdata_s prof_tdata_t; +typedef struct prof_recent_s prof_recent_t; + +/* Option defaults. */ +#ifdef JEMALLOC_PROF +# define PROF_PREFIX_DEFAULT "jeprof" +#else +# define PROF_PREFIX_DEFAULT "" +#endif +#define LG_PROF_SAMPLE_DEFAULT 19 +#define LG_PROF_INTERVAL_DEFAULT -1 + +/* + * Hard limit on stack backtrace depth. The version of prof_backtrace() that + * is based on __builtin_return_address() necessarily has a hard-coded number + * of backtrace frame handlers, and should be kept in sync with this setting. + */ +#ifdef JEMALLOC_PROF_GCC +# define PROF_BT_MAX_LIMIT 256 +#else +# define PROF_BT_MAX_LIMIT UINT_MAX +#endif +#define PROF_BT_MAX_DEFAULT 128 + +/* Initial hash table size. */ +#define PROF_CKH_MINITEMS 64 + +/* Size of memory buffer to use when writing dump files. */ +#ifndef JEMALLOC_PROF +/* Minimize memory bloat for non-prof builds. */ +# define PROF_DUMP_BUFSIZE 1 +#elif defined(JEMALLOC_DEBUG) +/* Use a small buffer size in debug build, mainly to facilitate testing. */ +# define PROF_DUMP_BUFSIZE 16 +#else +# define PROF_DUMP_BUFSIZE 65536 +#endif + +/* Size of size class related tables */ +#ifdef JEMALLOC_PROF +# define PROF_SC_NSIZES SC_NSIZES +#else +/* Minimize memory bloat for non-prof builds. */ +# define PROF_SC_NSIZES 1 +#endif + +/* Size of stack-allocated buffer used by prof_printf(). */ +#define PROF_PRINTF_BUFSIZE 128 + +/* + * Number of mutexes shared among all gctx's. No space is allocated for these + * unless profiling is enabled, so it's okay to over-provision. + */ +#define PROF_NCTX_LOCKS 1024 + +/* + * Number of mutexes shared among all tdata's. No space is allocated for these + * unless profiling is enabled, so it's okay to over-provision. + */ +#define PROF_NTDATA_LOCKS 256 + +/* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF +# define PROF_DUMP_FILENAME_LEN (PATH_MAX + 1) +#else +# define PROF_DUMP_FILENAME_LEN 1 +#endif + +/* Default number of recent allocations to record. */ +#define PROF_RECENT_ALLOC_MAX_DEFAULT 0 + +/* Thread name storage size limit. */ +#define PROF_THREAD_NAME_MAX_LEN 16 + +/* + * Minimum required alignment for sampled allocations. Over-aligning sampled + * allocations allows us to quickly identify them on the dalloc path without + * resorting to metadata lookup. + */ +#define PROF_SAMPLE_ALIGNMENT PAGE +#define PROF_SAMPLE_ALIGNMENT_MASK PAGE_MASK + +/* NOLINTNEXTLINE(performance-no-int-to-ptr) */ +#define PROF_TCTX_SENTINEL ((prof_tctx_t *)((uintptr_t)1U)) + +/******************************************************************************/ +/* STRUCTS */ +/******************************************************************************/ + +struct prof_bt_s { + /* Backtrace, stored as len program counters. */ + void **vec; + unsigned len; +}; + +#ifdef JEMALLOC_PROF_LIBGCC +/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ +typedef struct { + void **vec; + unsigned *len; + unsigned max; +} prof_unwind_data_t; +#endif + +struct prof_cnt_s { + /* Profiling counters. */ + uint64_t curobjs; + uint64_t curobjs_shifted_unbiased; + uint64_t curbytes; + uint64_t curbytes_unbiased; + uint64_t accumobjs; + uint64_t accumobjs_shifted_unbiased; + uint64_t accumbytes; + uint64_t accumbytes_unbiased; +}; + +typedef enum { + prof_tctx_state_initializing, + prof_tctx_state_nominal, + prof_tctx_state_dumping, + prof_tctx_state_purgatory /* Dumper must finish destroying. */ +} prof_tctx_state_t; + +struct prof_tctx_s { + /* Thread data for thread that performed the allocation. */ + prof_tdata_t *tdata; + + /* + * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be + * defunct during teardown. + */ + uint64_t thr_uid; + uint64_t thr_discrim; + + /* + * Reference count of how many times this tctx object is referenced in + * recent allocation / deallocation records, protected by tdata->lock. + */ + uint64_t recent_count; + + /* Profiling counters, protected by tdata->lock. */ + prof_cnt_t cnts; + + /* Associated global context. */ + prof_gctx_t *gctx; + + /* + * UID that distinguishes multiple tctx's created by the same thread, + * but coexisting in gctx->tctxs. There are two ways that such + * coexistence can occur: + * - A dumper thread can cause a tctx to be retained in the purgatory + * state. + * - Although a single "producer" thread must create all tctx's which + * share the same thr_uid, multiple "consumers" can each concurrently + * execute portions of prof_tctx_destroy(). prof_tctx_destroy() only + * gets called once each time cnts.cur{objs,bytes} drop to 0, but this + * threshold can be hit again before the first consumer finishes + * executing prof_tctx_destroy(). + */ + uint64_t tctx_uid; + + /* Linkage into gctx's tctxs. */ + rb_node(prof_tctx_t) tctx_link; + + /* + * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents + * sample vs destroy race. + */ + bool prepared; + + /* Current dump-related state, protected by gctx->lock. */ + prof_tctx_state_t state; + + /* + * Copy of cnts snapshotted during early dump phase, protected by + * dump_mtx. + */ + prof_cnt_t dump_cnts; +}; +typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; + +struct prof_info_s { + /* Time when the allocation was made. */ + nstime_t alloc_time; + /* Points to the prof_tctx_t corresponding to the allocation. */ + prof_tctx_t *alloc_tctx; + /* Allocation request size. */ + size_t alloc_size; +}; + +struct prof_gctx_s { + /* Protects nlimbo, cnt_summed, and tctxs. */ + malloc_mutex_t *lock; + + /* + * Number of threads that currently cause this gctx to be in a state of + * limbo due to one of: + * - Initializing this gctx. + * - Initializing per thread counters associated with this gctx. + * - Preparing to destroy this gctx. + * - Dumping a heap profile that includes this gctx. + * nlimbo must be 1 (single destroyer) in order to safely destroy the + * gctx. + */ + unsigned nlimbo; + + /* + * Tree of profile counters, one for each thread that has allocated in + * this context. + */ + prof_tctx_tree_t tctxs; + + /* Linkage for tree of contexts to be dumped. */ + rb_node(prof_gctx_t) dump_link; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + + /* Associated backtrace. */ + prof_bt_t bt; + + /* Backtrace vector, variable size, referred to by bt. */ + void *vec[1]; +}; +typedef rb_tree(prof_gctx_t) prof_gctx_tree_t; + +struct prof_tdata_s { + malloc_mutex_t *lock; + + /* Monotonically increasing unique thread identifier. */ + uint64_t thr_uid; + + /* + * Monotonically increasing discriminator among tdata structures + * associated with the same thr_uid. + */ + uint64_t thr_discrim; + + rb_node(prof_tdata_t) tdata_link; + + /* + * Counter used to initialize prof_tctx_t's tctx_uid. No locking is + * necessary when incrementing this field, because only one thread ever + * does so. + */ + uint64_t tctx_uid_next; + + /* + * Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks + * backtraces for which it has non-zero allocation/deallocation counters + * associated with thread-specific prof_tctx_t objects. Other threads + * may write to prof_tctx_t contents when freeing associated objects. + */ + ckh_t bt2tctx; + + /* Included in heap profile dumps if has content. */ + char thread_name[PROF_THREAD_NAME_MAX_LEN]; + + /* State used to avoid dumping while operating on prof internals. */ + bool enq; + bool enq_idump; + bool enq_gdump; + + /* + * Set to true during an early dump phase for tdata's which are + * currently being dumped. New threads' tdata's have this initialized + * to false so that they aren't accidentally included in later dump + * phases. + */ + bool dumping; + + /* + * True if profiling is active for this tdata's thread + * (thread.prof.active mallctl). + */ + bool active; + + bool attached; + bool expired; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + + /* Backtrace vector, used for calls to prof_backtrace(). */ + void **vec; +}; +typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; + +struct prof_recent_s { + nstime_t alloc_time; + nstime_t dalloc_time; + + ql_elm(prof_recent_t) link; + size_t size; + size_t usize; + atomic_p_t alloc_edata; /* NULL means allocation has been freed. */ + prof_tctx_t *alloc_tctx; + prof_tctx_t *dalloc_tctx; +}; + +/******************************************************************************/ +/* EXTERNS */ +/******************************************************************************/ + +extern bool opt_prof; +extern bool opt_prof_active; +extern bool opt_prof_thread_active_init; +extern unsigned opt_prof_bt_max; +extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ +extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ +extern bool opt_prof_gdump; /* High-water memory dumping. */ +extern bool opt_prof_final; /* Final profile dumping. */ +extern bool opt_prof_leak; /* Dump leak summary at exit. */ +extern bool opt_prof_leak_error; /* Exit with error code if memory leaked */ +extern bool opt_prof_accum; /* Report cumulative bytes. */ +extern bool opt_prof_log; /* Turn logging on at boot. */ +extern char opt_prof_prefix[ +/* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PATH_MAX + +#endif + 1]; +extern bool opt_prof_unbias; + +/* Include pid namespace in profile file names. */ +extern bool opt_prof_pid_namespace; + +/* For recording recent allocations */ +extern ssize_t opt_prof_recent_alloc_max; + +/* Whether to use thread name provided by the system or by mallctl. */ +extern bool opt_prof_sys_thread_name; + +/* Whether to record per size class counts and request size totals. */ +extern bool opt_prof_stats; + +/* Accessed via prof_active_[gs]et{_unlocked,}(). */ +extern bool prof_active_state; + +/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */ +extern bool prof_gdump_val; + +/* Profile dump interval, measured in bytes allocated. */ +extern uint64_t prof_interval; + +/* + * Initialized as opt_lg_prof_sample, and potentially modified during profiling + * resets. + */ +extern size_t lg_prof_sample; + +extern bool prof_booted; + +void prof_backtrace_hook_set(prof_backtrace_hook_t hook); +prof_backtrace_hook_t prof_backtrace_hook_get(void); + +void prof_dump_hook_set(prof_dump_hook_t hook); +prof_dump_hook_t prof_dump_hook_get(void); + +void prof_sample_hook_set(prof_sample_hook_t hook); +prof_sample_hook_t prof_sample_hook_get(void); + +void prof_sample_free_hook_set(prof_sample_free_hook_t hook); +prof_sample_free_hook_t prof_sample_free_hook_get(void); + +/* Functions only accessed in prof_inlines.h */ +prof_tdata_t *prof_tdata_init(tsd_t *tsd); +prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); + +void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx); +void prof_malloc_sample_object( + tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx); +void prof_free_sampled_object( + tsd_t *tsd, const void *ptr, size_t usize, prof_info_t *prof_info); +prof_tctx_t *prof_tctx_create(tsd_t *tsd); +void prof_idump(tsdn_t *tsdn); +bool prof_mdump(tsd_t *tsd, const char *filename); +void prof_gdump(tsdn_t *tsdn); + +void prof_tdata_cleanup(tsd_t *tsd); +bool prof_active_get(tsdn_t *tsdn); +bool prof_active_set(tsdn_t *tsdn, bool active); +const char *prof_thread_name_get(tsd_t *tsd); +int prof_thread_name_set(tsd_t *tsd, const char *thread_name); +bool prof_thread_active_get(tsd_t *tsd); +bool prof_thread_active_set(tsd_t *tsd, bool active); +bool prof_thread_active_init_get(tsdn_t *tsdn); +bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init); +bool prof_gdump_get(tsdn_t *tsdn); +bool prof_gdump_set(tsdn_t *tsdn, bool active); +void prof_boot0(void); +void prof_boot1(void); +bool prof_boot2(tsd_t *tsd, base_t *base); +void prof_prefork0(tsdn_t *tsdn); +void prof_prefork1(tsdn_t *tsdn); +void prof_postfork_parent(tsdn_t *tsdn); +void prof_postfork_child(tsdn_t *tsdn); + +uint64_t prof_sample_new_event_wait(tsd_t *tsd); +uint64_t tsd_prof_sample_event_wait_get(tsd_t *tsd); + +extern te_base_cb_t prof_sample_te_handler; + +#endif /* JEMALLOC_INTERNAL_PROF_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h deleted file mode 100644 index cfb28988..00000000 --- a/include/jemalloc/internal/prof_externs.h +++ /dev/null @@ -1,132 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_PROF_EXTERNS_H -#define JEMALLOC_INTERNAL_PROF_EXTERNS_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/prof_hook.h" -#include "jemalloc/internal/thread_event_registry.h" - -extern bool opt_prof; -extern bool opt_prof_active; -extern bool opt_prof_thread_active_init; -extern unsigned opt_prof_bt_max; -extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ -extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ -extern bool opt_prof_gdump; /* High-water memory dumping. */ -extern bool opt_prof_final; /* Final profile dumping. */ -extern bool opt_prof_leak; /* Dump leak summary at exit. */ -extern bool opt_prof_leak_error; /* Exit with error code if memory leaked */ -extern bool opt_prof_accum; /* Report cumulative bytes. */ -extern bool opt_prof_log; /* Turn logging on at boot. */ -extern char opt_prof_prefix[ -/* Minimize memory bloat for non-prof builds. */ -#ifdef JEMALLOC_PROF - PATH_MAX + -#endif - 1]; -extern bool opt_prof_unbias; - -/* Include pid namespace in profile file names. */ -extern bool opt_prof_pid_namespace; - -/* For recording recent allocations */ -extern ssize_t opt_prof_recent_alloc_max; - -/* Whether to use thread name provided by the system or by mallctl. */ -extern bool opt_prof_sys_thread_name; - -/* Whether to record per size class counts and request size totals. */ -extern bool opt_prof_stats; - -/* Accessed via prof_active_[gs]et{_unlocked,}(). */ -extern bool prof_active_state; - -/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */ -extern bool prof_gdump_val; - -/* Profile dump interval, measured in bytes allocated. */ -extern uint64_t prof_interval; - -/* - * Initialized as opt_lg_prof_sample, and potentially modified during profiling - * resets. - */ -extern size_t lg_prof_sample; - -extern bool prof_booted; - -void prof_backtrace_hook_set(prof_backtrace_hook_t hook); -prof_backtrace_hook_t prof_backtrace_hook_get(void); - -void prof_dump_hook_set(prof_dump_hook_t hook); -prof_dump_hook_t prof_dump_hook_get(void); - -void prof_sample_hook_set(prof_sample_hook_t hook); -prof_sample_hook_t prof_sample_hook_get(void); - -void prof_sample_free_hook_set(prof_sample_free_hook_t hook); -prof_sample_free_hook_t prof_sample_free_hook_get(void); - -/* Functions only accessed in prof_inlines.h */ -prof_tdata_t *prof_tdata_init(tsd_t *tsd); -prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); - -void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx); -void prof_malloc_sample_object( - tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx); -void prof_free_sampled_object( - tsd_t *tsd, const void *ptr, size_t usize, prof_info_t *prof_info); -prof_tctx_t *prof_tctx_create(tsd_t *tsd); -void prof_idump(tsdn_t *tsdn); -bool prof_mdump(tsd_t *tsd, const char *filename); -void prof_gdump(tsdn_t *tsdn); - -void prof_tdata_cleanup(tsd_t *tsd); -bool prof_active_get(tsdn_t *tsdn); -bool prof_active_set(tsdn_t *tsdn, bool active); -const char *prof_thread_name_get(tsd_t *tsd); -int prof_thread_name_set(tsd_t *tsd, const char *thread_name); -bool prof_thread_active_get(tsd_t *tsd); -bool prof_thread_active_set(tsd_t *tsd, bool active); -bool prof_thread_active_init_get(tsdn_t *tsdn); -bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init); -bool prof_gdump_get(tsdn_t *tsdn); -bool prof_gdump_set(tsdn_t *tsdn, bool active); -void prof_boot0(void); -void prof_boot1(void); -bool prof_boot2(tsd_t *tsd, base_t *base); -void prof_prefork0(tsdn_t *tsdn); -void prof_prefork1(tsdn_t *tsdn); -void prof_postfork_parent(tsdn_t *tsdn); -void prof_postfork_child(tsdn_t *tsdn); - -uint64_t prof_sample_new_event_wait(tsd_t *tsd); -uint64_t tsd_prof_sample_event_wait_get(tsd_t *tsd); - -/* - * The lookahead functionality facilitates events to be able to lookahead, i.e. - * without touching the event counters, to determine whether an event would be - * triggered. The event counters are not advanced until the end of the - * allocation / deallocation calls, so the lookahead can be useful if some - * preparation work for some event must be done early in the allocation / - * deallocation calls. - * - * Currently only the profiling sampling event needs the lookahead - * functionality, so we don't yet define general purpose lookahead functions. - */ - -JEMALLOC_ALWAYS_INLINE bool -te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) { - if (unlikely(!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0)) { - return false; - } - /* The subtraction is intentionally susceptible to underflow. */ - uint64_t accumbytes = tsd_thread_allocated_get(tsd) + usize - - tsd_thread_allocated_last_event_get(tsd); - return accumbytes >= tsd_prof_sample_event_wait_get(tsd); -} - -extern te_base_cb_t prof_sample_te_handler; - -#endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index 4a36bd7a..19dfd1a0 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -4,12 +4,37 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena_inlines_b.h" #include "jemalloc/internal/jemalloc_internal_inlines_c.h" -#include "jemalloc/internal/prof_externs.h" -#include "jemalloc/internal/prof_structs.h" +#include "jemalloc/internal/prof.h" #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/thread_event.h" +/* + * The lookahead functionality facilitates events to be able to lookahead, i.e. + * without touching the event counters, to determine whether an event would be + * triggered. The event counters are not advanced until the end of the + * allocation / deallocation calls, so the lookahead can be useful if some + * preparation work for some event must be done early in the allocation / + * deallocation calls. + * + * Currently only the profiling sampling event needs the lookahead + * functionality, so we don't yet define general purpose lookahead functions. + * + * Defined here rather than prof.h because the inline body depends on tsd + * accessors that aren't visible until tsd inlines are loaded. + */ + +JEMALLOC_ALWAYS_INLINE bool +te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) { + if (unlikely(!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0)) { + return false; + } + /* The subtraction is intentionally susceptible to underflow. */ + uint64_t accumbytes = tsd_thread_allocated_get(tsd) + usize + - tsd_thread_allocated_last_event_get(tsd); + return accumbytes >= tsd_prof_sample_event_wait_get(tsd); +} + JEMALLOC_ALWAYS_INLINE void prof_active_assert(void) { cassert(config_prof); diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h deleted file mode 100644 index d3a13718..00000000 --- a/include/jemalloc/internal/prof_structs.h +++ /dev/null @@ -1,221 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_PROF_STRUCTS_H -#define JEMALLOC_INTERNAL_PROF_STRUCTS_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/prng.h" -#include "jemalloc/internal/rb.h" - -struct prof_bt_s { - /* Backtrace, stored as len program counters. */ - void **vec; - unsigned len; -}; - -#ifdef JEMALLOC_PROF_LIBGCC -/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ -typedef struct { - void **vec; - unsigned *len; - unsigned max; -} prof_unwind_data_t; -#endif - -struct prof_cnt_s { - /* Profiling counters. */ - uint64_t curobjs; - uint64_t curobjs_shifted_unbiased; - uint64_t curbytes; - uint64_t curbytes_unbiased; - uint64_t accumobjs; - uint64_t accumobjs_shifted_unbiased; - uint64_t accumbytes; - uint64_t accumbytes_unbiased; -}; - -typedef enum { - prof_tctx_state_initializing, - prof_tctx_state_nominal, - prof_tctx_state_dumping, - prof_tctx_state_purgatory /* Dumper must finish destroying. */ -} prof_tctx_state_t; - -struct prof_tctx_s { - /* Thread data for thread that performed the allocation. */ - prof_tdata_t *tdata; - - /* - * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be - * defunct during teardown. - */ - uint64_t thr_uid; - uint64_t thr_discrim; - - /* - * Reference count of how many times this tctx object is referenced in - * recent allocation / deallocation records, protected by tdata->lock. - */ - uint64_t recent_count; - - /* Profiling counters, protected by tdata->lock. */ - prof_cnt_t cnts; - - /* Associated global context. */ - prof_gctx_t *gctx; - - /* - * UID that distinguishes multiple tctx's created by the same thread, - * but coexisting in gctx->tctxs. There are two ways that such - * coexistence can occur: - * - A dumper thread can cause a tctx to be retained in the purgatory - * state. - * - Although a single "producer" thread must create all tctx's which - * share the same thr_uid, multiple "consumers" can each concurrently - * execute portions of prof_tctx_destroy(). prof_tctx_destroy() only - * gets called once each time cnts.cur{objs,bytes} drop to 0, but this - * threshold can be hit again before the first consumer finishes - * executing prof_tctx_destroy(). - */ - uint64_t tctx_uid; - - /* Linkage into gctx's tctxs. */ - rb_node(prof_tctx_t) tctx_link; - - /* - * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents - * sample vs destroy race. - */ - bool prepared; - - /* Current dump-related state, protected by gctx->lock. */ - prof_tctx_state_t state; - - /* - * Copy of cnts snapshotted during early dump phase, protected by - * dump_mtx. - */ - prof_cnt_t dump_cnts; -}; -typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; - -struct prof_info_s { - /* Time when the allocation was made. */ - nstime_t alloc_time; - /* Points to the prof_tctx_t corresponding to the allocation. */ - prof_tctx_t *alloc_tctx; - /* Allocation request size. */ - size_t alloc_size; -}; - -struct prof_gctx_s { - /* Protects nlimbo, cnt_summed, and tctxs. */ - malloc_mutex_t *lock; - - /* - * Number of threads that currently cause this gctx to be in a state of - * limbo due to one of: - * - Initializing this gctx. - * - Initializing per thread counters associated with this gctx. - * - Preparing to destroy this gctx. - * - Dumping a heap profile that includes this gctx. - * nlimbo must be 1 (single destroyer) in order to safely destroy the - * gctx. - */ - unsigned nlimbo; - - /* - * Tree of profile counters, one for each thread that has allocated in - * this context. - */ - prof_tctx_tree_t tctxs; - - /* Linkage for tree of contexts to be dumped. */ - rb_node(prof_gctx_t) dump_link; - - /* Temporary storage for summation during dump. */ - prof_cnt_t cnt_summed; - - /* Associated backtrace. */ - prof_bt_t bt; - - /* Backtrace vector, variable size, referred to by bt. */ - void *vec[1]; -}; -typedef rb_tree(prof_gctx_t) prof_gctx_tree_t; - -struct prof_tdata_s { - malloc_mutex_t *lock; - - /* Monotonically increasing unique thread identifier. */ - uint64_t thr_uid; - - /* - * Monotonically increasing discriminator among tdata structures - * associated with the same thr_uid. - */ - uint64_t thr_discrim; - - rb_node(prof_tdata_t) tdata_link; - - /* - * Counter used to initialize prof_tctx_t's tctx_uid. No locking is - * necessary when incrementing this field, because only one thread ever - * does so. - */ - uint64_t tctx_uid_next; - - /* - * Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks - * backtraces for which it has non-zero allocation/deallocation counters - * associated with thread-specific prof_tctx_t objects. Other threads - * may write to prof_tctx_t contents when freeing associated objects. - */ - ckh_t bt2tctx; - - /* Included in heap profile dumps if has content. */ - char thread_name[PROF_THREAD_NAME_MAX_LEN]; - - /* State used to avoid dumping while operating on prof internals. */ - bool enq; - bool enq_idump; - bool enq_gdump; - - /* - * Set to true during an early dump phase for tdata's which are - * currently being dumped. New threads' tdata's have this initialized - * to false so that they aren't accidentally included in later dump - * phases. - */ - bool dumping; - - /* - * True if profiling is active for this tdata's thread - * (thread.prof.active mallctl). - */ - bool active; - - bool attached; - bool expired; - - /* Temporary storage for summation during dump. */ - prof_cnt_t cnt_summed; - - /* Backtrace vector, used for calls to prof_backtrace(). */ - void **vec; -}; -typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; - -struct prof_recent_s { - nstime_t alloc_time; - nstime_t dalloc_time; - - ql_elm(prof_recent_t) link; - size_t size; - size_t usize; - atomic_p_t alloc_edata; /* NULL means allocation has been freed. */ - prof_tctx_t *alloc_tctx; - prof_tctx_t *dalloc_tctx; -}; - -#endif /* JEMALLOC_INTERNAL_PROF_STRUCTS_H */ diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h deleted file mode 100644 index 7468885e..00000000 --- a/include/jemalloc/internal/prof_types.h +++ /dev/null @@ -1,94 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_PROF_TYPES_H -#define JEMALLOC_INTERNAL_PROF_TYPES_H - -typedef struct prof_bt_s prof_bt_t; -typedef struct prof_cnt_s prof_cnt_t; -typedef struct prof_tctx_s prof_tctx_t; -typedef struct prof_info_s prof_info_t; -typedef struct prof_gctx_s prof_gctx_t; -typedef struct prof_tdata_s prof_tdata_t; -typedef struct prof_recent_s prof_recent_t; - -/* Option defaults. */ -#ifdef JEMALLOC_PROF -# define PROF_PREFIX_DEFAULT "jeprof" -#else -# define PROF_PREFIX_DEFAULT "" -#endif -#define LG_PROF_SAMPLE_DEFAULT 19 -#define LG_PROF_INTERVAL_DEFAULT -1 - -/* - * Hard limit on stack backtrace depth. The version of prof_backtrace() that - * is based on __builtin_return_address() necessarily has a hard-coded number - * of backtrace frame handlers, and should be kept in sync with this setting. - */ -#ifdef JEMALLOC_PROF_GCC -# define PROF_BT_MAX_LIMIT 256 -#else -# define PROF_BT_MAX_LIMIT UINT_MAX -#endif -#define PROF_BT_MAX_DEFAULT 128 - -/* Initial hash table size. */ -#define PROF_CKH_MINITEMS 64 - -/* Size of memory buffer to use when writing dump files. */ -#ifndef JEMALLOC_PROF -/* Minimize memory bloat for non-prof builds. */ -# define PROF_DUMP_BUFSIZE 1 -#elif defined(JEMALLOC_DEBUG) -/* Use a small buffer size in debug build, mainly to facilitate testing. */ -# define PROF_DUMP_BUFSIZE 16 -#else -# define PROF_DUMP_BUFSIZE 65536 -#endif - -/* Size of size class related tables */ -#ifdef JEMALLOC_PROF -# define PROF_SC_NSIZES SC_NSIZES -#else -/* Minimize memory bloat for non-prof builds. */ -# define PROF_SC_NSIZES 1 -#endif - -/* Size of stack-allocated buffer used by prof_printf(). */ -#define PROF_PRINTF_BUFSIZE 128 - -/* - * Number of mutexes shared among all gctx's. No space is allocated for these - * unless profiling is enabled, so it's okay to over-provision. - */ -#define PROF_NCTX_LOCKS 1024 - -/* - * Number of mutexes shared among all tdata's. No space is allocated for these - * unless profiling is enabled, so it's okay to over-provision. - */ -#define PROF_NTDATA_LOCKS 256 - -/* Minimize memory bloat for non-prof builds. */ -#ifdef JEMALLOC_PROF -# define PROF_DUMP_FILENAME_LEN (PATH_MAX + 1) -#else -# define PROF_DUMP_FILENAME_LEN 1 -#endif - -/* Default number of recent allocations to record. */ -#define PROF_RECENT_ALLOC_MAX_DEFAULT 0 - -/* Thread name storage size limit. */ -#define PROF_THREAD_NAME_MAX_LEN 16 - -/* - * Minimum required alignment for sampled allocations. Over-aligning sampled - * allocations allows us to quickly identify them on the dalloc path without - * resorting to metadata lookup. - */ -#define PROF_SAMPLE_ALIGNMENT PAGE -#define PROF_SAMPLE_ALIGNMENT_MASK PAGE_MASK - -/* NOLINTNEXTLINE(performance-no-int-to-ptr) */ -#define PROF_TCTX_SENTINEL ((prof_tctx_t *)((uintptr_t)1U)) - -#endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */ diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h new file mode 100644 index 00000000..12a55f19 --- /dev/null +++ b/include/jemalloc/internal/tcache.h @@ -0,0 +1,198 @@ +#ifndef JEMALLOC_INTERNAL_TCACHE_H +#define JEMALLOC_INTERNAL_TCACHE_H + +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/cache_bin.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/sc.h" +#include "jemalloc/internal/sz.h" +#include "jemalloc/internal/thread_event_registry.h" +#include "jemalloc/internal/ticker.h" + +/* Forward decl; only base_t * is used as a pointer arg below. */ +typedef struct base_s base_t; + +/******************************************************************************/ +/* TYPES */ +/******************************************************************************/ + +typedef struct tcache_slow_s tcache_slow_t; +typedef struct tcache_s tcache_t; +typedef struct tcaches_s tcaches_t; + +/* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */ +#define TCACHE_ZERO_INITIALIZER \ + { 0 } +#define TCACHE_SLOW_ZERO_INITIALIZER \ + { \ + { 0 } \ + } + +/* Used in TSD static initializer only. Will be initialized to opt_tcache. */ +#define TCACHE_ENABLED_ZERO_INITIALIZER false + +/* Used for explicit tcache only. Means flushed but not destroyed. */ +/* NOLINTNEXTLINE(performance-no-int-to-ptr) */ +#define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1) + +#define TCACHE_LG_MAXCLASS_LIMIT LG_USIZE_GROW_SLOW_THRESHOLD +#define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT) +#define TCACHE_NBINS_MAX \ + (SC_NBINS \ + + SC_NGROUP * (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) \ + + 1) +#define TCACHE_GC_NEIGHBOR_LIMIT ((uintptr_t)1 << 21) /* 2M */ +#define TCACHE_GC_INTERVAL_NS ((uint64_t)10 * KQU(1000000)) /* 10ms */ +#define TCACHE_GC_SMALL_NBINS_MAX ((SC_NBINS > 8) ? (SC_NBINS >> 3) : 1) +#define TCACHE_GC_LARGE_NBINS_MAX 1 + +/******************************************************************************/ +/* STRUCTS */ +/******************************************************************************/ + +/* + * The tcache state is split into the slow and hot path data. Each has a + * pointer to the other, and the data always comes in pairs. The layout of each + * of them varies in practice; tcache_slow lives in the TSD for the automatic + * tcache, and as part of a dynamic allocation for manual allocations. Keeping + * a pointer to tcache_slow lets us treat these cases uniformly, rather than + * splitting up the tcache [de]allocation code into those paths called with the + * TSD tcache and those called with a manual tcache. + */ + +struct tcache_slow_s { + /* + * The descriptor lets the arena find our cache bins without seeing the + * tcache definition. This enables arenas to aggregate stats across + * tcaches without having a tcache dependency. + */ + cache_bin_array_descriptor_t cache_bin_array_descriptor; + + /* The arena this tcache is associated with. */ + arena_t *arena; + /* The number of bins activated in the tcache. */ + unsigned tcache_nbins; + /* Last time GC has been performed. */ + nstime_t last_gc_time; + /* Next bin to GC. */ + szind_t next_gc_bin; + szind_t next_gc_bin_small; + szind_t next_gc_bin_large; + /* For small bins, help determine how many items to fill at a time. */ + cache_bin_fill_ctl_t bin_fill_ctl_do_not_access_directly[SC_NBINS]; + /* For small bins, whether has been refilled since last GC. */ + bool bin_refilled[SC_NBINS]; + /* + * For small bins, the number of items we can pretend to flush before + * actually flushing. + */ + uint8_t bin_flush_delay_items[SC_NBINS]; + /* + * The start of the allocation containing the dynamic allocation for + * either the cache bins alone, or the cache bin memory as well as this + * tcache_slow_t and its associated tcache_t. + */ + void *dyn_alloc; + + /* The associated bins. */ + tcache_t *tcache; +}; + +struct tcache_s { + tcache_slow_t *tcache_slow; + cache_bin_t bins[TCACHE_NBINS_MAX]; +}; + +/* Linkage for list of available (previously used) explicit tcache IDs. */ +struct tcaches_s { + union { + tcache_t *tcache; + tcaches_t *next; + }; +}; + +/******************************************************************************/ +/* EXTERNS */ +/******************************************************************************/ + +extern bool opt_tcache; +extern size_t opt_tcache_max; +extern ssize_t opt_lg_tcache_nslots_mul; +extern unsigned opt_tcache_nslots_small_min; +extern unsigned opt_tcache_nslots_small_max; +extern unsigned opt_tcache_nslots_large; +extern ssize_t opt_lg_tcache_shift; +extern size_t opt_tcache_gc_incr_bytes; +extern size_t opt_tcache_gc_delay_bytes; +extern unsigned opt_lg_tcache_flush_small_div; +extern unsigned opt_lg_tcache_flush_large_div; + +/* + * Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more + * large-object bins. This is only used during threads initialization and + * changing it will not reflect on initialized threads as expected. Thus, + * it should not be changed on the fly. To change the number of tcache bins + * in use, refer to tcache_nbins of each tcache. + */ +extern unsigned global_do_not_change_tcache_nbins; + +/* + * Maximum cached size class. Same as above, this is only used during threads + * initialization and should not be changed. To change the maximum cached size + * class, refer to tcache_max of each tcache. + */ +extern size_t global_do_not_change_tcache_maxclass; + +/* + * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and + * usable via the MALLOCX_TCACHE() flag. The automatic per thread tcaches are + * completely disjoint from this data structure. tcaches starts off as a sparse + * array, so it has no physical memory footprint until individual pages are + * touched. This allows the entire array to be allocated the first time an + * explicit tcache is created without a disproportionate impact on memory usage. + */ +extern tcaches_t *tcaches; + +size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); +void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, bool *tcache_success); + +void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, unsigned rem); +void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, unsigned rem); +void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, bool is_small); +bool tcache_bin_info_default_init( + const char *bin_settings_segment_cur, size_t len_left); +bool tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len); +bool tcache_bin_ncached_max_read( + tsd_t *tsd, size_t bin_size, cache_bin_sz_t *ncached_max); +void tcache_arena_reassociate( + tsdn_t *tsdn, tcache_slow_t *tcache_slow, arena_t *arena); +tcache_t *tcache_create_explicit(tsd_t *tsd); +bool thread_tcache_max_set(tsd_t *tsd, size_t tcache_max); +void tcache_cleanup(tsd_t *tsd); +bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind); +void tcaches_flush(tsd_t *tsd, unsigned ind); +void tcaches_destroy(tsd_t *tsd, unsigned ind); +bool tcache_boot(tsdn_t *tsdn, base_t *base); +void tcache_arena_associate( + tsdn_t *tsdn, tcache_slow_t *tcache_slow, arena_t *arena); +cache_bin_array_descriptor_t *tcache_postfork_arena_descriptor( + tsdn_t *tsdn, arena_t *arena); +void tcache_prefork(tsdn_t *tsdn); +void tcache_postfork_parent(tsdn_t *tsdn); +void tcache_postfork_child(tsdn_t *tsdn); +void tcache_flush(tsd_t *tsd); +bool tsd_tcache_enabled_data_init(tsd_t *tsd); +void tcache_enabled_set(tsd_t *tsd, bool enabled); + +extern void *(*JET_MUTABLE tcache_stack_alloc)(tsdn_t *tsdn, size_t size, + size_t alignment); + +void tcache_assert_initialized(tcache_t *tcache); + +extern te_base_cb_t tcache_gc_te_handler; + +#endif /* JEMALLOC_INTERNAL_TCACHE_H */ diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h deleted file mode 100644 index 4dc0bae9..00000000 --- a/include/jemalloc/internal/tcache_externs.h +++ /dev/null @@ -1,91 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_TCACHE_EXTERNS_H -#define JEMALLOC_INTERNAL_TCACHE_EXTERNS_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/cache_bin.h" -#include "jemalloc/internal/sz.h" -#include "jemalloc/internal/tcache_types.h" -#include "jemalloc/internal/thread_event_registry.h" - -extern bool opt_tcache; -extern size_t opt_tcache_max; -extern ssize_t opt_lg_tcache_nslots_mul; -extern unsigned opt_tcache_nslots_small_min; -extern unsigned opt_tcache_nslots_small_max; -extern unsigned opt_tcache_nslots_large; -extern ssize_t opt_lg_tcache_shift; -extern size_t opt_tcache_gc_incr_bytes; -extern size_t opt_tcache_gc_delay_bytes; -extern unsigned opt_lg_tcache_flush_small_div; -extern unsigned opt_lg_tcache_flush_large_div; - -/* - * Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more - * large-object bins. This is only used during threads initialization and - * changing it will not reflect on initialized threads as expected. Thus, - * it should not be changed on the fly. To change the number of tcache bins - * in use, refer to tcache_nbins of each tcache. - */ -extern unsigned global_do_not_change_tcache_nbins; - -/* - * Maximum cached size class. Same as above, this is only used during threads - * initialization and should not be changed. To change the maximum cached size - * class, refer to tcache_max of each tcache. - */ -extern size_t global_do_not_change_tcache_maxclass; - -/* - * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and - * usable via the MALLOCX_TCACHE() flag. The automatic per thread tcaches are - * completely disjoint from this data structure. tcaches starts off as a sparse - * array, so it has no physical memory footprint until individual pages are - * touched. This allows the entire array to be allocated the first time an - * explicit tcache is created without a disproportionate impact on memory usage. - */ -extern tcaches_t *tcaches; - -size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); -void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - cache_bin_t *cache_bin, szind_t binind, bool *tcache_success); - -void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, - cache_bin_t *cache_bin, szind_t binind, unsigned rem); -void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, - cache_bin_t *cache_bin, szind_t binind, unsigned rem); -void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, - cache_bin_t *cache_bin, szind_t binind, bool is_small); -bool tcache_bin_info_default_init( - const char *bin_settings_segment_cur, size_t len_left); -bool tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len); -bool tcache_bin_ncached_max_read( - tsd_t *tsd, size_t bin_size, cache_bin_sz_t *ncached_max); -void tcache_arena_reassociate( - tsdn_t *tsdn, tcache_slow_t *tcache_slow, arena_t *arena); -tcache_t *tcache_create_explicit(tsd_t *tsd); -bool thread_tcache_max_set(tsd_t *tsd, size_t tcache_max); -void tcache_cleanup(tsd_t *tsd); -bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind); -void tcaches_flush(tsd_t *tsd, unsigned ind); -void tcaches_destroy(tsd_t *tsd, unsigned ind); -bool tcache_boot(tsdn_t *tsdn, base_t *base); -void tcache_arena_associate( - tsdn_t *tsdn, tcache_slow_t *tcache_slow, arena_t *arena); -cache_bin_array_descriptor_t *tcache_postfork_arena_descriptor( - tsdn_t *tsdn, arena_t *arena); -void tcache_prefork(tsdn_t *tsdn); -void tcache_postfork_parent(tsdn_t *tsdn); -void tcache_postfork_child(tsdn_t *tsdn); -void tcache_flush(tsd_t *tsd); -bool tsd_tcache_enabled_data_init(tsd_t *tsd); -void tcache_enabled_set(tsd_t *tsd, bool enabled); - -extern void *(*JET_MUTABLE tcache_stack_alloc)(tsdn_t *tsdn, size_t size, - size_t alignment); - -void tcache_assert_initialized(tcache_t *tcache); - -extern te_base_cb_t tcache_gc_te_handler; - -#endif /* JEMALLOC_INTERNAL_TCACHE_EXTERNS_H */ diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 181db1b3..8ce0fb01 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -10,7 +10,7 @@ #include "jemalloc/internal/san.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/sz.h" -#include "jemalloc/internal/tcache_externs.h" +#include "jemalloc/internal/tcache.h" #include "jemalloc/internal/util.h" static inline bool diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h deleted file mode 100644 index 710286c9..00000000 --- a/include/jemalloc/internal/tcache_structs.h +++ /dev/null @@ -1,72 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_TCACHE_STRUCTS_H -#define JEMALLOC_INTERNAL_TCACHE_STRUCTS_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/cache_bin.h" -#include "jemalloc/internal/ql.h" -#include "jemalloc/internal/sc.h" -#include "jemalloc/internal/tcache_types.h" -#include "jemalloc/internal/ticker.h" - -/* - * The tcache state is split into the slow and hot path data. Each has a - * pointer to the other, and the data always comes in pairs. The layout of each - * of them varies in practice; tcache_slow lives in the TSD for the automatic - * tcache, and as part of a dynamic allocation for manual allocations. Keeping - * a pointer to tcache_slow lets us treat these cases uniformly, rather than - * splitting up the tcache [de]allocation code into those paths called with the - * TSD tcache and those called with a manual tcache. - */ - -struct tcache_slow_s { - /* - * The descriptor lets the arena find our cache bins without seeing the - * tcache definition. This enables arenas to aggregate stats across - * tcaches without having a tcache dependency. - */ - cache_bin_array_descriptor_t cache_bin_array_descriptor; - - /* The arena this tcache is associated with. */ - arena_t *arena; - /* The number of bins activated in the tcache. */ - unsigned tcache_nbins; - /* Last time GC has been performed. */ - nstime_t last_gc_time; - /* Next bin to GC. */ - szind_t next_gc_bin; - szind_t next_gc_bin_small; - szind_t next_gc_bin_large; - /* For small bins, help determine how many items to fill at a time. */ - cache_bin_fill_ctl_t bin_fill_ctl_do_not_access_directly[SC_NBINS]; - /* For small bins, whether has been refilled since last GC. */ - bool bin_refilled[SC_NBINS]; - /* - * For small bins, the number of items we can pretend to flush before - * actually flushing. - */ - uint8_t bin_flush_delay_items[SC_NBINS]; - /* - * The start of the allocation containing the dynamic allocation for - * either the cache bins alone, or the cache bin memory as well as this - * tcache_slow_t and its associated tcache_t. - */ - void *dyn_alloc; - - /* The associated bins. */ - tcache_t *tcache; -}; - -struct tcache_s { - tcache_slow_t *tcache_slow; - cache_bin_t bins[TCACHE_NBINS_MAX]; -}; - -/* Linkage for list of available (previously used) explicit tcache IDs. */ -struct tcaches_s { - union { - tcache_t *tcache; - tcaches_t *next; - }; -}; - -#endif /* JEMALLOC_INTERNAL_TCACHE_STRUCTS_H */ diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h deleted file mode 100644 index 27d80d3c..00000000 --- a/include/jemalloc/internal/tcache_types.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_TCACHE_TYPES_H -#define JEMALLOC_INTERNAL_TCACHE_TYPES_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/sc.h" - -typedef struct tcache_slow_s tcache_slow_t; -typedef struct tcache_s tcache_t; -typedef struct tcaches_s tcaches_t; - -/* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */ -#define TCACHE_ZERO_INITIALIZER \ - { 0 } -#define TCACHE_SLOW_ZERO_INITIALIZER \ - { \ - { 0 } \ - } - -/* Used in TSD static initializer only. Will be initialized to opt_tcache. */ -#define TCACHE_ENABLED_ZERO_INITIALIZER false - -/* Used for explicit tcache only. Means flushed but not destroyed. */ -/* NOLINTNEXTLINE(performance-no-int-to-ptr) */ -#define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1) - -#define TCACHE_LG_MAXCLASS_LIMIT LG_USIZE_GROW_SLOW_THRESHOLD -#define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT) -#define TCACHE_NBINS_MAX \ - (SC_NBINS \ - + SC_NGROUP * (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) \ - + 1) -#define TCACHE_GC_NEIGHBOR_LIMIT ((uintptr_t)1 << 21) /* 2M */ -#define TCACHE_GC_INTERVAL_NS ((uint64_t)10 * KQU(1000000)) /* 10ms */ -#define TCACHE_GC_SMALL_NBINS_MAX ((SC_NBINS > 8) ? (SC_NBINS >> 3) : 1) -#define TCACHE_GC_LARGE_NBINS_MAX 1 - -#endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */ diff --git a/include/jemalloc/internal/tsd_internals.h b/include/jemalloc/internal/tsd_internals.h index a7b6fa5e..09590eaf 100644 --- a/include/jemalloc/internal/tsd_internals.h +++ b/include/jemalloc/internal/tsd_internals.h @@ -9,11 +9,12 @@ #include "jemalloc/internal/tsd_binshards.h" #include "jemalloc/internal/jemalloc_internal_externs.h" #include "jemalloc/internal/peak.h" -#include "jemalloc/internal/prof_types.h" #include "jemalloc/internal/rtree_tsd.h" -#include "jemalloc/internal/tcache_structs.h" -#include "jemalloc/internal/tcache_types.h" +#include "jemalloc/internal/tcache.h" #include "jemalloc/internal/thread_event_registry.h" + +/* Forward decl; tsd_internals.h only uses prof_tdata_t as a pointer type. */ +typedef struct prof_tdata_s prof_tdata_t; #include "jemalloc/internal/tsd_types.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/witness.h" diff --git a/src/thread_event_registry.c b/src/thread_event_registry.c index b8307df0..1db8fa72 100644 --- a/src/thread_event_registry.c +++ b/src/thread_event_registry.c @@ -3,9 +3,9 @@ #include "jemalloc/internal/thread_event.h" #include "jemalloc/internal/thread_event_registry.h" -#include "jemalloc/internal/tcache_externs.h" +#include "jemalloc/internal/tcache.h" #include "jemalloc/internal/peak_event.h" -#include "jemalloc/internal/prof_externs.h" +#include "jemalloc/internal/prof.h" #include "jemalloc/internal/stats.h" static malloc_mutex_t uevents_mu;