Consolidate prof_* and tcache_* header splits

Each of these components had a four-way split (_types, _structs,
_externs, _inlines) that dates back to the old "include each section
multiple times from a master file" pattern. With Step 2's edata <->
prof_types decoupling, merging _types + _structs + _externs into one
header per component no longer risks recreating an include cycle.

- prof.h replaces prof_types.h + prof_structs.h + prof_externs.h.
- tcache.h replaces tcache_types.h + tcache_structs.h + tcache_externs.h.

prof_inlines.h and tcache_inlines.h are kept separate: prof_inlines.h
sits at the bottom of the dependency layering, and tcache_inlines.h's
include of arena_externs.h is the asymmetric cycle-breaker that keeps
the arena<->tcache symbol cycle from becoming an include cycle.

Two surprises required adjustments beyond a straight concatenation:

1. te_prof_sample_event_lookahead was a JEMALLOC_ALWAYS_INLINE function
   defined in prof_externs.h, but its body calls tsd_thread_allocated_*
   accessors that only exist after tsd inlines are loaded. The original
   layering hid this because prof_externs.h was only included near the
   bottom of jemalloc_internal_includes.h. After consolidation,
   tsd_internals.h's includes pull prof.h in earlier, exposing the
   ordering dependency. Moved the inline to prof_inlines.h (where
   inline definitions belong anyway) and left only the related extern
   in prof.h.

2. base.h was included from prof_externs.h and tcache_externs.h purely
   for base_t * pointer arguments on a couple of declarations. Carrying
   that include into the merged prof.h / tcache.h would pull ehooks.h
   (-> tsd.h) into tsd_internals.h before tsd_internals.h finishes
   declaring its tsd accessors. Replaced with a forward declaration of
   base_t in each merged file.

Similarly, tsd_internals.h's prior #include of prof_types.h becomes a
forward decl of prof_tdata_t (the only prof symbol it references, and
only as a pointer), and large.h needs a forward decl of prof_info_t
because large.h is loaded before prof.h in the new master ordering.

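No inline / static qualifiers are dropped; the only inline function that
ends up in a different kind of header is te_prof_sample_event_lookahead
(prof_externs.h -> prof_inlines.h). #ifdef blocks (JEMALLOC_PROF,
JEMALLOC_PROF_LIBGCC, JEMALLOC_PROF_GCC, JEMALLOC_DEBUG) are kept intact.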
This commit is contained in:
Slobodan Predolac 2026-05-27 18:07:16 -04:00 committed by Slobodan Predolac
parent fb92d8a916
commit ab4c178444
16 changed files with 666 additions and 664 deletions

View file

@ -10,8 +10,7 @@
#include "jemalloc/internal/jemalloc_internal_types.h"
#include "jemalloc/internal/large.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/prof_externs.h"
#include "jemalloc/internal/prof_structs.h"
#include "jemalloc/internal/prof.h"
#include "jemalloc/internal/rtree.h"
#include "jemalloc/internal/safety_check.h"
#include "jemalloc/internal/sc.h"

View file

@ -41,16 +41,12 @@
/******************************************************************************/
#include "jemalloc/internal/arena_types.h"
#include "jemalloc/internal/tcache_types.h"
#include "jemalloc/internal/prof_types.h"
/******************************************************************************/
/* STRUCTS */
/******************************************************************************/
#include "jemalloc/internal/prof_structs.h"
#include "jemalloc/internal/arena_structs.h"
#include "jemalloc/internal/tcache_structs.h"
/******************************************************************************/
/* EXTERNS */
@ -59,9 +55,9 @@
#include "jemalloc/internal/jemalloc_internal_externs.h"
#include "jemalloc/internal/arena_externs.h"
#include "jemalloc/internal/large.h"
#include "jemalloc/internal/tcache_externs.h"
#include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/malloc_dispatch_externs.h"
#include "jemalloc/internal/prof_externs.h"
#include "jemalloc/internal/prof.h"
#include "jemalloc/internal/background_thread.h"
/******************************************************************************/

View file

@ -9,7 +9,7 @@
#include "jemalloc/internal/bit_util.h"
#include "jemalloc/internal/jemalloc_internal_types.h"
#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/tcache_externs.h"
#include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/ticker.h"
JEMALLOC_ALWAYS_INLINE malloc_cpuid_t

View file

@ -4,6 +4,9 @@
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/edata.h"
/* Forward decl; only prof_info_t * is used as a pointer arg below. */
typedef struct prof_info_s prof_info_t;
void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero);
void *large_palloc(
tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero);

View file

@ -0,0 +1,427 @@
#ifndef JEMALLOC_INTERNAL_PROF_H
#define JEMALLOC_INTERNAL_PROF_H
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/ckh.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/prng.h"
#include "jemalloc/internal/prof_hook.h"
#include "jemalloc/internal/rb.h"
#include "jemalloc/internal/thread_event_registry.h"
/* Forward decl; only base_t * is used as a pointer arg below. */
typedef struct base_s base_t;
/******************************************************************************/
/* TYPES */
/******************************************************************************/
typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_tctx_s prof_tctx_t;
typedef struct prof_info_s prof_info_t;
typedef struct prof_gctx_s prof_gctx_t;
typedef struct prof_tdata_s prof_tdata_t;
typedef struct prof_recent_s prof_recent_t;
/* Option defaults. */
#ifdef JEMALLOC_PROF
# define PROF_PREFIX_DEFAULT "jeprof"
#else
# define PROF_PREFIX_DEFAULT ""
#endif
#define LG_PROF_SAMPLE_DEFAULT 19
#define LG_PROF_INTERVAL_DEFAULT -1
/*
* Hard limit on stack backtrace depth. The version of prof_backtrace() that
* is based on __builtin_return_address() necessarily has a hard-coded number
* of backtrace frame handlers, and should be kept in sync with this setting.
*/
#ifdef JEMALLOC_PROF_GCC
# define PROF_BT_MAX_LIMIT 256
#else
# define PROF_BT_MAX_LIMIT UINT_MAX
#endif
#define PROF_BT_MAX_DEFAULT 128
/* Initial hash table size. */
#define PROF_CKH_MINITEMS 64
/* Size of memory buffer to use when writing dump files. */
#ifndef JEMALLOC_PROF
/* Minimize memory bloat for non-prof builds. */
# define PROF_DUMP_BUFSIZE 1
#elif defined(JEMALLOC_DEBUG)
/* Use a small buffer size in debug build, mainly to facilitate testing. */
# define PROF_DUMP_BUFSIZE 16
#else
# define PROF_DUMP_BUFSIZE 65536
#endif
/* Size of size class related tables */
#ifdef JEMALLOC_PROF
# define PROF_SC_NSIZES SC_NSIZES
#else
/* Minimize memory bloat for non-prof builds. */
# define PROF_SC_NSIZES 1
#endif
/* Size of stack-allocated buffer used by prof_printf(). */
#define PROF_PRINTF_BUFSIZE 128
/*
* Number of mutexes shared among all gctx's. No space is allocated for these
* unless profiling is enabled, so it's okay to over-provision.
*/
#define PROF_NCTX_LOCKS 1024
/*
* Number of mutexes shared among all tdata's. No space is allocated for these
* unless profiling is enabled, so it's okay to over-provision.
*/
#define PROF_NTDATA_LOCKS 256
/* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
# define PROF_DUMP_FILENAME_LEN (PATH_MAX + 1)
#else
# define PROF_DUMP_FILENAME_LEN 1
#endif
/* Default number of recent allocations to record. */
#define PROF_RECENT_ALLOC_MAX_DEFAULT 0
/* Thread name storage size limit. */
#define PROF_THREAD_NAME_MAX_LEN 16
/*
* Minimum required alignment for sampled allocations. Over-aligning sampled
* allocations allows us to quickly identify them on the dalloc path without
* resorting to metadata lookup.
*/
#define PROF_SAMPLE_ALIGNMENT PAGE
#define PROF_SAMPLE_ALIGNMENT_MASK PAGE_MASK
/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
#define PROF_TCTX_SENTINEL ((prof_tctx_t *)((uintptr_t)1U))
/******************************************************************************/
/* STRUCTS */
/******************************************************************************/
/* A stack backtrace: an array of program counters together with its length. */
struct prof_bt_s {
/* Backtrace, stored as len program counters. */
void **vec;
/* Number of program counters stored in vec. */
unsigned len;
};
#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
/*
* NOTE(review): the field roles below are inferred from the names and from
* prof_bt_s; confirm against the unwind callback, which is not in this header.
*/
typedef struct {
/* Presumably the destination array for captured program counters. */
void **vec;
/* Pointer to a frame count kept outside this struct (presumably updated by the callback). */
unsigned *len;
/* Presumably the maximum number of frames to record. */
unsigned max;
} prof_unwind_data_t;
#endif
/*
* A set of profiling counters.  Embedded by value in prof_tctx_s (cnts,
* dump_cnts), prof_gctx_s (cnt_summed) and prof_tdata_s (cnt_summed).
*/
struct prof_cnt_s {
/* Profiling counters. */
/*
* NOTE(review): the cur* fields look like current totals and the accum*
* fields like cumulative totals (cf. opt_prof_accum), with *_unbiased
* variants related to opt_prof_unbias -- confirm against the code that
* updates them.
*/
uint64_t curobjs;
uint64_t curobjs_shifted_unbiased;
uint64_t curbytes;
uint64_t curbytes_unbiased;
uint64_t accumobjs;
uint64_t accumobjs_shifted_unbiased;
uint64_t accumbytes;
uint64_t accumbytes_unbiased;
};
/*
* Dump-related state of a prof_tctx_t.  Stored in prof_tctx_s.state, which is
* documented there as protected by gctx->lock.
*/
typedef enum {
prof_tctx_state_initializing,
prof_tctx_state_nominal,
prof_tctx_state_dumping,
prof_tctx_state_purgatory /* Dumper must finish destroying. */
} prof_tctx_state_t;
/*
* Per-thread profiling context: ties one thread's data (tdata) to one global
* context (gctx) and holds the counters for that pairing.  Instances are
* linked into the owning gctx's tctxs tree through tctx_link.
*/
struct prof_tctx_s {
/* Thread data for thread that performed the allocation. */
prof_tdata_t *tdata;
/*
* Copy of tdata->thr_{uid,discrim}, necessary because tdata may be
* defunct during teardown.
*/
uint64_t thr_uid;
uint64_t thr_discrim;
/*
* Reference count of how many times this tctx object is referenced in
* recent allocation / deallocation records, protected by tdata->lock.
*/
uint64_t recent_count;
/* Profiling counters, protected by tdata->lock. */
prof_cnt_t cnts;
/* Associated global context. */
prof_gctx_t *gctx;
/*
* UID that distinguishes multiple tctx's created by the same thread,
* but coexisting in gctx->tctxs. There are two ways that such
* coexistence can occur:
* - A dumper thread can cause a tctx to be retained in the purgatory
* state.
* - Although a single "producer" thread must create all tctx's which
* share the same thr_uid, multiple "consumers" can each concurrently
* execute portions of prof_tctx_destroy(). prof_tctx_destroy() only
* gets called once each time cnts.cur{objs,bytes} drop to 0, but this
* threshold can be hit again before the first consumer finishes
* executing prof_tctx_destroy().
*/
uint64_t tctx_uid;
/* Linkage into gctx's tctxs. */
rb_node(prof_tctx_t) tctx_link;
/*
* True during prof_alloc_prep()..prof_malloc_sample_object(), prevents
* sample vs destroy race.
*/
bool prepared;
/* Current dump-related state, protected by gctx->lock. */
prof_tctx_state_t state;
/*
* Copy of cnts snapshotted during early dump phase, protected by
* dump_mtx.
*/
prof_cnt_t dump_cnts;
};
typedef rb_tree(prof_tctx_t) prof_tctx_tree_t;
/*
* Profiling information about a single allocation: when it was made, which
* tctx it is attributed to, and the size that was requested.
*/
struct prof_info_s {
/* Time when the allocation was made. */
nstime_t alloc_time;
/* Points to the prof_tctx_t corresponding to the allocation. */
prof_tctx_t *alloc_tctx;
/* Allocation request size. */
size_t alloc_size;
};
/*
* Global profiling context for one backtrace (bt).  Holds the tree of
* per-thread tctx's that have allocated in this context, plus the state
* needed while the context is being dumped.
*/
struct prof_gctx_s {
/* Protects nlimbo, cnt_summed, and tctxs. */
/*
* Held by pointer; per the PROF_NCTX_LOCKS comment, a fixed number of
* mutexes is shared among all gctx's.
*/
malloc_mutex_t *lock;
/*
* Number of threads that currently cause this gctx to be in a state of
* limbo due to one of:
* - Initializing this gctx.
* - Initializing per thread counters associated with this gctx.
* - Preparing to destroy this gctx.
* - Dumping a heap profile that includes this gctx.
* nlimbo must be 1 (single destroyer) in order to safely destroy the
* gctx.
*/
unsigned nlimbo;
/*
* Tree of profile counters, one for each thread that has allocated in
* this context.
*/
prof_tctx_tree_t tctxs;
/* Linkage for tree of contexts to be dumped. */
rb_node(prof_gctx_t) dump_link;
/* Temporary storage for summation during dump. */
prof_cnt_t cnt_summed;
/* Associated backtrace. */
prof_bt_t bt;
/* Backtrace vector, variable size, referred to by bt. */
void *vec[1];
};
typedef rb_tree(prof_gctx_t) prof_gctx_tree_t;
/*
* Per-thread profiling data.  A prof_tctx_t refers back to the tdata of the
* thread that performed the allocation (prof_tctx_s.tdata).
*/
struct prof_tdata_s {
/*
* Held by pointer; per the PROF_NTDATA_LOCKS comment, a fixed number of
* mutexes is shared among all tdata's.  prof_tctx_s documents its
* recent_count and cnts fields as protected by tdata->lock.
*/
malloc_mutex_t *lock;
/* Monotonically increasing unique thread identifier. */
uint64_t thr_uid;
/*
* Monotonically increasing discriminator among tdata structures
* associated with the same thr_uid.
*/
uint64_t thr_discrim;
/* Tree linkage; the matching tree type is prof_tdata_tree_t. */
rb_node(prof_tdata_t) tdata_link;
/*
* Counter used to initialize prof_tctx_t's tctx_uid. No locking is
* necessary when incrementing this field, because only one thread ever
* does so.
*/
uint64_t tctx_uid_next;
/*
* Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks
* backtraces for which it has non-zero allocation/deallocation counters
* associated with thread-specific prof_tctx_t objects. Other threads
* may write to prof_tctx_t contents when freeing associated objects.
*/
ckh_t bt2tctx;
/* Included in heap profile dumps if has content. */
char thread_name[PROF_THREAD_NAME_MAX_LEN];
/* State used to avoid dumping while operating on prof internals. */
bool enq;
bool enq_idump;
bool enq_gdump;
/*
* Set to true during an early dump phase for tdata's which are
* currently being dumped. New threads' tdata's have this initialized
* to false so that they aren't accidentally included in later dump
* phases.
*/
bool dumping;
/*
* True if profiling is active for this tdata's thread
* (thread.prof.active mallctl).
*/
bool active;
/*
* NOTE(review): attached and expired are not documented in this header;
* their semantics are defined by code outside it -- confirm before
* relying on them.
*/
bool attached;
bool expired;
/* Temporary storage for summation during dump. */
prof_cnt_t cnt_summed;
/* Backtrace vector, used for calls to prof_backtrace(). */
void **vec;
};
typedef rb_tree(prof_tdata_t) prof_tdata_tree_t;
/*
* One record in the set of recent allocations (cf. opt_prof_recent_alloc_max,
* "For recording recent allocations").
*/
struct prof_recent_s {
/*
* NOTE(review): presumably the time of the allocation and the time of its
* deallocation -- confirm against the code that fills these in.
*/
nstime_t alloc_time;
nstime_t dalloc_time;
/* Linkage element (ql_elm) connecting prof_recent_t records. */
ql_elm(prof_recent_t) link;
/* NOTE(review): presumably requested size vs. usable size -- confirm. */
size_t size;
size_t usize;
atomic_p_t alloc_edata; /* NULL means allocation has been freed. */
/*
* tctx's for the allocation and deallocation sides of the record; cf.
* prof_tctx_s.recent_count, which counts such references.
*/
prof_tctx_t *alloc_tctx;
prof_tctx_t *dalloc_tctx;
};
/******************************************************************************/
/* EXTERNS */
/******************************************************************************/
extern bool opt_prof;
extern bool opt_prof_active;
extern bool opt_prof_thread_active_init;
extern unsigned opt_prof_bt_max;
extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */
extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */
extern bool opt_prof_gdump; /* High-water memory dumping. */
extern bool opt_prof_final; /* Final profile dumping. */
extern bool opt_prof_leak; /* Dump leak summary at exit. */
extern bool opt_prof_leak_error; /* Exit with error code if memory leaked */
extern bool opt_prof_accum; /* Report cumulative bytes. */
extern bool opt_prof_log; /* Turn logging on at boot. */
extern char opt_prof_prefix[
/* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
PATH_MAX +
#endif
1];
extern bool opt_prof_unbias;
/* Include pid namespace in profile file names. */
extern bool opt_prof_pid_namespace;
/* For recording recent allocations */
extern ssize_t opt_prof_recent_alloc_max;
/* Whether to use thread name provided by the system or by mallctl. */
extern bool opt_prof_sys_thread_name;
/* Whether to record per size class counts and request size totals. */
extern bool opt_prof_stats;
/* Accessed via prof_active_[gs]et{_unlocked,}(). */
extern bool prof_active_state;
/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */
extern bool prof_gdump_val;
/* Profile dump interval, measured in bytes allocated. */
extern uint64_t prof_interval;
/*
* Initialized as opt_lg_prof_sample, and potentially modified during profiling
* resets.
*/
extern size_t lg_prof_sample;
extern bool prof_booted;
void prof_backtrace_hook_set(prof_backtrace_hook_t hook);
prof_backtrace_hook_t prof_backtrace_hook_get(void);
void prof_dump_hook_set(prof_dump_hook_t hook);
prof_dump_hook_t prof_dump_hook_get(void);
void prof_sample_hook_set(prof_sample_hook_t hook);
prof_sample_hook_t prof_sample_hook_get(void);
void prof_sample_free_hook_set(prof_sample_free_hook_t hook);
prof_sample_free_hook_t prof_sample_free_hook_get(void);
/* Functions only accessed in prof_inlines.h */
prof_tdata_t *prof_tdata_init(tsd_t *tsd);
prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx);
void prof_malloc_sample_object(
tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx);
void prof_free_sampled_object(
tsd_t *tsd, const void *ptr, size_t usize, prof_info_t *prof_info);
prof_tctx_t *prof_tctx_create(tsd_t *tsd);
void prof_idump(tsdn_t *tsdn);
bool prof_mdump(tsd_t *tsd, const char *filename);
void prof_gdump(tsdn_t *tsdn);
void prof_tdata_cleanup(tsd_t *tsd);
bool prof_active_get(tsdn_t *tsdn);
bool prof_active_set(tsdn_t *tsdn, bool active);
const char *prof_thread_name_get(tsd_t *tsd);
int prof_thread_name_set(tsd_t *tsd, const char *thread_name);
bool prof_thread_active_get(tsd_t *tsd);
bool prof_thread_active_set(tsd_t *tsd, bool active);
bool prof_thread_active_init_get(tsdn_t *tsdn);
bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init);
bool prof_gdump_get(tsdn_t *tsdn);
bool prof_gdump_set(tsdn_t *tsdn, bool active);
void prof_boot0(void);
void prof_boot1(void);
bool prof_boot2(tsd_t *tsd, base_t *base);
void prof_prefork0(tsdn_t *tsdn);
void prof_prefork1(tsdn_t *tsdn);
void prof_postfork_parent(tsdn_t *tsdn);
void prof_postfork_child(tsdn_t *tsdn);
uint64_t prof_sample_new_event_wait(tsd_t *tsd);
uint64_t tsd_prof_sample_event_wait_get(tsd_t *tsd);
extern te_base_cb_t prof_sample_te_handler;
#endif /* JEMALLOC_INTERNAL_PROF_H */

View file

@ -1,132 +0,0 @@
#ifndef JEMALLOC_INTERNAL_PROF_EXTERNS_H
#define JEMALLOC_INTERNAL_PROF_EXTERNS_H
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/base.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/prof_hook.h"
#include "jemalloc/internal/thread_event_registry.h"
extern bool opt_prof;
extern bool opt_prof_active;
extern bool opt_prof_thread_active_init;
extern unsigned opt_prof_bt_max;
extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */
extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */
extern bool opt_prof_gdump; /* High-water memory dumping. */
extern bool opt_prof_final; /* Final profile dumping. */
extern bool opt_prof_leak; /* Dump leak summary at exit. */
extern bool opt_prof_leak_error; /* Exit with error code if memory leaked */
extern bool opt_prof_accum; /* Report cumulative bytes. */
extern bool opt_prof_log; /* Turn logging on at boot. */
extern char opt_prof_prefix[
/* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
PATH_MAX +
#endif
1];
extern bool opt_prof_unbias;
/* Include pid namespace in profile file names. */
extern bool opt_prof_pid_namespace;
/* For recording recent allocations */
extern ssize_t opt_prof_recent_alloc_max;
/* Whether to use thread name provided by the system or by mallctl. */
extern bool opt_prof_sys_thread_name;
/* Whether to record per size class counts and request size totals. */
extern bool opt_prof_stats;
/* Accessed via prof_active_[gs]et{_unlocked,}(). */
extern bool prof_active_state;
/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */
extern bool prof_gdump_val;
/* Profile dump interval, measured in bytes allocated. */
extern uint64_t prof_interval;
/*
* Initialized as opt_lg_prof_sample, and potentially modified during profiling
* resets.
*/
extern size_t lg_prof_sample;
extern bool prof_booted;
void prof_backtrace_hook_set(prof_backtrace_hook_t hook);
prof_backtrace_hook_t prof_backtrace_hook_get(void);
void prof_dump_hook_set(prof_dump_hook_t hook);
prof_dump_hook_t prof_dump_hook_get(void);
void prof_sample_hook_set(prof_sample_hook_t hook);
prof_sample_hook_t prof_sample_hook_get(void);
void prof_sample_free_hook_set(prof_sample_free_hook_t hook);
prof_sample_free_hook_t prof_sample_free_hook_get(void);
/* Functions only accessed in prof_inlines.h */
prof_tdata_t *prof_tdata_init(tsd_t *tsd);
prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx);
void prof_malloc_sample_object(
tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx);
void prof_free_sampled_object(
tsd_t *tsd, const void *ptr, size_t usize, prof_info_t *prof_info);
prof_tctx_t *prof_tctx_create(tsd_t *tsd);
void prof_idump(tsdn_t *tsdn);
bool prof_mdump(tsd_t *tsd, const char *filename);
void prof_gdump(tsdn_t *tsdn);
void prof_tdata_cleanup(tsd_t *tsd);
bool prof_active_get(tsdn_t *tsdn);
bool prof_active_set(tsdn_t *tsdn, bool active);
const char *prof_thread_name_get(tsd_t *tsd);
int prof_thread_name_set(tsd_t *tsd, const char *thread_name);
bool prof_thread_active_get(tsd_t *tsd);
bool prof_thread_active_set(tsd_t *tsd, bool active);
bool prof_thread_active_init_get(tsdn_t *tsdn);
bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init);
bool prof_gdump_get(tsdn_t *tsdn);
bool prof_gdump_set(tsdn_t *tsdn, bool active);
void prof_boot0(void);
void prof_boot1(void);
bool prof_boot2(tsd_t *tsd, base_t *base);
void prof_prefork0(tsdn_t *tsdn);
void prof_prefork1(tsdn_t *tsdn);
void prof_postfork_parent(tsdn_t *tsdn);
void prof_postfork_child(tsdn_t *tsdn);
uint64_t prof_sample_new_event_wait(tsd_t *tsd);
uint64_t tsd_prof_sample_event_wait_get(tsd_t *tsd);
/*
* The lookahead functionality facilitates events to be able to lookahead, i.e.
* without touching the event counters, to determine whether an event would be
* triggered. The event counters are not advanced until the end of the
* allocation / deallocation calls, so the lookahead can be useful if some
* preparation work for some event must be done early in the allocation /
* deallocation calls.
*
* Currently only the profiling sampling event needs the lookahead
* functionality, so we don't yet define general purpose lookahead functions.
*/
JEMALLOC_ALWAYS_INLINE bool
te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) {
if (unlikely(!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0)) {
return false;
}
/* The subtraction is intentionally susceptible to underflow. */
uint64_t accumbytes = tsd_thread_allocated_get(tsd) + usize
- tsd_thread_allocated_last_event_get(tsd);
return accumbytes >= tsd_prof_sample_event_wait_get(tsd);
}
extern te_base_cb_t prof_sample_te_handler;
#endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */

View file

@ -4,12 +4,37 @@
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/arena_inlines_b.h"
#include "jemalloc/internal/jemalloc_internal_inlines_c.h"
#include "jemalloc/internal/prof_externs.h"
#include "jemalloc/internal/prof_structs.h"
#include "jemalloc/internal/prof.h"
#include "jemalloc/internal/safety_check.h"
#include "jemalloc/internal/sz.h"
#include "jemalloc/internal/thread_event.h"
/*
 * Lookahead lets an event find out, without touching the event counters,
 * whether it would be triggered.  The event counters are only advanced at the
 * end of an allocation / deallocation call, so an event that must do some
 * preparation work early in the call needs this kind of peek.
 *
 * The profiling sampling event is currently the only one that needs
 * lookahead, which is why no general purpose lookahead functions are defined
 * yet.
 *
 * This is defined here rather than in prof.h because the inline body depends
 * on tsd accessors that aren't visible until the tsd inlines are loaded.
 *
 * Returns true when the bytes allocated since the last event, plus usize,
 * reach the current sample event wait; returns false when tsd is not nominal
 * or the reentrancy level is above zero.
 */
JEMALLOC_ALWAYS_INLINE bool
te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) {
	bool lookahead_disabled = !tsd_nominal(tsd)
	    || tsd_reentrancy_level_get(tsd) > 0;
	if (unlikely(lookahead_disabled)) {
		return false;
	}

	/* The subtraction is intentionally susceptible to underflow. */
	uint64_t bytes_since_last_event = tsd_thread_allocated_get(tsd) + usize
	    - tsd_thread_allocated_last_event_get(tsd);
	uint64_t sample_wait = tsd_prof_sample_event_wait_get(tsd);

	return bytes_since_last_event >= sample_wait;
}
JEMALLOC_ALWAYS_INLINE void
prof_active_assert(void) {
cassert(config_prof);

View file

@ -1,221 +0,0 @@
#ifndef JEMALLOC_INTERNAL_PROF_STRUCTS_H
#define JEMALLOC_INTERNAL_PROF_STRUCTS_H
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/ckh.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/prng.h"
#include "jemalloc/internal/rb.h"
struct prof_bt_s {
/* Backtrace, stored as len program counters. */
void **vec;
unsigned len;
};
#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
void **vec;
unsigned *len;
unsigned max;
} prof_unwind_data_t;
#endif
struct prof_cnt_s {
/* Profiling counters. */
uint64_t curobjs;
uint64_t curobjs_shifted_unbiased;
uint64_t curbytes;
uint64_t curbytes_unbiased;
uint64_t accumobjs;
uint64_t accumobjs_shifted_unbiased;
uint64_t accumbytes;
uint64_t accumbytes_unbiased;
};
typedef enum {
prof_tctx_state_initializing,
prof_tctx_state_nominal,
prof_tctx_state_dumping,
prof_tctx_state_purgatory /* Dumper must finish destroying. */
} prof_tctx_state_t;
struct prof_tctx_s {
/* Thread data for thread that performed the allocation. */
prof_tdata_t *tdata;
/*
* Copy of tdata->thr_{uid,discrim}, necessary because tdata may be
* defunct during teardown.
*/
uint64_t thr_uid;
uint64_t thr_discrim;
/*
* Reference count of how many times this tctx object is referenced in
* recent allocation / deallocation records, protected by tdata->lock.
*/
uint64_t recent_count;
/* Profiling counters, protected by tdata->lock. */
prof_cnt_t cnts;
/* Associated global context. */
prof_gctx_t *gctx;
/*
* UID that distinguishes multiple tctx's created by the same thread,
* but coexisting in gctx->tctxs. There are two ways that such
* coexistence can occur:
* - A dumper thread can cause a tctx to be retained in the purgatory
* state.
* - Although a single "producer" thread must create all tctx's which
* share the same thr_uid, multiple "consumers" can each concurrently
* execute portions of prof_tctx_destroy(). prof_tctx_destroy() only
* gets called once each time cnts.cur{objs,bytes} drop to 0, but this
* threshold can be hit again before the first consumer finishes
* executing prof_tctx_destroy().
*/
uint64_t tctx_uid;
/* Linkage into gctx's tctxs. */
rb_node(prof_tctx_t) tctx_link;
/*
* True during prof_alloc_prep()..prof_malloc_sample_object(), prevents
* sample vs destroy race.
*/
bool prepared;
/* Current dump-related state, protected by gctx->lock. */
prof_tctx_state_t state;
/*
* Copy of cnts snapshotted during early dump phase, protected by
* dump_mtx.
*/
prof_cnt_t dump_cnts;
};
typedef rb_tree(prof_tctx_t) prof_tctx_tree_t;
struct prof_info_s {
/* Time when the allocation was made. */
nstime_t alloc_time;
/* Points to the prof_tctx_t corresponding to the allocation. */
prof_tctx_t *alloc_tctx;
/* Allocation request size. */
size_t alloc_size;
};
struct prof_gctx_s {
/* Protects nlimbo, cnt_summed, and tctxs. */
malloc_mutex_t *lock;
/*
* Number of threads that currently cause this gctx to be in a state of
* limbo due to one of:
* - Initializing this gctx.
* - Initializing per thread counters associated with this gctx.
* - Preparing to destroy this gctx.
* - Dumping a heap profile that includes this gctx.
* nlimbo must be 1 (single destroyer) in order to safely destroy the
* gctx.
*/
unsigned nlimbo;
/*
* Tree of profile counters, one for each thread that has allocated in
* this context.
*/
prof_tctx_tree_t tctxs;
/* Linkage for tree of contexts to be dumped. */
rb_node(prof_gctx_t) dump_link;
/* Temporary storage for summation during dump. */
prof_cnt_t cnt_summed;
/* Associated backtrace. */
prof_bt_t bt;
/* Backtrace vector, variable size, referred to by bt. */
void *vec[1];
};
typedef rb_tree(prof_gctx_t) prof_gctx_tree_t;
struct prof_tdata_s {
malloc_mutex_t *lock;
/* Monotonically increasing unique thread identifier. */
uint64_t thr_uid;
/*
* Monotonically increasing discriminator among tdata structures
* associated with the same thr_uid.
*/
uint64_t thr_discrim;
rb_node(prof_tdata_t) tdata_link;
/*
* Counter used to initialize prof_tctx_t's tctx_uid. No locking is
* necessary when incrementing this field, because only one thread ever
* does so.
*/
uint64_t tctx_uid_next;
/*
* Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks
* backtraces for which it has non-zero allocation/deallocation counters
* associated with thread-specific prof_tctx_t objects. Other threads
* may write to prof_tctx_t contents when freeing associated objects.
*/
ckh_t bt2tctx;
/* Included in heap profile dumps if has content. */
char thread_name[PROF_THREAD_NAME_MAX_LEN];
/* State used to avoid dumping while operating on prof internals. */
bool enq;
bool enq_idump;
bool enq_gdump;
/*
* Set to true during an early dump phase for tdata's which are
* currently being dumped. New threads' tdata's have this initialized
* to false so that they aren't accidentally included in later dump
* phases.
*/
bool dumping;
/*
* True if profiling is active for this tdata's thread
* (thread.prof.active mallctl).
*/
bool active;
bool attached;
bool expired;
/* Temporary storage for summation during dump. */
prof_cnt_t cnt_summed;
/* Backtrace vector, used for calls to prof_backtrace(). */
void **vec;
};
typedef rb_tree(prof_tdata_t) prof_tdata_tree_t;
struct prof_recent_s {
nstime_t alloc_time;
nstime_t dalloc_time;
ql_elm(prof_recent_t) link;
size_t size;
size_t usize;
atomic_p_t alloc_edata; /* NULL means allocation has been freed. */
prof_tctx_t *alloc_tctx;
prof_tctx_t *dalloc_tctx;
};
#endif /* JEMALLOC_INTERNAL_PROF_STRUCTS_H */

View file

@ -1,94 +0,0 @@
#ifndef JEMALLOC_INTERNAL_PROF_TYPES_H
#define JEMALLOC_INTERNAL_PROF_TYPES_H
typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_tctx_s prof_tctx_t;
typedef struct prof_info_s prof_info_t;
typedef struct prof_gctx_s prof_gctx_t;
typedef struct prof_tdata_s prof_tdata_t;
typedef struct prof_recent_s prof_recent_t;
/* Option defaults. */
#ifdef JEMALLOC_PROF
# define PROF_PREFIX_DEFAULT "jeprof"
#else
# define PROF_PREFIX_DEFAULT ""
#endif
#define LG_PROF_SAMPLE_DEFAULT 19
#define LG_PROF_INTERVAL_DEFAULT -1
/*
* Hard limit on stack backtrace depth. The version of prof_backtrace() that
* is based on __builtin_return_address() necessarily has a hard-coded number
* of backtrace frame handlers, and should be kept in sync with this setting.
*/
#ifdef JEMALLOC_PROF_GCC
# define PROF_BT_MAX_LIMIT 256
#else
# define PROF_BT_MAX_LIMIT UINT_MAX
#endif
#define PROF_BT_MAX_DEFAULT 128
/* Initial hash table size. */
#define PROF_CKH_MINITEMS 64
/* Size of memory buffer to use when writing dump files. */
#ifndef JEMALLOC_PROF
/* Minimize memory bloat for non-prof builds. */
# define PROF_DUMP_BUFSIZE 1
#elif defined(JEMALLOC_DEBUG)
/* Use a small buffer size in debug build, mainly to facilitate testing. */
# define PROF_DUMP_BUFSIZE 16
#else
# define PROF_DUMP_BUFSIZE 65536
#endif
/* Size of size class related tables */
#ifdef JEMALLOC_PROF
# define PROF_SC_NSIZES SC_NSIZES
#else
/* Minimize memory bloat for non-prof builds. */
# define PROF_SC_NSIZES 1
#endif
/* Size of stack-allocated buffer used by prof_printf(). */
#define PROF_PRINTF_BUFSIZE 128
/*
* Number of mutexes shared among all gctx's. No space is allocated for these
* unless profiling is enabled, so it's okay to over-provision.
*/
#define PROF_NCTX_LOCKS 1024
/*
* Number of mutexes shared among all tdata's. No space is allocated for these
* unless profiling is enabled, so it's okay to over-provision.
*/
#define PROF_NTDATA_LOCKS 256
/* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
# define PROF_DUMP_FILENAME_LEN (PATH_MAX + 1)
#else
# define PROF_DUMP_FILENAME_LEN 1
#endif
/* Default number of recent allocations to record. */
#define PROF_RECENT_ALLOC_MAX_DEFAULT 0
/* Thread name storage size limit. */
#define PROF_THREAD_NAME_MAX_LEN 16
/*
* Minimum required alignment for sampled allocations. Over-aligning sampled
* allocations allows us to quickly identify them on the dalloc path without
* resorting to metadata lookup.
*/
#define PROF_SAMPLE_ALIGNMENT PAGE
#define PROF_SAMPLE_ALIGNMENT_MASK PAGE_MASK
/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
#define PROF_TCTX_SENTINEL ((prof_tctx_t *)((uintptr_t)1U))
#endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */

View file

@ -0,0 +1,198 @@
#ifndef JEMALLOC_INTERNAL_TCACHE_H
#define JEMALLOC_INTERNAL_TCACHE_H
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/cache_bin.h"
#include "jemalloc/internal/ql.h"
#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/sz.h"
#include "jemalloc/internal/thread_event_registry.h"
#include "jemalloc/internal/ticker.h"
/* Forward decl; only base_t * is used as a pointer arg below. */
typedef struct base_s base_t;
/******************************************************************************/
/* TYPES */
/******************************************************************************/
typedef struct tcache_slow_s tcache_slow_t;
typedef struct tcache_s tcache_t;
typedef struct tcaches_s tcaches_t;
/* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */
#define TCACHE_ZERO_INITIALIZER \
{ 0 }
#define TCACHE_SLOW_ZERO_INITIALIZER \
{ \
{ 0 } \
}
/* Used in TSD static initializer only. Will be initialized to opt_tcache. */
#define TCACHE_ENABLED_ZERO_INITIALIZER false
/* Used for explicit tcache only. Means flushed but not destroyed. */
/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
#define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1)
/*
 * Limit on the tcache maximum size class, given as a base-2 logarithm and as
 * the corresponding byte count (1 << lg).
 */
#define TCACHE_LG_MAXCLASS_LIMIT LG_USIZE_GROW_SLOW_THRESHOLD
#define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT)
/*
 * Capacity of the bins array in tcache_t: SC_NBINS, plus SC_NGROUP entries
 * for every lg step between SC_LG_LARGE_MINCLASS and
 * TCACHE_LG_MAXCLASS_LIMIT, plus one.
 */
#define TCACHE_NBINS_MAX \
(SC_NBINS \
+ SC_NGROUP * (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) \
+ 1)
/*
 * GC tuning constants.
 * NOTE(review): how each one is applied is decided by the tcache GC code,
 * which is not visible in this header.
 */
#define TCACHE_GC_NEIGHBOR_LIMIT ((uintptr_t)1 << 21) /* 2M */
#define TCACHE_GC_INTERVAL_NS ((uint64_t)10 * KQU(1000000)) /* 10ms */
/* One eighth of SC_NBINS, but never less than 1. */
#define TCACHE_GC_SMALL_NBINS_MAX ((SC_NBINS > 8) ? (SC_NBINS >> 3) : 1)
#define TCACHE_GC_LARGE_NBINS_MAX 1
/******************************************************************************/
/* STRUCTS */
/******************************************************************************/
/*
 * Tcache state comes as a pair: slow-path data (tcache_slow_t) and hot-path
 * data (tcache_t).  The two always exist together and each holds a pointer to
 * the other.  Where they live differs in practice: for the automatic tcache,
 * tcache_slow sits in the TSD; for manual allocations it is part of a dynamic
 * allocation.  Because a tcache_slow pointer is always kept, the
 * [de]allocation code can treat both cases uniformly instead of being split
 * into paths for the TSD tcache and paths for a manual tcache.
 */
struct tcache_slow_s {
	/*
	 * Descriptor through which the arena finds our cache bins without
	 * seeing the tcache definition, so arenas can aggregate stats across
	 * tcaches without depending on tcache.
	 */
	cache_bin_array_descriptor_t cache_bin_array_descriptor;

	/* Arena this tcache is associated with. */
	arena_t *arena;

	/* Count of bins activated in this tcache. */
	unsigned tcache_nbins;

	/* Time at which GC was last performed. */
	nstime_t last_gc_time;

	/*
	 * Next bin to GC.  The _small / _large variants presumably track the
	 * small and large bins separately (judging by the names).
	 */
	szind_t next_gc_bin;
	szind_t next_gc_bin_small;
	szind_t next_gc_bin_large;

	/*
	 * Per small bin: helps determine how many items to fill at a time.
	 * The member name asks callers not to access it directly.
	 */
	cache_bin_fill_ctl_t bin_fill_ctl_do_not_access_directly[SC_NBINS];

	/* Per small bin: whether the bin has been refilled since the last GC. */
	bool bin_refilled[SC_NBINS];

	/*
	 * Per small bin: number of items we can pretend to flush before
	 * actually flushing.
	 */
	uint8_t bin_flush_delay_items[SC_NBINS];

	/*
	 * Start of the dynamic allocation that holds either the cache bins
	 * alone, or the cache bin memory together with this tcache_slow_t and
	 * its associated tcache_t.
	 */
	void *dyn_alloc;

	/* The paired tcache_t, which holds the bins. */
	tcache_t *tcache;
};
/*
 * Hot-path half of the tcache state: a pointer back to the slow-path data,
 * followed by the cache bins themselves (TCACHE_NBINS_MAX slots).
 */
struct tcache_s {
	tcache_slow_t *tcache_slow;
	cache_bin_t bins[TCACHE_NBINS_MAX];
};
/*
 * Entry used to link the list of available (previously used) explicit tcache
 * IDs.  Both members share storage through the anonymous union.
 * NOTE(review): presumably `tcache` is meaningful while the entry is in use
 * and `next` while it sits on the available list -- confirm against the
 * implementation.
 */
struct tcaches_s {
	union {
		tcache_t *tcache;
		tcaches_t *next;
	};
};
/******************************************************************************/
/* EXTERNS */
/******************************************************************************/
extern bool opt_tcache;
extern size_t opt_tcache_max;
extern ssize_t opt_lg_tcache_nslots_mul;
extern unsigned opt_tcache_nslots_small_min;
extern unsigned opt_tcache_nslots_small_max;
extern unsigned opt_tcache_nslots_large;
extern ssize_t opt_lg_tcache_shift;
extern size_t opt_tcache_gc_incr_bytes;
extern size_t opt_tcache_gc_delay_bytes;
extern unsigned opt_lg_tcache_flush_small_div;
extern unsigned opt_lg_tcache_flush_large_div;
/*
* Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more
* large-object bins. This is only used during thread initialization; changing
* it afterwards is not reflected in threads that are already initialized, so
* it should not be changed on the fly. To change the number of tcache bins
* in use, refer to tcache_nbins of each tcache.
*/
extern unsigned global_do_not_change_tcache_nbins;
/*
* Maximum cached size class. As with the bin count above, this is only used
* during thread initialization and should not be changed. To change the
* maximum cached size class, refer to tcache_max of each tcache.
*/
extern size_t global_do_not_change_tcache_maxclass;
/*
* Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and
* usable via the MALLOCX_TCACHE() flag. The automatic per thread tcaches are
* completely disjoint from this data structure. tcaches starts off as a sparse
* array, so it has no physical memory footprint until individual pages are
* touched. This allows the entire array to be allocated the first time an
* explicit tcache is created without a disproportionate impact on memory usage.
*/
extern tcaches_t *tcaches;
size_t tcache_salloc(tsdn_t *tsdn, const void *ptr);
void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
cache_bin_t *cache_bin, szind_t binind, bool *tcache_success);
void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache,
cache_bin_t *cache_bin, szind_t binind, unsigned rem);
void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache,
cache_bin_t *cache_bin, szind_t binind, unsigned rem);
void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache,
cache_bin_t *cache_bin, szind_t binind, bool is_small);
bool tcache_bin_info_default_init(
const char *bin_settings_segment_cur, size_t len_left);
bool tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len);
bool tcache_bin_ncached_max_read(
tsd_t *tsd, size_t bin_size, cache_bin_sz_t *ncached_max);
void tcache_arena_reassociate(
tsdn_t *tsdn, tcache_slow_t *tcache_slow, arena_t *arena);
tcache_t *tcache_create_explicit(tsd_t *tsd);
bool thread_tcache_max_set(tsd_t *tsd, size_t tcache_max);
void tcache_cleanup(tsd_t *tsd);
bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind);
void tcaches_flush(tsd_t *tsd, unsigned ind);
void tcaches_destroy(tsd_t *tsd, unsigned ind);
bool tcache_boot(tsdn_t *tsdn, base_t *base);
void tcache_arena_associate(
tsdn_t *tsdn, tcache_slow_t *tcache_slow, arena_t *arena);
cache_bin_array_descriptor_t *tcache_postfork_arena_descriptor(
tsdn_t *tsdn, arena_t *arena);
void tcache_prefork(tsdn_t *tsdn);
void tcache_postfork_parent(tsdn_t *tsdn);
void tcache_postfork_child(tsdn_t *tsdn);
void tcache_flush(tsd_t *tsd);
bool tsd_tcache_enabled_data_init(tsd_t *tsd);
void tcache_enabled_set(tsd_t *tsd, bool enabled);
extern void *(*JET_MUTABLE tcache_stack_alloc)(tsdn_t *tsdn, size_t size,
size_t alignment);
void tcache_assert_initialized(tcache_t *tcache);
extern te_base_cb_t tcache_gc_te_handler;
#endif /* JEMALLOC_INTERNAL_TCACHE_H */

View file

@ -1,91 +0,0 @@
#ifndef JEMALLOC_INTERNAL_TCACHE_EXTERNS_H
#define JEMALLOC_INTERNAL_TCACHE_EXTERNS_H
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/base.h"
#include "jemalloc/internal/cache_bin.h"
#include "jemalloc/internal/sz.h"
#include "jemalloc/internal/tcache_types.h"
#include "jemalloc/internal/thread_event_registry.h"
extern bool opt_tcache;
extern size_t opt_tcache_max;
extern ssize_t opt_lg_tcache_nslots_mul;
extern unsigned opt_tcache_nslots_small_min;
extern unsigned opt_tcache_nslots_small_max;
extern unsigned opt_tcache_nslots_large;
extern ssize_t opt_lg_tcache_shift;
extern size_t opt_tcache_gc_incr_bytes;
extern size_t opt_tcache_gc_delay_bytes;
extern unsigned opt_lg_tcache_flush_small_div;
extern unsigned opt_lg_tcache_flush_large_div;
/*
* Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more
* large-object bins. This is only used during thread initialization; changing
* it afterwards is not reflected in threads that are already initialized, so
* it should not be changed on the fly. To change the number of tcache bins
* in use, refer to tcache_nbins of each tcache.
*/
extern unsigned global_do_not_change_tcache_nbins;
/*
* Maximum cached size class. As with the bin count above, this is only used
* during thread initialization and should not be changed. To change the
* maximum cached size class, refer to tcache_max of each tcache.
*/
extern size_t global_do_not_change_tcache_maxclass;
/*
* Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and
* usable via the MALLOCX_TCACHE() flag. The automatic per thread tcaches are
* completely disjoint from this data structure. tcaches starts off as a sparse
* array, so it has no physical memory footprint until individual pages are
* touched. This allows the entire array to be allocated the first time an
* explicit tcache is created without a disproportionate impact on memory usage.
*/
extern tcaches_t *tcaches;
size_t tcache_salloc(tsdn_t *tsdn, const void *ptr);
void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
cache_bin_t *cache_bin, szind_t binind, bool *tcache_success);
void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache,
cache_bin_t *cache_bin, szind_t binind, unsigned rem);
void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache,
cache_bin_t *cache_bin, szind_t binind, unsigned rem);
void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache,
cache_bin_t *cache_bin, szind_t binind, bool is_small);
bool tcache_bin_info_default_init(
const char *bin_settings_segment_cur, size_t len_left);
bool tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len);
bool tcache_bin_ncached_max_read(
tsd_t *tsd, size_t bin_size, cache_bin_sz_t *ncached_max);
void tcache_arena_reassociate(
tsdn_t *tsdn, tcache_slow_t *tcache_slow, arena_t *arena);
tcache_t *tcache_create_explicit(tsd_t *tsd);
bool thread_tcache_max_set(tsd_t *tsd, size_t tcache_max);
void tcache_cleanup(tsd_t *tsd);
bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind);
void tcaches_flush(tsd_t *tsd, unsigned ind);
void tcaches_destroy(tsd_t *tsd, unsigned ind);
bool tcache_boot(tsdn_t *tsdn, base_t *base);
void tcache_arena_associate(
tsdn_t *tsdn, tcache_slow_t *tcache_slow, arena_t *arena);
cache_bin_array_descriptor_t *tcache_postfork_arena_descriptor(
tsdn_t *tsdn, arena_t *arena);
void tcache_prefork(tsdn_t *tsdn);
void tcache_postfork_parent(tsdn_t *tsdn);
void tcache_postfork_child(tsdn_t *tsdn);
void tcache_flush(tsd_t *tsd);
bool tsd_tcache_enabled_data_init(tsd_t *tsd);
void tcache_enabled_set(tsd_t *tsd, bool enabled);
extern void *(*JET_MUTABLE tcache_stack_alloc)(tsdn_t *tsdn, size_t size,
size_t alignment);
void tcache_assert_initialized(tcache_t *tcache);
extern te_base_cb_t tcache_gc_te_handler;
#endif /* JEMALLOC_INTERNAL_TCACHE_EXTERNS_H */

View file

@ -10,7 +10,7 @@
#include "jemalloc/internal/san.h"
#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/sz.h"
#include "jemalloc/internal/tcache_externs.h"
#include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/util.h"
static inline bool

View file

@ -1,72 +0,0 @@
#ifndef JEMALLOC_INTERNAL_TCACHE_STRUCTS_H
#define JEMALLOC_INTERNAL_TCACHE_STRUCTS_H
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/cache_bin.h"
#include "jemalloc/internal/ql.h"
#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/tcache_types.h"
#include "jemalloc/internal/ticker.h"
/*
 * Tcache state comes as a pair: slow-path data (tcache_slow_t) and hot-path
 * data (tcache_t).  The two always exist together and each holds a pointer to
 * the other.  Where they live differs in practice: for the automatic tcache,
 * tcache_slow sits in the TSD; for manual allocations it is part of a dynamic
 * allocation.  Because a tcache_slow pointer is always kept, the
 * [de]allocation code can treat both cases uniformly instead of being split
 * into paths for the TSD tcache and paths for a manual tcache.
 */
struct tcache_slow_s {
	/*
	 * Descriptor through which the arena finds our cache bins without
	 * seeing the tcache definition, so arenas can aggregate stats across
	 * tcaches without depending on tcache.
	 */
	cache_bin_array_descriptor_t cache_bin_array_descriptor;

	/* Arena this tcache is associated with. */
	arena_t *arena;

	/* Count of bins activated in this tcache. */
	unsigned tcache_nbins;

	/* Time at which GC was last performed. */
	nstime_t last_gc_time;

	/*
	 * Next bin to GC.  The _small / _large variants presumably track the
	 * small and large bins separately (judging by the names).
	 */
	szind_t next_gc_bin;
	szind_t next_gc_bin_small;
	szind_t next_gc_bin_large;

	/*
	 * Per small bin: helps determine how many items to fill at a time.
	 * The member name asks callers not to access it directly.
	 */
	cache_bin_fill_ctl_t bin_fill_ctl_do_not_access_directly[SC_NBINS];

	/* Per small bin: whether the bin has been refilled since the last GC. */
	bool bin_refilled[SC_NBINS];

	/*
	 * Per small bin: number of items we can pretend to flush before
	 * actually flushing.
	 */
	uint8_t bin_flush_delay_items[SC_NBINS];

	/*
	 * Start of the dynamic allocation that holds either the cache bins
	 * alone, or the cache bin memory together with this tcache_slow_t and
	 * its associated tcache_t.
	 */
	void *dyn_alloc;

	/* The paired tcache_t, which holds the bins. */
	tcache_t *tcache;
};
/*
 * Hot-path half of the tcache state: a pointer back to the slow-path data,
 * followed by the cache bins themselves (TCACHE_NBINS_MAX slots).
 */
struct tcache_s {
	tcache_slow_t *tcache_slow;
	cache_bin_t bins[TCACHE_NBINS_MAX];
};
/*
 * Entry used to link the list of available (previously used) explicit tcache
 * IDs.  Both members share storage through the anonymous union.
 * NOTE(review): presumably `tcache` is meaningful while the entry is in use
 * and `next` while it sits on the available list -- confirm against the
 * implementation.
 */
struct tcaches_s {
	union {
		tcache_t *tcache;
		tcaches_t *next;
	};
};
#endif /* JEMALLOC_INTERNAL_TCACHE_STRUCTS_H */

View file

@ -1,37 +0,0 @@
#ifndef JEMALLOC_INTERNAL_TCACHE_TYPES_H
#define JEMALLOC_INTERNAL_TCACHE_TYPES_H
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/sc.h"
typedef struct tcache_slow_s tcache_slow_t;
typedef struct tcache_s tcache_t;
typedef struct tcaches_s tcaches_t;
/* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */
#define TCACHE_ZERO_INITIALIZER \
{ 0 }
#define TCACHE_SLOW_ZERO_INITIALIZER \
{ \
{ 0 } \
}
/* Used in TSD static initializer only. Will be initialized to opt_tcache. */
#define TCACHE_ENABLED_ZERO_INITIALIZER false
/* Used for explicit tcache only. Means flushed but not destroyed. */
/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
#define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1)
/*
 * Limit on the tcache maximum size class, given as a base-2 logarithm and as
 * the corresponding byte count (1 << lg).
 */
#define TCACHE_LG_MAXCLASS_LIMIT LG_USIZE_GROW_SLOW_THRESHOLD
#define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT)
/*
 * Bin-count bound computed as SC_NBINS, plus SC_NGROUP entries for every lg
 * step between SC_LG_LARGE_MINCLASS and TCACHE_LG_MAXCLASS_LIMIT, plus one.
 */
#define TCACHE_NBINS_MAX \
(SC_NBINS \
+ SC_NGROUP * (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) \
+ 1)
/*
 * GC tuning constants.
 * NOTE(review): how each one is applied is decided by the tcache GC code,
 * which is not visible in this header.
 */
#define TCACHE_GC_NEIGHBOR_LIMIT ((uintptr_t)1 << 21) /* 2M */
#define TCACHE_GC_INTERVAL_NS ((uint64_t)10 * KQU(1000000)) /* 10ms */
/* One eighth of SC_NBINS, but never less than 1. */
#define TCACHE_GC_SMALL_NBINS_MAX ((SC_NBINS > 8) ? (SC_NBINS >> 3) : 1)
#define TCACHE_GC_LARGE_NBINS_MAX 1
#endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */

View file

@ -9,11 +9,12 @@
#include "jemalloc/internal/tsd_binshards.h"
#include "jemalloc/internal/jemalloc_internal_externs.h"
#include "jemalloc/internal/peak.h"
#include "jemalloc/internal/prof_types.h"
#include "jemalloc/internal/rtree_tsd.h"
#include "jemalloc/internal/tcache_structs.h"
#include "jemalloc/internal/tcache_types.h"
#include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/thread_event_registry.h"
/* Forward decl; tsd_internals.h only uses prof_tdata_t as a pointer type. */
typedef struct prof_tdata_s prof_tdata_t;
#include "jemalloc/internal/tsd_types.h"
#include "jemalloc/internal/util.h"
#include "jemalloc/internal/witness.h"

View file

@ -3,9 +3,9 @@
#include "jemalloc/internal/thread_event.h"
#include "jemalloc/internal/thread_event_registry.h"
#include "jemalloc/internal/tcache_externs.h"
#include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/peak_event.h"
#include "jemalloc/internal/prof_externs.h"
#include "jemalloc/internal/prof.h"
#include "jemalloc/internal/stats.h"
static malloc_mutex_t uevents_mu;