mirror of
https://github.com/jemalloc/jemalloc.git
synced 2026-06-02 02:04:20 +03:00
Consolidate prof_* and tcache_* header splits
Each of these components had a four-way split (_types, _structs, _externs, _inlines) that dates back to the old "include each section multiple times from a master file" pattern. With Step 2's edata <-> prof_types decoupling, merging _types + _structs + _externs into one header per component no longer risks recreating an include cycle.

- prof.h replaces prof_types.h + prof_structs.h + prof_externs.h.
- tcache.h replaces tcache_types.h + tcache_structs.h + tcache_externs.h.

prof_inlines.h and tcache_inlines.h are kept separate: prof_inlines.h sits at the bottom of the dependency layering, and tcache_inlines.h's include of arena_externs.h is the asymmetric cycle-breaker that keeps the arena<->tcache symbol cycle from becoming an include cycle.

Two surprises required adjustments beyond a straight concatenation:

1. te_prof_sample_event_lookahead was a JEMALLOC_ALWAYS_INLINE function defined in prof_externs.h, but its body calls tsd_thread_allocated_* accessors that only exist after tsd inlines are loaded. The original layering hid this because prof_externs.h was only included near the bottom of jemalloc_internal_includes.h. After consolidation, tsd_internals.h's includes pull prof.h in earlier, exposing the ordering dependency. Moved the inline to prof_inlines.h (where inline definitions belong anyway) and left only the related extern in prof.h.

2. base.h was included from prof_externs.h and tcache_externs.h purely for base_t * pointer arguments on a couple of declarations. Carrying that include into the merged prof.h / tcache.h would pull ehooks.h (-> tsd.h) into tsd_internals.h before tsd_internals.h finishes declaring its tsd accessors. Replaced with a forward declaration of base_t in each merged file.
Similarly, tsd_internals.h's prior #include of prof_types.h becomes a forward decl of prof_tdata_t (the only prof symbol it references, and only as a pointer), and large.h needs a forward decl of prof_info_t because large.h is loaded before prof.h in the new master ordering. No inline / static qualifiers are dropped; only the one inline moves files. #ifdef blocks (JEMALLOC_PROF, JEMALLOC_PROF_LIBGCC, JEMALLOC_PROF_GCC, JEMALLOC_DEBUG) are kept intact.
This commit is contained in:
parent
fb92d8a916
commit
ab4c178444
16 changed files with 666 additions and 664 deletions
|
|
@ -10,8 +10,7 @@
|
|||
#include "jemalloc/internal/jemalloc_internal_types.h"
|
||||
#include "jemalloc/internal/large.h"
|
||||
#include "jemalloc/internal/mutex.h"
|
||||
#include "jemalloc/internal/prof_externs.h"
|
||||
#include "jemalloc/internal/prof_structs.h"
|
||||
#include "jemalloc/internal/prof.h"
|
||||
#include "jemalloc/internal/rtree.h"
|
||||
#include "jemalloc/internal/safety_check.h"
|
||||
#include "jemalloc/internal/sc.h"
|
||||
|
|
|
|||
|
|
@ -41,16 +41,12 @@
|
|||
/******************************************************************************/
|
||||
|
||||
#include "jemalloc/internal/arena_types.h"
|
||||
#include "jemalloc/internal/tcache_types.h"
|
||||
#include "jemalloc/internal/prof_types.h"
|
||||
|
||||
/******************************************************************************/
|
||||
/* STRUCTS */
|
||||
/******************************************************************************/
|
||||
|
||||
#include "jemalloc/internal/prof_structs.h"
|
||||
#include "jemalloc/internal/arena_structs.h"
|
||||
#include "jemalloc/internal/tcache_structs.h"
|
||||
|
||||
/******************************************************************************/
|
||||
/* EXTERNS */
|
||||
|
|
@ -59,9 +55,9 @@
|
|||
#include "jemalloc/internal/jemalloc_internal_externs.h"
|
||||
#include "jemalloc/internal/arena_externs.h"
|
||||
#include "jemalloc/internal/large.h"
|
||||
#include "jemalloc/internal/tcache_externs.h"
|
||||
#include "jemalloc/internal/tcache.h"
|
||||
#include "jemalloc/internal/malloc_dispatch_externs.h"
|
||||
#include "jemalloc/internal/prof_externs.h"
|
||||
#include "jemalloc/internal/prof.h"
|
||||
#include "jemalloc/internal/background_thread.h"
|
||||
|
||||
/******************************************************************************/
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@
|
|||
#include "jemalloc/internal/bit_util.h"
|
||||
#include "jemalloc/internal/jemalloc_internal_types.h"
|
||||
#include "jemalloc/internal/sc.h"
|
||||
#include "jemalloc/internal/tcache_externs.h"
|
||||
#include "jemalloc/internal/tcache.h"
|
||||
#include "jemalloc/internal/ticker.h"
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE malloc_cpuid_t
|
||||
|
|
|
|||
|
|
@ -4,6 +4,9 @@
|
|||
#include "jemalloc/internal/jemalloc_preamble.h"
|
||||
#include "jemalloc/internal/edata.h"
|
||||
|
||||
/* Forward decl; only prof_info_t * is used as a pointer arg below. */
|
||||
typedef struct prof_info_s prof_info_t;
|
||||
|
||||
void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero);
|
||||
void *large_palloc(
|
||||
tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero);
|
||||
|
|
|
|||
427
include/jemalloc/internal/prof.h
Normal file
427
include/jemalloc/internal/prof.h
Normal file
|
|
@ -0,0 +1,427 @@
|
|||
#ifndef JEMALLOC_INTERNAL_PROF_H
|
||||
#define JEMALLOC_INTERNAL_PROF_H
|
||||
|
||||
#include "jemalloc/internal/jemalloc_preamble.h"
|
||||
#include "jemalloc/internal/ckh.h"
|
||||
#include "jemalloc/internal/mutex.h"
|
||||
#include "jemalloc/internal/prng.h"
|
||||
#include "jemalloc/internal/prof_hook.h"
|
||||
#include "jemalloc/internal/rb.h"
|
||||
#include "jemalloc/internal/thread_event_registry.h"
|
||||
|
||||
/* Forward decl; only base_t * is used as a pointer arg below. */
|
||||
typedef struct base_s base_t;
|
||||
|
||||
/******************************************************************************/
|
||||
/* TYPES */
|
||||
/******************************************************************************/
|
||||
|
||||
typedef struct prof_bt_s prof_bt_t;
|
||||
typedef struct prof_cnt_s prof_cnt_t;
|
||||
typedef struct prof_tctx_s prof_tctx_t;
|
||||
typedef struct prof_info_s prof_info_t;
|
||||
typedef struct prof_gctx_s prof_gctx_t;
|
||||
typedef struct prof_tdata_s prof_tdata_t;
|
||||
typedef struct prof_recent_s prof_recent_t;
|
||||
|
||||
/* Option defaults. */
|
||||
#ifdef JEMALLOC_PROF
|
||||
# define PROF_PREFIX_DEFAULT "jeprof"
|
||||
#else
|
||||
# define PROF_PREFIX_DEFAULT ""
|
||||
#endif
|
||||
#define LG_PROF_SAMPLE_DEFAULT 19
|
||||
#define LG_PROF_INTERVAL_DEFAULT -1
|
||||
|
||||
/*
|
||||
* Hard limit on stack backtrace depth. The version of prof_backtrace() that
|
||||
* is based on __builtin_return_address() necessarily has a hard-coded number
|
||||
* of backtrace frame handlers, and should be kept in sync with this setting.
|
||||
*/
|
||||
#ifdef JEMALLOC_PROF_GCC
|
||||
# define PROF_BT_MAX_LIMIT 256
|
||||
#else
|
||||
# define PROF_BT_MAX_LIMIT UINT_MAX
|
||||
#endif
|
||||
#define PROF_BT_MAX_DEFAULT 128
|
||||
|
||||
/* Initial hash table size. */
|
||||
#define PROF_CKH_MINITEMS 64
|
||||
|
||||
/* Size of memory buffer to use when writing dump files. */
|
||||
#ifndef JEMALLOC_PROF
|
||||
/* Minimize memory bloat for non-prof builds. */
|
||||
# define PROF_DUMP_BUFSIZE 1
|
||||
#elif defined(JEMALLOC_DEBUG)
|
||||
/* Use a small buffer size in debug build, mainly to facilitate testing. */
|
||||
# define PROF_DUMP_BUFSIZE 16
|
||||
#else
|
||||
# define PROF_DUMP_BUFSIZE 65536
|
||||
#endif
|
||||
|
||||
/* Size of size class related tables */
|
||||
#ifdef JEMALLOC_PROF
|
||||
# define PROF_SC_NSIZES SC_NSIZES
|
||||
#else
|
||||
/* Minimize memory bloat for non-prof builds. */
|
||||
# define PROF_SC_NSIZES 1
|
||||
#endif
|
||||
|
||||
/* Size of stack-allocated buffer used by prof_printf(). */
|
||||
#define PROF_PRINTF_BUFSIZE 128
|
||||
|
||||
/*
|
||||
* Number of mutexes shared among all gctx's. No space is allocated for these
|
||||
* unless profiling is enabled, so it's okay to over-provision.
|
||||
*/
|
||||
#define PROF_NCTX_LOCKS 1024
|
||||
|
||||
/*
|
||||
* Number of mutexes shared among all tdata's. No space is allocated for these
|
||||
* unless profiling is enabled, so it's okay to over-provision.
|
||||
*/
|
||||
#define PROF_NTDATA_LOCKS 256
|
||||
|
||||
/* Minimize memory bloat for non-prof builds. */
|
||||
#ifdef JEMALLOC_PROF
|
||||
# define PROF_DUMP_FILENAME_LEN (PATH_MAX + 1)
|
||||
#else
|
||||
# define PROF_DUMP_FILENAME_LEN 1
|
||||
#endif
|
||||
|
||||
/* Default number of recent allocations to record. */
|
||||
#define PROF_RECENT_ALLOC_MAX_DEFAULT 0
|
||||
|
||||
/* Thread name storage size limit. */
|
||||
#define PROF_THREAD_NAME_MAX_LEN 16
|
||||
|
||||
/*
|
||||
* Minimum required alignment for sampled allocations. Over-aligning sampled
|
||||
* allocations allows us to quickly identify them on the dalloc path without
|
||||
* resorting to metadata lookup.
|
||||
*/
|
||||
#define PROF_SAMPLE_ALIGNMENT PAGE
|
||||
#define PROF_SAMPLE_ALIGNMENT_MASK PAGE_MASK
|
||||
|
||||
/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
|
||||
#define PROF_TCTX_SENTINEL ((prof_tctx_t *)((uintptr_t)1U))
|
||||
|
||||
/******************************************************************************/
|
||||
/* STRUCTS */
|
||||
/******************************************************************************/
|
||||
|
||||
/* A captured stack backtrace: a vector of program counters plus its length. */
struct prof_bt_s {
|
||||
/* Backtrace, stored as len program counters. */
|
||||
void **vec;
|
||||
/* Number of program counters stored in vec. */
unsigned len;
|
||||
};
|
||||
|
||||
#ifdef JEMALLOC_PROF_LIBGCC
|
||||
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
|
||||
/*
 * Only compiled when JEMALLOC_PROF_LIBGCC is defined.
 * NOTE(review): field roles are inferred from names only -- vec looks like
 * the destination frame buffer, len a pointer to the running frame count,
 * and max the capacity of vec; confirm against the unwind callback.
 */
typedef struct {
|
||||
void **vec;
|
||||
unsigned *len;
|
||||
unsigned max;
|
||||
} prof_unwind_data_t;
|
||||
#endif
|
||||
|
||||
/*
 * Set of 64-bit profiling counters.  Embedded by value in prof_tctx_s
 * (cnts, dump_cnts), prof_gctx_s (cnt_summed) and prof_tdata_s (cnt_summed).
 *
 * NOTE(review): the names suggest cur* track currently live objects/bytes
 * and accum* running totals (cf. opt_prof_accum, "Report cumulative bytes"),
 * with the *_unbiased variants related to opt_prof_unbias -- confirm against
 * the code that updates them.
 */
struct prof_cnt_s {
|
||||
/* Profiling counters. */
|
||||
uint64_t curobjs;
|
||||
uint64_t curobjs_shifted_unbiased;
|
||||
uint64_t curbytes;
|
||||
uint64_t curbytes_unbiased;
|
||||
uint64_t accumobjs;
|
||||
uint64_t accumobjs_shifted_unbiased;
|
||||
uint64_t accumbytes;
|
||||
uint64_t accumbytes_unbiased;
|
||||
};
|
||||
|
||||
/*
 * Dump-related state of a prof_tctx_t; stored in prof_tctx_s.state, which is
 * documented there as protected by gctx->lock.
 */
typedef enum {
|
||||
prof_tctx_state_initializing,
|
||||
prof_tctx_state_nominal,
|
||||
prof_tctx_state_dumping,
|
||||
prof_tctx_state_purgatory /* Dumper must finish destroying. */
|
||||
} prof_tctx_state_t;
|
||||
|
||||
/*
 * Thread-specific profiling context (tctx): the counters one thread keeps
 * for one global context (gctx).  Each tctx points back at its owning tdata
 * and gctx and is linked into that gctx's tctxs tree via tctx_link.
 */
struct prof_tctx_s {
|
||||
/* Thread data for thread that performed the allocation. */
|
||||
prof_tdata_t *tdata;
|
||||
|
||||
/*
|
||||
* Copy of tdata->thr_{uid,discrim}, necessary because tdata may be
|
||||
* defunct during teardown.
|
||||
*/
|
||||
uint64_t thr_uid;
|
||||
uint64_t thr_discrim;
|
||||
|
||||
/*
|
||||
* Reference count of how many times this tctx object is referenced in
|
||||
* recent allocation / deallocation records, protected by tdata->lock.
|
||||
*/
|
||||
uint64_t recent_count;
|
||||
|
||||
/* Profiling counters, protected by tdata->lock. */
|
||||
prof_cnt_t cnts;
|
||||
|
||||
/* Associated global context. */
|
||||
prof_gctx_t *gctx;
|
||||
|
||||
/*
|
||||
* UID that distinguishes multiple tctx's created by the same thread,
|
||||
* but coexisting in gctx->tctxs. There are two ways that such
|
||||
* coexistence can occur:
|
||||
* - A dumper thread can cause a tctx to be retained in the purgatory
|
||||
* state.
|
||||
* - Although a single "producer" thread must create all tctx's which
|
||||
* share the same thr_uid, multiple "consumers" can each concurrently
|
||||
* execute portions of prof_tctx_destroy(). prof_tctx_destroy() only
|
||||
* gets called once each time cnts.cur{objs,bytes} drop to 0, but this
|
||||
* threshold can be hit again before the first consumer finishes
|
||||
* executing prof_tctx_destroy().
|
||||
*/
|
||||
uint64_t tctx_uid;
|
||||
|
||||
/* Linkage into gctx's tctxs. */
|
||||
rb_node(prof_tctx_t) tctx_link;
|
||||
|
||||
/*
|
||||
* True during prof_alloc_prep()..prof_malloc_sample_object(), prevents
|
||||
* sample vs destroy race.
|
||||
*/
|
||||
bool prepared;
|
||||
|
||||
/* Current dump-related state, protected by gctx->lock. */
|
||||
prof_tctx_state_t state;
|
||||
|
||||
/*
|
||||
* Copy of cnts snapshotted during early dump phase, protected by
|
||||
* dump_mtx.
|
||||
*/
|
||||
prof_cnt_t dump_cnts;
|
||||
};
|
||||
/* Tree type for prof_tctx_t nodes; used for prof_gctx_s.tctxs. */
typedef rb_tree(prof_tctx_t) prof_tctx_tree_t;
|
||||
|
||||
/*
 * Profiling information describing a single allocation: when it was made,
 * which tctx it corresponds to, and the size that was requested.
 */
struct prof_info_s {
|
||||
/* Time when the allocation was made. */
|
||||
nstime_t alloc_time;
|
||||
/* Points to the prof_tctx_t corresponding to the allocation. */
|
||||
prof_tctx_t *alloc_tctx;
|
||||
/* Allocation request size. */
|
||||
size_t alloc_size;
|
||||
};
|
||||
|
||||
/*
 * Global profiling context (gctx): the state kept for one backtrace, holding
 * the tree of thread-specific tctx's that allocated in this context.
 */
struct prof_gctx_s {
|
||||
/* Protects nlimbo, cnt_summed, and tctxs. */
|
||||
/*
 * Held by pointer; PROF_NCTX_LOCKS describes mutexes shared among all
 * gctx's, so this presumably points at one of those -- confirm.
 */
malloc_mutex_t *lock;
|
||||
|
||||
/*
|
||||
* Number of threads that currently cause this gctx to be in a state of
|
||||
* limbo due to one of:
|
||||
* - Initializing this gctx.
|
||||
* - Initializing per thread counters associated with this gctx.
|
||||
* - Preparing to destroy this gctx.
|
||||
* - Dumping a heap profile that includes this gctx.
|
||||
* nlimbo must be 1 (single destroyer) in order to safely destroy the
|
||||
* gctx.
|
||||
*/
|
||||
unsigned nlimbo;
|
||||
|
||||
/*
|
||||
* Tree of profile counters, one for each thread that has allocated in
|
||||
* this context.
|
||||
*/
|
||||
prof_tctx_tree_t tctxs;
|
||||
|
||||
/* Linkage for tree of contexts to be dumped. */
|
||||
rb_node(prof_gctx_t) dump_link;
|
||||
|
||||
/* Temporary storage for summation during dump. */
|
||||
prof_cnt_t cnt_summed;
|
||||
|
||||
/* Associated backtrace. */
|
||||
prof_bt_t bt;
|
||||
|
||||
/* Backtrace vector, variable size, referred to by bt. */
|
||||
/*
 * NOTE(review): declared with one element although the length varies, so
 * the struct is presumably allocated larger than sizeof(prof_gctx_t).
 * Confirm how the allocation size is computed (offsetof vs. sizeof) before
 * considering a C99 flexible array member here.
 */
void *vec[1];
|
||||
};
|
||||
/* Tree type for prof_gctx_t nodes. */
typedef rb_tree(prof_gctx_t) prof_gctx_tree_t;
|
||||
|
||||
/*
 * Per-thread profiling data (tdata).  prof_tctx_s refers back to it as the
 * "thread data for thread that performed the allocation".
 */
struct prof_tdata_s {
|
||||
/*
 * prof_tctx_s documents its recent_count and cnts fields as protected by
 * tdata->lock.  Held by pointer; PROF_NTDATA_LOCKS describes mutexes shared
 * among all tdata's, so this presumably points at one of those -- confirm.
 */
malloc_mutex_t *lock;
|
||||
|
||||
/* Monotonically increasing unique thread identifier. */
|
||||
uint64_t thr_uid;
|
||||
|
||||
/*
|
||||
* Monotonically increasing discriminator among tdata structures
|
||||
* associated with the same thr_uid.
|
||||
*/
|
||||
uint64_t thr_discrim;
|
||||
|
||||
/* Tree linkage; presumably for a prof_tdata_tree_t -- confirm. */
rb_node(prof_tdata_t) tdata_link;
|
||||
|
||||
/*
|
||||
* Counter used to initialize prof_tctx_t's tctx_uid. No locking is
|
||||
* necessary when incrementing this field, because only one thread ever
|
||||
* does so.
|
||||
*/
|
||||
uint64_t tctx_uid_next;
|
||||
|
||||
/*
|
||||
* Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks
|
||||
* backtraces for which it has non-zero allocation/deallocation counters
|
||||
* associated with thread-specific prof_tctx_t objects. Other threads
|
||||
* may write to prof_tctx_t contents when freeing associated objects.
|
||||
*/
|
||||
ckh_t bt2tctx;
|
||||
|
||||
/* Included in heap profile dumps if has content. */
|
||||
char thread_name[PROF_THREAD_NAME_MAX_LEN];
|
||||
|
||||
/* State used to avoid dumping while operating on prof internals. */
|
||||
bool enq;
|
||||
bool enq_idump;
|
||||
bool enq_gdump;
|
||||
|
||||
/*
|
||||
* Set to true during an early dump phase for tdata's which are
|
||||
* currently being dumped. New threads' tdata's have this initialized
|
||||
* to false so that they aren't accidentally included in later dump
|
||||
* phases.
|
||||
*/
|
||||
bool dumping;
|
||||
|
||||
/*
|
||||
* True if profiling is active for this tdata's thread
|
||||
* (thread.prof.active mallctl).
|
||||
*/
|
||||
bool active;
|
||||
|
||||
/*
 * NOTE(review): attached and expired carry no description in this header;
 * their semantics must be confirmed from the prof implementation.
 */
bool attached;
|
||||
bool expired;
|
||||
|
||||
/* Temporary storage for summation during dump. */
|
||||
prof_cnt_t cnt_summed;
|
||||
|
||||
/* Backtrace vector, used for calls to prof_backtrace(). */
|
||||
void **vec;
|
||||
};
|
||||
/* Tree type for prof_tdata_t nodes. */
typedef rb_tree(prof_tdata_t) prof_tdata_tree_t;
|
||||
|
||||
/*
 * One entry in the record of recent allocations (cf.
 * opt_prof_recent_alloc_max, "For recording recent allocations").
 * Entries are chained through link and carry both the allocating and the
 * deallocating tctx.
 */
struct prof_recent_s {
|
||||
/* Timestamps of the allocation and of its deallocation. */
nstime_t alloc_time;
|
||||
nstime_t dalloc_time;
|
||||
|
||||
/* List linkage between records. */
ql_elm(prof_recent_t) link;
|
||||
/* NOTE(review): presumably requested size vs. usable size -- confirm. */
size_t size;
|
||||
size_t usize;
|
||||
atomic_p_t alloc_edata; /* NULL means allocation has been freed. */
|
||||
prof_tctx_t *alloc_tctx;
|
||||
prof_tctx_t *dalloc_tctx;
|
||||
};
|
||||
|
||||
/******************************************************************************/
|
||||
/* EXTERNS */
|
||||
/******************************************************************************/
|
||||
|
||||
extern bool opt_prof;
|
||||
extern bool opt_prof_active;
|
||||
extern bool opt_prof_thread_active_init;
|
||||
extern unsigned opt_prof_bt_max;
|
||||
extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */
|
||||
extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */
|
||||
extern bool opt_prof_gdump; /* High-water memory dumping. */
|
||||
extern bool opt_prof_final; /* Final profile dumping. */
|
||||
extern bool opt_prof_leak; /* Dump leak summary at exit. */
|
||||
extern bool opt_prof_leak_error; /* Exit with error code if memory leaked */
|
||||
extern bool opt_prof_accum; /* Report cumulative bytes. */
|
||||
extern bool opt_prof_log; /* Turn logging on at boot. */
|
||||
extern char opt_prof_prefix[
|
||||
/* Minimize memory bloat for non-prof builds. */
|
||||
#ifdef JEMALLOC_PROF
|
||||
PATH_MAX +
|
||||
#endif
|
||||
1];
|
||||
extern bool opt_prof_unbias;
|
||||
|
||||
/* Include pid namespace in profile file names. */
|
||||
extern bool opt_prof_pid_namespace;
|
||||
|
||||
/* For recording recent allocations */
|
||||
extern ssize_t opt_prof_recent_alloc_max;
|
||||
|
||||
/* Whether to use thread name provided by the system or by mallctl. */
|
||||
extern bool opt_prof_sys_thread_name;
|
||||
|
||||
/* Whether to record per size class counts and request size totals. */
|
||||
extern bool opt_prof_stats;
|
||||
|
||||
/* Accessed via prof_active_[gs]et{_unlocked,}(). */
|
||||
extern bool prof_active_state;
|
||||
|
||||
/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */
|
||||
extern bool prof_gdump_val;
|
||||
|
||||
/* Profile dump interval, measured in bytes allocated. */
|
||||
extern uint64_t prof_interval;
|
||||
|
||||
/*
|
||||
* Initialized as opt_lg_prof_sample, and potentially modified during profiling
|
||||
* resets.
|
||||
*/
|
||||
extern size_t lg_prof_sample;
|
||||
|
||||
extern bool prof_booted;
|
||||
|
||||
void prof_backtrace_hook_set(prof_backtrace_hook_t hook);
|
||||
prof_backtrace_hook_t prof_backtrace_hook_get(void);
|
||||
|
||||
void prof_dump_hook_set(prof_dump_hook_t hook);
|
||||
prof_dump_hook_t prof_dump_hook_get(void);
|
||||
|
||||
void prof_sample_hook_set(prof_sample_hook_t hook);
|
||||
prof_sample_hook_t prof_sample_hook_get(void);
|
||||
|
||||
void prof_sample_free_hook_set(prof_sample_free_hook_t hook);
|
||||
prof_sample_free_hook_t prof_sample_free_hook_get(void);
|
||||
|
||||
/* Functions only accessed in prof_inlines.h */
|
||||
prof_tdata_t *prof_tdata_init(tsd_t *tsd);
|
||||
prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
|
||||
|
||||
void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx);
|
||||
void prof_malloc_sample_object(
|
||||
tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx);
|
||||
void prof_free_sampled_object(
|
||||
tsd_t *tsd, const void *ptr, size_t usize, prof_info_t *prof_info);
|
||||
prof_tctx_t *prof_tctx_create(tsd_t *tsd);
|
||||
void prof_idump(tsdn_t *tsdn);
|
||||
bool prof_mdump(tsd_t *tsd, const char *filename);
|
||||
void prof_gdump(tsdn_t *tsdn);
|
||||
|
||||
void prof_tdata_cleanup(tsd_t *tsd);
|
||||
bool prof_active_get(tsdn_t *tsdn);
|
||||
bool prof_active_set(tsdn_t *tsdn, bool active);
|
||||
const char *prof_thread_name_get(tsd_t *tsd);
|
||||
int prof_thread_name_set(tsd_t *tsd, const char *thread_name);
|
||||
bool prof_thread_active_get(tsd_t *tsd);
|
||||
bool prof_thread_active_set(tsd_t *tsd, bool active);
|
||||
bool prof_thread_active_init_get(tsdn_t *tsdn);
|
||||
bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init);
|
||||
bool prof_gdump_get(tsdn_t *tsdn);
|
||||
bool prof_gdump_set(tsdn_t *tsdn, bool active);
|
||||
void prof_boot0(void);
|
||||
void prof_boot1(void);
|
||||
bool prof_boot2(tsd_t *tsd, base_t *base);
|
||||
void prof_prefork0(tsdn_t *tsdn);
|
||||
void prof_prefork1(tsdn_t *tsdn);
|
||||
void prof_postfork_parent(tsdn_t *tsdn);
|
||||
void prof_postfork_child(tsdn_t *tsdn);
|
||||
|
||||
uint64_t prof_sample_new_event_wait(tsd_t *tsd);
|
||||
uint64_t tsd_prof_sample_event_wait_get(tsd_t *tsd);
|
||||
|
||||
extern te_base_cb_t prof_sample_te_handler;
|
||||
|
||||
#endif /* JEMALLOC_INTERNAL_PROF_H */
|
||||
|
|
@ -1,132 +0,0 @@
|
|||
#ifndef JEMALLOC_INTERNAL_PROF_EXTERNS_H
|
||||
#define JEMALLOC_INTERNAL_PROF_EXTERNS_H
|
||||
|
||||
#include "jemalloc/internal/jemalloc_preamble.h"
|
||||
#include "jemalloc/internal/base.h"
|
||||
#include "jemalloc/internal/mutex.h"
|
||||
#include "jemalloc/internal/prof_hook.h"
|
||||
#include "jemalloc/internal/thread_event_registry.h"
|
||||
|
||||
extern bool opt_prof;
|
||||
extern bool opt_prof_active;
|
||||
extern bool opt_prof_thread_active_init;
|
||||
extern unsigned opt_prof_bt_max;
|
||||
extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */
|
||||
extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */
|
||||
extern bool opt_prof_gdump; /* High-water memory dumping. */
|
||||
extern bool opt_prof_final; /* Final profile dumping. */
|
||||
extern bool opt_prof_leak; /* Dump leak summary at exit. */
|
||||
extern bool opt_prof_leak_error; /* Exit with error code if memory leaked */
|
||||
extern bool opt_prof_accum; /* Report cumulative bytes. */
|
||||
extern bool opt_prof_log; /* Turn logging on at boot. */
|
||||
extern char opt_prof_prefix[
|
||||
/* Minimize memory bloat for non-prof builds. */
|
||||
#ifdef JEMALLOC_PROF
|
||||
PATH_MAX +
|
||||
#endif
|
||||
1];
|
||||
extern bool opt_prof_unbias;
|
||||
|
||||
/* Include pid namespace in profile file names. */
|
||||
extern bool opt_prof_pid_namespace;
|
||||
|
||||
/* For recording recent allocations */
|
||||
extern ssize_t opt_prof_recent_alloc_max;
|
||||
|
||||
/* Whether to use thread name provided by the system or by mallctl. */
|
||||
extern bool opt_prof_sys_thread_name;
|
||||
|
||||
/* Whether to record per size class counts and request size totals. */
|
||||
extern bool opt_prof_stats;
|
||||
|
||||
/* Accessed via prof_active_[gs]et{_unlocked,}(). */
|
||||
extern bool prof_active_state;
|
||||
|
||||
/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */
|
||||
extern bool prof_gdump_val;
|
||||
|
||||
/* Profile dump interval, measured in bytes allocated. */
|
||||
extern uint64_t prof_interval;
|
||||
|
||||
/*
|
||||
* Initialized as opt_lg_prof_sample, and potentially modified during profiling
|
||||
* resets.
|
||||
*/
|
||||
extern size_t lg_prof_sample;
|
||||
|
||||
extern bool prof_booted;
|
||||
|
||||
void prof_backtrace_hook_set(prof_backtrace_hook_t hook);
|
||||
prof_backtrace_hook_t prof_backtrace_hook_get(void);
|
||||
|
||||
void prof_dump_hook_set(prof_dump_hook_t hook);
|
||||
prof_dump_hook_t prof_dump_hook_get(void);
|
||||
|
||||
void prof_sample_hook_set(prof_sample_hook_t hook);
|
||||
prof_sample_hook_t prof_sample_hook_get(void);
|
||||
|
||||
void prof_sample_free_hook_set(prof_sample_free_hook_t hook);
|
||||
prof_sample_free_hook_t prof_sample_free_hook_get(void);
|
||||
|
||||
/* Functions only accessed in prof_inlines.h */
|
||||
prof_tdata_t *prof_tdata_init(tsd_t *tsd);
|
||||
prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
|
||||
|
||||
void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx);
|
||||
void prof_malloc_sample_object(
|
||||
tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx);
|
||||
void prof_free_sampled_object(
|
||||
tsd_t *tsd, const void *ptr, size_t usize, prof_info_t *prof_info);
|
||||
prof_tctx_t *prof_tctx_create(tsd_t *tsd);
|
||||
void prof_idump(tsdn_t *tsdn);
|
||||
bool prof_mdump(tsd_t *tsd, const char *filename);
|
||||
void prof_gdump(tsdn_t *tsdn);
|
||||
|
||||
void prof_tdata_cleanup(tsd_t *tsd);
|
||||
bool prof_active_get(tsdn_t *tsdn);
|
||||
bool prof_active_set(tsdn_t *tsdn, bool active);
|
||||
const char *prof_thread_name_get(tsd_t *tsd);
|
||||
int prof_thread_name_set(tsd_t *tsd, const char *thread_name);
|
||||
bool prof_thread_active_get(tsd_t *tsd);
|
||||
bool prof_thread_active_set(tsd_t *tsd, bool active);
|
||||
bool prof_thread_active_init_get(tsdn_t *tsdn);
|
||||
bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init);
|
||||
bool prof_gdump_get(tsdn_t *tsdn);
|
||||
bool prof_gdump_set(tsdn_t *tsdn, bool active);
|
||||
void prof_boot0(void);
|
||||
void prof_boot1(void);
|
||||
bool prof_boot2(tsd_t *tsd, base_t *base);
|
||||
void prof_prefork0(tsdn_t *tsdn);
|
||||
void prof_prefork1(tsdn_t *tsdn);
|
||||
void prof_postfork_parent(tsdn_t *tsdn);
|
||||
void prof_postfork_child(tsdn_t *tsdn);
|
||||
|
||||
uint64_t prof_sample_new_event_wait(tsd_t *tsd);
|
||||
uint64_t tsd_prof_sample_event_wait_get(tsd_t *tsd);
|
||||
|
||||
/*
|
||||
* The lookahead functionality facilitates events to be able to lookahead, i.e.
|
||||
* without touching the event counters, to determine whether an event would be
|
||||
* triggered. The event counters are not advanced until the end of the
|
||||
* allocation / deallocation calls, so the lookahead can be useful if some
|
||||
* preparation work for some event must be done early in the allocation /
|
||||
* deallocation calls.
|
||||
*
|
||||
* Currently only the profiling sampling event needs the lookahead
|
||||
* functionality, so we don't yet define general purpose lookahead functions.
|
||||
*/
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE bool
|
||||
te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) {
|
||||
if (unlikely(!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0)) {
|
||||
return false;
|
||||
}
|
||||
/* The subtraction is intentionally susceptible to underflow. */
|
||||
uint64_t accumbytes = tsd_thread_allocated_get(tsd) + usize
|
||||
- tsd_thread_allocated_last_event_get(tsd);
|
||||
return accumbytes >= tsd_prof_sample_event_wait_get(tsd);
|
||||
}
|
||||
|
||||
extern te_base_cb_t prof_sample_te_handler;
|
||||
|
||||
#endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */
|
||||
|
|
@ -4,12 +4,37 @@
|
|||
#include "jemalloc/internal/jemalloc_preamble.h"
|
||||
#include "jemalloc/internal/arena_inlines_b.h"
|
||||
#include "jemalloc/internal/jemalloc_internal_inlines_c.h"
|
||||
#include "jemalloc/internal/prof_externs.h"
|
||||
#include "jemalloc/internal/prof_structs.h"
|
||||
#include "jemalloc/internal/prof.h"
|
||||
#include "jemalloc/internal/safety_check.h"
|
||||
#include "jemalloc/internal/sz.h"
|
||||
#include "jemalloc/internal/thread_event.h"
|
||||
|
||||
/*
|
||||
* The lookahead functionality facilitates events to be able to lookahead, i.e.
|
||||
* without touching the event counters, to determine whether an event would be
|
||||
* triggered. The event counters are not advanced until the end of the
|
||||
* allocation / deallocation calls, so the lookahead can be useful if some
|
||||
* preparation work for some event must be done early in the allocation /
|
||||
* deallocation calls.
|
||||
*
|
||||
* Currently only the profiling sampling event needs the lookahead
|
||||
* functionality, so we don't yet define general purpose lookahead functions.
|
||||
*
|
||||
* Defined here rather than prof.h because the inline body depends on tsd
|
||||
* accessors that aren't visible until tsd inlines are loaded.
|
||||
*/
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE bool
|
||||
te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) {
|
||||
if (unlikely(!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0)) {
|
||||
return false;
|
||||
}
|
||||
/* The subtraction is intentionally susceptible to underflow. */
|
||||
uint64_t accumbytes = tsd_thread_allocated_get(tsd) + usize
|
||||
- tsd_thread_allocated_last_event_get(tsd);
|
||||
return accumbytes >= tsd_prof_sample_event_wait_get(tsd);
|
||||
}
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE void
|
||||
prof_active_assert(void) {
|
||||
cassert(config_prof);
|
||||
|
|
|
|||
|
|
@ -1,221 +0,0 @@
|
|||
#ifndef JEMALLOC_INTERNAL_PROF_STRUCTS_H
|
||||
#define JEMALLOC_INTERNAL_PROF_STRUCTS_H
|
||||
|
||||
#include "jemalloc/internal/jemalloc_preamble.h"
|
||||
#include "jemalloc/internal/ckh.h"
|
||||
#include "jemalloc/internal/mutex.h"
|
||||
#include "jemalloc/internal/prng.h"
|
||||
#include "jemalloc/internal/rb.h"
|
||||
|
||||
struct prof_bt_s {
|
||||
/* Backtrace, stored as len program counters. */
|
||||
void **vec;
|
||||
unsigned len;
|
||||
};
|
||||
|
||||
#ifdef JEMALLOC_PROF_LIBGCC
|
||||
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
|
||||
typedef struct {
|
||||
void **vec;
|
||||
unsigned *len;
|
||||
unsigned max;
|
||||
} prof_unwind_data_t;
|
||||
#endif
|
||||
|
||||
struct prof_cnt_s {
|
||||
/* Profiling counters. */
|
||||
uint64_t curobjs;
|
||||
uint64_t curobjs_shifted_unbiased;
|
||||
uint64_t curbytes;
|
||||
uint64_t curbytes_unbiased;
|
||||
uint64_t accumobjs;
|
||||
uint64_t accumobjs_shifted_unbiased;
|
||||
uint64_t accumbytes;
|
||||
uint64_t accumbytes_unbiased;
|
||||
};
|
||||
|
||||
typedef enum {
|
||||
prof_tctx_state_initializing,
|
||||
prof_tctx_state_nominal,
|
||||
prof_tctx_state_dumping,
|
||||
prof_tctx_state_purgatory /* Dumper must finish destroying. */
|
||||
} prof_tctx_state_t;
|
||||
|
||||
struct prof_tctx_s {
|
||||
/* Thread data for thread that performed the allocation. */
|
||||
prof_tdata_t *tdata;
|
||||
|
||||
/*
|
||||
* Copy of tdata->thr_{uid,discrim}, necessary because tdata may be
|
||||
* defunct during teardown.
|
||||
*/
|
||||
uint64_t thr_uid;
|
||||
uint64_t thr_discrim;
|
||||
|
||||
/*
|
||||
* Reference count of how many times this tctx object is referenced in
|
||||
* recent allocation / deallocation records, protected by tdata->lock.
|
||||
*/
|
||||
uint64_t recent_count;
|
||||
|
||||
/* Profiling counters, protected by tdata->lock. */
|
||||
prof_cnt_t cnts;
|
||||
|
||||
/* Associated global context. */
|
||||
prof_gctx_t *gctx;
|
||||
|
||||
/*
|
||||
* UID that distinguishes multiple tctx's created by the same thread,
|
||||
* but coexisting in gctx->tctxs. There are two ways that such
|
||||
* coexistence can occur:
|
||||
* - A dumper thread can cause a tctx to be retained in the purgatory
|
||||
* state.
|
||||
* - Although a single "producer" thread must create all tctx's which
|
||||
* share the same thr_uid, multiple "consumers" can each concurrently
|
||||
* execute portions of prof_tctx_destroy(). prof_tctx_destroy() only
|
||||
* gets called once each time cnts.cur{objs,bytes} drop to 0, but this
|
||||
* threshold can be hit again before the first consumer finishes
|
||||
* executing prof_tctx_destroy().
|
||||
*/
|
||||
uint64_t tctx_uid;
|
||||
|
||||
/* Linkage into gctx's tctxs. */
|
||||
rb_node(prof_tctx_t) tctx_link;
|
||||
|
||||
/*
|
||||
* True during prof_alloc_prep()..prof_malloc_sample_object(), prevents
|
||||
* sample vs destroy race.
|
||||
*/
|
||||
bool prepared;
|
||||
|
||||
/* Current dump-related state, protected by gctx->lock. */
|
||||
prof_tctx_state_t state;
|
||||
|
||||
/*
|
||||
* Copy of cnts snapshotted during early dump phase, protected by
|
||||
* dump_mtx.
|
||||
*/
|
||||
prof_cnt_t dump_cnts;
|
||||
};
|
||||
typedef rb_tree(prof_tctx_t) prof_tctx_tree_t;
|
||||
|
||||
struct prof_info_s {
|
||||
/* Time when the allocation was made. */
|
||||
nstime_t alloc_time;
|
||||
/* Points to the prof_tctx_t corresponding to the allocation. */
|
||||
prof_tctx_t *alloc_tctx;
|
||||
/* Allocation request size. */
|
||||
size_t alloc_size;
|
||||
};
|
||||
|
||||
struct prof_gctx_s {
|
||||
/* Protects nlimbo, cnt_summed, and tctxs. */
|
||||
malloc_mutex_t *lock;
|
||||
|
||||
/*
|
||||
* Number of threads that currently cause this gctx to be in a state of
|
||||
* limbo due to one of:
|
||||
* - Initializing this gctx.
|
||||
* - Initializing per thread counters associated with this gctx.
|
||||
* - Preparing to destroy this gctx.
|
||||
* - Dumping a heap profile that includes this gctx.
|
||||
* nlimbo must be 1 (single destroyer) in order to safely destroy the
|
||||
* gctx.
|
||||
*/
|
||||
unsigned nlimbo;
|
||||
|
||||
/*
|
||||
* Tree of profile counters, one for each thread that has allocated in
|
||||
* this context.
|
||||
*/
|
||||
prof_tctx_tree_t tctxs;
|
||||
|
||||
/* Linkage for tree of contexts to be dumped. */
|
||||
rb_node(prof_gctx_t) dump_link;
|
||||
|
||||
/* Temporary storage for summation during dump. */
|
||||
prof_cnt_t cnt_summed;
|
||||
|
||||
/* Associated backtrace. */
|
||||
prof_bt_t bt;
|
||||
|
||||
/* Backtrace vector, variable size, referred to by bt. */
|
||||
void *vec[1];
|
||||
};
|
||||
typedef rb_tree(prof_gctx_t) prof_gctx_tree_t;
|
||||
|
||||
struct prof_tdata_s {
|
||||
malloc_mutex_t *lock;
|
||||
|
||||
/* Monotonically increasing unique thread identifier. */
|
||||
uint64_t thr_uid;
|
||||
|
||||
/*
|
||||
* Monotonically increasing discriminator among tdata structures
|
||||
* associated with the same thr_uid.
|
||||
*/
|
||||
uint64_t thr_discrim;
|
||||
|
||||
rb_node(prof_tdata_t) tdata_link;
|
||||
|
||||
/*
|
||||
* Counter used to initialize prof_tctx_t's tctx_uid. No locking is
|
||||
* necessary when incrementing this field, because only one thread ever
|
||||
* does so.
|
||||
*/
|
||||
uint64_t tctx_uid_next;
|
||||
|
||||
/*
|
||||
* Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks
|
||||
* backtraces for which it has non-zero allocation/deallocation counters
|
||||
* associated with thread-specific prof_tctx_t objects. Other threads
|
||||
* may write to prof_tctx_t contents when freeing associated objects.
|
||||
*/
|
||||
ckh_t bt2tctx;
|
||||
|
||||
/* Included in heap profile dumps if has content. */
|
||||
char thread_name[PROF_THREAD_NAME_MAX_LEN];
|
||||
|
||||
/* State used to avoid dumping while operating on prof internals. */
|
||||
bool enq;
|
||||
bool enq_idump;
|
||||
bool enq_gdump;
|
||||
|
||||
/*
|
||||
* Set to true during an early dump phase for tdata's which are
|
||||
* currently being dumped. New threads' tdata's have this initialized
|
||||
* to false so that they aren't accidentally included in later dump
|
||||
* phases.
|
||||
*/
|
||||
bool dumping;
|
||||
|
||||
/*
|
||||
* True if profiling is active for this tdata's thread
|
||||
* (thread.prof.active mallctl).
|
||||
*/
|
||||
bool active;
|
||||
|
||||
bool attached;
|
||||
bool expired;
|
||||
|
||||
/* Temporary storage for summation during dump. */
|
||||
prof_cnt_t cnt_summed;
|
||||
|
||||
/* Backtrace vector, used for calls to prof_backtrace(). */
|
||||
void **vec;
|
||||
};
|
||||
typedef rb_tree(prof_tdata_t) prof_tdata_tree_t;
|
||||
|
||||
struct prof_recent_s {
|
||||
nstime_t alloc_time;
|
||||
nstime_t dalloc_time;
|
||||
|
||||
ql_elm(prof_recent_t) link;
|
||||
size_t size;
|
||||
size_t usize;
|
||||
atomic_p_t alloc_edata; /* NULL means allocation has been freed. */
|
||||
prof_tctx_t *alloc_tctx;
|
||||
prof_tctx_t *dalloc_tctx;
|
||||
};
|
||||
|
||||
#endif /* JEMALLOC_INTERNAL_PROF_STRUCTS_H */
|
||||
|
|
@ -1,94 +0,0 @@
|
|||
#ifndef JEMALLOC_INTERNAL_PROF_TYPES_H
|
||||
#define JEMALLOC_INTERNAL_PROF_TYPES_H
|
||||
|
||||
/*
 * Opaque typedefs for the profiling structures.  Only the names are
 * introduced here; the struct bodies are defined separately, so these can be
 * used wherever a pointer to the type is sufficient.
 */
typedef struct prof_bt_s     prof_bt_t;
typedef struct prof_cnt_s    prof_cnt_t;
typedef struct prof_tctx_s   prof_tctx_t;
typedef struct prof_info_s   prof_info_t;
typedef struct prof_gctx_s   prof_gctx_t;
typedef struct prof_tdata_s  prof_tdata_t;
typedef struct prof_recent_s prof_recent_t;
|
||||
|
||||
/* Option defaults. */
#ifdef JEMALLOC_PROF
#  define PROF_PREFIX_DEFAULT "jeprof"
#else
#  define PROF_PREFIX_DEFAULT ""
#endif
#define LG_PROF_SAMPLE_DEFAULT 19
/* Parenthesized so the negative literal expands as a single operand. */
#define LG_PROF_INTERVAL_DEFAULT (-1)

/*
 * Hard limit on stack backtrace depth.  The version of prof_backtrace() that
 * is based on __builtin_return_address() necessarily has a hard-coded number
 * of backtrace frame handlers, and should be kept in sync with this setting.
 */
#ifdef JEMALLOC_PROF_GCC
#  define PROF_BT_MAX_LIMIT 256
#else
#  define PROF_BT_MAX_LIMIT UINT_MAX
#endif
#define PROF_BT_MAX_DEFAULT 128

/* Initial hash table size. */
#define PROF_CKH_MINITEMS 64

/* Size of memory buffer to use when writing dump files. */
#ifndef JEMALLOC_PROF
/* Minimize memory bloat for non-prof builds. */
#  define PROF_DUMP_BUFSIZE 1
#elif defined(JEMALLOC_DEBUG)
/* Use a small buffer size in debug build, mainly to facilitate testing. */
#  define PROF_DUMP_BUFSIZE 16
#else
#  define PROF_DUMP_BUFSIZE 65536
#endif

/* Size of size class related tables. */
#ifdef JEMALLOC_PROF
#  define PROF_SC_NSIZES SC_NSIZES
#else
/* Minimize memory bloat for non-prof builds. */
#  define PROF_SC_NSIZES 1
#endif

/* Size of stack-allocated buffer used by prof_printf(). */
#define PROF_PRINTF_BUFSIZE 128

/*
 * Number of mutexes shared among all gctx's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define PROF_NCTX_LOCKS 1024

/*
 * Number of mutexes shared among all tdata's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define PROF_NTDATA_LOCKS 256

/* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
#  define PROF_DUMP_FILENAME_LEN (PATH_MAX + 1)
#else
#  define PROF_DUMP_FILENAME_LEN 1
#endif

/* Default number of recent allocations to record. */
#define PROF_RECENT_ALLOC_MAX_DEFAULT 0

/* Thread name storage size limit. */
#define PROF_THREAD_NAME_MAX_LEN 16

/*
 * Minimum required alignment for sampled allocations.  Over-aligning sampled
 * allocations allows us to quickly identify them on the dalloc path without
 * resorting to metadata lookup.
 */
#define PROF_SAMPLE_ALIGNMENT PAGE
#define PROF_SAMPLE_ALIGNMENT_MASK PAGE_MASK

/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
#define PROF_TCTX_SENTINEL ((prof_tctx_t *)((uintptr_t)1U))
|
||||
|
||||
#endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */
|
||||
198
include/jemalloc/internal/tcache.h
Normal file
198
include/jemalloc/internal/tcache.h
Normal file
|
|
@ -0,0 +1,198 @@
|
|||
#ifndef JEMALLOC_INTERNAL_TCACHE_H
|
||||
#define JEMALLOC_INTERNAL_TCACHE_H
|
||||
|
||||
#include "jemalloc/internal/jemalloc_preamble.h"
|
||||
#include "jemalloc/internal/cache_bin.h"
|
||||
#include "jemalloc/internal/ql.h"
|
||||
#include "jemalloc/internal/sc.h"
|
||||
#include "jemalloc/internal/sz.h"
|
||||
#include "jemalloc/internal/thread_event_registry.h"
|
||||
#include "jemalloc/internal/ticker.h"
|
||||
|
||||
/* Forward decl; only base_t * is used as a pointer arg below. */
typedef struct base_s base_t;

/******************************************************************************/
/* TYPES */
/******************************************************************************/

/* Opaque names for the tcache structures; bodies follow in STRUCTS. */
typedef struct tcache_slow_s tcache_slow_t;
typedef struct tcache_s      tcache_t;
typedef struct tcaches_s     tcaches_t;
|
||||
|
||||
/* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */
#define TCACHE_ZERO_INITIALIZER { 0 }
#define TCACHE_SLOW_ZERO_INITIALIZER { { 0 } }

/* Used in TSD static initializer only. Will be initialized to opt_tcache. */
#define TCACHE_ENABLED_ZERO_INITIALIZER false

/* Used for explicit tcache only. Means flushed but not destroyed. */
/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
#define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1)

/* log2 of the size limit, and the limit itself as a byte count. */
#define TCACHE_LG_MAXCLASS_LIMIT LG_USIZE_GROW_SLOW_THRESHOLD
#define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT)
/*
 * SC_NBINS, plus SC_NGROUP entries for every power of two between
 * SC_LG_LARGE_MINCLASS and TCACHE_LG_MAXCLASS_LIMIT, plus one.
 */
#define TCACHE_NBINS_MAX                                                       \
	(SC_NBINS                                                              \
	    + SC_NGROUP * (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS)    \
	    + 1)
#define TCACHE_GC_NEIGHBOR_LIMIT ((uintptr_t)1 << 21) /* 2M */
#define TCACHE_GC_INTERVAL_NS ((uint64_t)10 * KQU(1000000)) /* 10ms */
/* SC_NBINS / 8 when SC_NBINS exceeds 8, otherwise 1. */
#define TCACHE_GC_SMALL_NBINS_MAX ((SC_NBINS > 8) ? (SC_NBINS >> 3) : 1)
#define TCACHE_GC_LARGE_NBINS_MAX 1
|
||||
|
||||
/******************************************************************************/
|
||||
/* STRUCTS */
|
||||
/******************************************************************************/
|
||||
|
||||
/*
|
||||
* The tcache state is split into the slow and hot path data. Each has a
|
||||
* pointer to the other, and the data always comes in pairs. The layout of each
|
||||
* of them varies in practice; tcache_slow lives in the TSD for the automatic
|
||||
* tcache, and as part of a dynamic allocation for manual allocations. Keeping
|
||||
* a pointer to tcache_slow lets us treat these cases uniformly, rather than
|
||||
* splitting up the tcache [de]allocation code into those paths called with the
|
||||
* TSD tcache and those called with a manual tcache.
|
||||
*/
|
||||
|
||||
struct tcache_slow_s {
|
||||
/*
|
||||
* The descriptor lets the arena find our cache bins without seeing the
|
||||
* tcache definition. This enables arenas to aggregate stats across
|
||||
* tcaches without having a tcache dependency.
|
||||
*/
|
||||
cache_bin_array_descriptor_t cache_bin_array_descriptor;
|
||||
|
||||
/* The arena this tcache is associated with. */
|
||||
arena_t *arena;
|
||||
/* The number of bins activated in the tcache. */
|
||||
unsigned tcache_nbins;
|
||||
/* Last time GC has been performed. */
|
||||
nstime_t last_gc_time;
|
||||
/* Next bin to GC. */
|
||||
szind_t next_gc_bin;
|
||||
szind_t next_gc_bin_small;
|
||||
szind_t next_gc_bin_large;
|
||||
/* For small bins, help determine how many items to fill at a time. */
|
||||
cache_bin_fill_ctl_t bin_fill_ctl_do_not_access_directly[SC_NBINS];
|
||||
/* For small bins, whether has been refilled since last GC. */
|
||||
bool bin_refilled[SC_NBINS];
|
||||
/*
|
||||
* For small bins, the number of items we can pretend to flush before
|
||||
* actually flushing.
|
||||
*/
|
||||
uint8_t bin_flush_delay_items[SC_NBINS];
|
||||
/*
|
||||
* The start of the allocation containing the dynamic allocation for
|
||||
* either the cache bins alone, or the cache bin memory as well as this
|
||||
* tcache_slow_t and its associated tcache_t.
|
||||
*/
|
||||
void *dyn_alloc;
|
||||
|
||||
/* The associated bins. */
|
||||
tcache_t *tcache;
|
||||
};
|
||||
|
||||
struct tcache_s {
|
||||
tcache_slow_t *tcache_slow;
|
||||
cache_bin_t bins[TCACHE_NBINS_MAX];
|
||||
};
|
||||
|
||||
/* Linkage for list of available (previously used) explicit tcache IDs. */
|
||||
struct tcaches_s {
|
||||
union {
|
||||
tcache_t *tcache;
|
||||
tcaches_t *next;
|
||||
};
|
||||
};
|
||||
|
||||
/******************************************************************************/
|
||||
/* EXTERNS */
|
||||
/******************************************************************************/
|
||||
|
||||
extern bool opt_tcache;
|
||||
extern size_t opt_tcache_max;
|
||||
extern ssize_t opt_lg_tcache_nslots_mul;
|
||||
extern unsigned opt_tcache_nslots_small_min;
|
||||
extern unsigned opt_tcache_nslots_small_max;
|
||||
extern unsigned opt_tcache_nslots_large;
|
||||
extern ssize_t opt_lg_tcache_shift;
|
||||
extern size_t opt_tcache_gc_incr_bytes;
|
||||
extern size_t opt_tcache_gc_delay_bytes;
|
||||
extern unsigned opt_lg_tcache_flush_small_div;
|
||||
extern unsigned opt_lg_tcache_flush_large_div;
|
||||
|
||||
/*
|
||||
* Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more
|
||||
* large-object bins. This is only used during threads initialization and
|
||||
* changing it will not reflect on initialized threads as expected. Thus,
|
||||
* it should not be changed on the fly. To change the number of tcache bins
|
||||
* in use, refer to tcache_nbins of each tcache.
|
||||
*/
|
||||
extern unsigned global_do_not_change_tcache_nbins;
|
||||
|
||||
/*
|
||||
* Maximum cached size class. Same as above, this is only used during threads
|
||||
* initialization and should not be changed. To change the maximum cached size
|
||||
* class, refer to tcache_max of each tcache.
|
||||
*/
|
||||
extern size_t global_do_not_change_tcache_maxclass;
|
||||
|
||||
/*
|
||||
* Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and
|
||||
* usable via the MALLOCX_TCACHE() flag. The automatic per thread tcaches are
|
||||
* completely disjoint from this data structure. tcaches starts off as a sparse
|
||||
* array, so it has no physical memory footprint until individual pages are
|
||||
* touched. This allows the entire array to be allocated the first time an
|
||||
* explicit tcache is created without a disproportionate impact on memory usage.
|
||||
*/
|
||||
extern tcaches_t *tcaches;
|
||||
|
||||
size_t tcache_salloc(tsdn_t *tsdn, const void *ptr);
|
||||
void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
|
||||
cache_bin_t *cache_bin, szind_t binind, bool *tcache_success);
|
||||
|
||||
void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache,
|
||||
cache_bin_t *cache_bin, szind_t binind, unsigned rem);
|
||||
void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache,
|
||||
cache_bin_t *cache_bin, szind_t binind, unsigned rem);
|
||||
void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache,
|
||||
cache_bin_t *cache_bin, szind_t binind, bool is_small);
|
||||
bool tcache_bin_info_default_init(
|
||||
const char *bin_settings_segment_cur, size_t len_left);
|
||||
bool tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len);
|
||||
bool tcache_bin_ncached_max_read(
|
||||
tsd_t *tsd, size_t bin_size, cache_bin_sz_t *ncached_max);
|
||||
void tcache_arena_reassociate(
|
||||
tsdn_t *tsdn, tcache_slow_t *tcache_slow, arena_t *arena);
|
||||
tcache_t *tcache_create_explicit(tsd_t *tsd);
|
||||
bool thread_tcache_max_set(tsd_t *tsd, size_t tcache_max);
|
||||
void tcache_cleanup(tsd_t *tsd);
|
||||
bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind);
|
||||
void tcaches_flush(tsd_t *tsd, unsigned ind);
|
||||
void tcaches_destroy(tsd_t *tsd, unsigned ind);
|
||||
bool tcache_boot(tsdn_t *tsdn, base_t *base);
|
||||
void tcache_arena_associate(
|
||||
tsdn_t *tsdn, tcache_slow_t *tcache_slow, arena_t *arena);
|
||||
cache_bin_array_descriptor_t *tcache_postfork_arena_descriptor(
|
||||
tsdn_t *tsdn, arena_t *arena);
|
||||
void tcache_prefork(tsdn_t *tsdn);
|
||||
void tcache_postfork_parent(tsdn_t *tsdn);
|
||||
void tcache_postfork_child(tsdn_t *tsdn);
|
||||
void tcache_flush(tsd_t *tsd);
|
||||
bool tsd_tcache_enabled_data_init(tsd_t *tsd);
|
||||
void tcache_enabled_set(tsd_t *tsd, bool enabled);
|
||||
|
||||
extern void *(*JET_MUTABLE tcache_stack_alloc)(tsdn_t *tsdn, size_t size,
|
||||
size_t alignment);
|
||||
|
||||
void tcache_assert_initialized(tcache_t *tcache);
|
||||
|
||||
extern te_base_cb_t tcache_gc_te_handler;
|
||||
|
||||
#endif /* JEMALLOC_INTERNAL_TCACHE_H */
|
||||
|
|
@ -1,91 +0,0 @@
|
|||
#ifndef JEMALLOC_INTERNAL_TCACHE_EXTERNS_H
|
||||
#define JEMALLOC_INTERNAL_TCACHE_EXTERNS_H
|
||||
|
||||
#include "jemalloc/internal/jemalloc_preamble.h"
|
||||
#include "jemalloc/internal/base.h"
|
||||
#include "jemalloc/internal/cache_bin.h"
|
||||
#include "jemalloc/internal/sz.h"
|
||||
#include "jemalloc/internal/tcache_types.h"
|
||||
#include "jemalloc/internal/thread_event_registry.h"
|
||||
|
||||
extern bool opt_tcache;
|
||||
extern size_t opt_tcache_max;
|
||||
extern ssize_t opt_lg_tcache_nslots_mul;
|
||||
extern unsigned opt_tcache_nslots_small_min;
|
||||
extern unsigned opt_tcache_nslots_small_max;
|
||||
extern unsigned opt_tcache_nslots_large;
|
||||
extern ssize_t opt_lg_tcache_shift;
|
||||
extern size_t opt_tcache_gc_incr_bytes;
|
||||
extern size_t opt_tcache_gc_delay_bytes;
|
||||
extern unsigned opt_lg_tcache_flush_small_div;
|
||||
extern unsigned opt_lg_tcache_flush_large_div;
|
||||
|
||||
/*
|
||||
* Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more
|
||||
* large-object bins. This is only used during threads initialization and
|
||||
* changing it will not reflect on initialized threads as expected. Thus,
|
||||
* it should not be changed on the fly. To change the number of tcache bins
|
||||
* in use, refer to tcache_nbins of each tcache.
|
||||
*/
|
||||
extern unsigned global_do_not_change_tcache_nbins;
|
||||
|
||||
/*
|
||||
* Maximum cached size class. Same as above, this is only used during threads
|
||||
* initialization and should not be changed. To change the maximum cached size
|
||||
* class, refer to tcache_max of each tcache.
|
||||
*/
|
||||
extern size_t global_do_not_change_tcache_maxclass;
|
||||
|
||||
/*
|
||||
* Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and
|
||||
* usable via the MALLOCX_TCACHE() flag. The automatic per thread tcaches are
|
||||
* completely disjoint from this data structure. tcaches starts off as a sparse
|
||||
* array, so it has no physical memory footprint until individual pages are
|
||||
* touched. This allows the entire array to be allocated the first time an
|
||||
* explicit tcache is created without a disproportionate impact on memory usage.
|
||||
*/
|
||||
extern tcaches_t *tcaches;
|
||||
|
||||
size_t tcache_salloc(tsdn_t *tsdn, const void *ptr);
|
||||
void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
|
||||
cache_bin_t *cache_bin, szind_t binind, bool *tcache_success);
|
||||
|
||||
void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache,
|
||||
cache_bin_t *cache_bin, szind_t binind, unsigned rem);
|
||||
void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache,
|
||||
cache_bin_t *cache_bin, szind_t binind, unsigned rem);
|
||||
void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache,
|
||||
cache_bin_t *cache_bin, szind_t binind, bool is_small);
|
||||
bool tcache_bin_info_default_init(
|
||||
const char *bin_settings_segment_cur, size_t len_left);
|
||||
bool tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len);
|
||||
bool tcache_bin_ncached_max_read(
|
||||
tsd_t *tsd, size_t bin_size, cache_bin_sz_t *ncached_max);
|
||||
void tcache_arena_reassociate(
|
||||
tsdn_t *tsdn, tcache_slow_t *tcache_slow, arena_t *arena);
|
||||
tcache_t *tcache_create_explicit(tsd_t *tsd);
|
||||
bool thread_tcache_max_set(tsd_t *tsd, size_t tcache_max);
|
||||
void tcache_cleanup(tsd_t *tsd);
|
||||
bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind);
|
||||
void tcaches_flush(tsd_t *tsd, unsigned ind);
|
||||
void tcaches_destroy(tsd_t *tsd, unsigned ind);
|
||||
bool tcache_boot(tsdn_t *tsdn, base_t *base);
|
||||
void tcache_arena_associate(
|
||||
tsdn_t *tsdn, tcache_slow_t *tcache_slow, arena_t *arena);
|
||||
cache_bin_array_descriptor_t *tcache_postfork_arena_descriptor(
|
||||
tsdn_t *tsdn, arena_t *arena);
|
||||
void tcache_prefork(tsdn_t *tsdn);
|
||||
void tcache_postfork_parent(tsdn_t *tsdn);
|
||||
void tcache_postfork_child(tsdn_t *tsdn);
|
||||
void tcache_flush(tsd_t *tsd);
|
||||
bool tsd_tcache_enabled_data_init(tsd_t *tsd);
|
||||
void tcache_enabled_set(tsd_t *tsd, bool enabled);
|
||||
|
||||
extern void *(*JET_MUTABLE tcache_stack_alloc)(tsdn_t *tsdn, size_t size,
|
||||
size_t alignment);
|
||||
|
||||
void tcache_assert_initialized(tcache_t *tcache);
|
||||
|
||||
extern te_base_cb_t tcache_gc_te_handler;
|
||||
|
||||
#endif /* JEMALLOC_INTERNAL_TCACHE_EXTERNS_H */
|
||||
|
|
@ -10,7 +10,7 @@
|
|||
#include "jemalloc/internal/san.h"
|
||||
#include "jemalloc/internal/sc.h"
|
||||
#include "jemalloc/internal/sz.h"
|
||||
#include "jemalloc/internal/tcache_externs.h"
|
||||
#include "jemalloc/internal/tcache.h"
|
||||
#include "jemalloc/internal/util.h"
|
||||
|
||||
static inline bool
|
||||
|
|
|
|||
|
|
@ -1,72 +0,0 @@
|
|||
#ifndef JEMALLOC_INTERNAL_TCACHE_STRUCTS_H
|
||||
#define JEMALLOC_INTERNAL_TCACHE_STRUCTS_H
|
||||
|
||||
#include "jemalloc/internal/jemalloc_preamble.h"
|
||||
#include "jemalloc/internal/cache_bin.h"
|
||||
#include "jemalloc/internal/ql.h"
|
||||
#include "jemalloc/internal/sc.h"
|
||||
#include "jemalloc/internal/tcache_types.h"
|
||||
#include "jemalloc/internal/ticker.h"
|
||||
|
||||
/*
|
||||
* The tcache state is split into the slow and hot path data. Each has a
|
||||
* pointer to the other, and the data always comes in pairs. The layout of each
|
||||
* of them varies in practice; tcache_slow lives in the TSD for the automatic
|
||||
* tcache, and as part of a dynamic allocation for manual allocations. Keeping
|
||||
* a pointer to tcache_slow lets us treat these cases uniformly, rather than
|
||||
* splitting up the tcache [de]allocation code into those paths called with the
|
||||
* TSD tcache and those called with a manual tcache.
|
||||
*/
|
||||
|
||||
struct tcache_slow_s {
|
||||
/*
|
||||
* The descriptor lets the arena find our cache bins without seeing the
|
||||
* tcache definition. This enables arenas to aggregate stats across
|
||||
* tcaches without having a tcache dependency.
|
||||
*/
|
||||
cache_bin_array_descriptor_t cache_bin_array_descriptor;
|
||||
|
||||
/* The arena this tcache is associated with. */
|
||||
arena_t *arena;
|
||||
/* The number of bins activated in the tcache. */
|
||||
unsigned tcache_nbins;
|
||||
/* Last time GC has been performed. */
|
||||
nstime_t last_gc_time;
|
||||
/* Next bin to GC. */
|
||||
szind_t next_gc_bin;
|
||||
szind_t next_gc_bin_small;
|
||||
szind_t next_gc_bin_large;
|
||||
/* For small bins, help determine how many items to fill at a time. */
|
||||
cache_bin_fill_ctl_t bin_fill_ctl_do_not_access_directly[SC_NBINS];
|
||||
/* For small bins, whether has been refilled since last GC. */
|
||||
bool bin_refilled[SC_NBINS];
|
||||
/*
|
||||
* For small bins, the number of items we can pretend to flush before
|
||||
* actually flushing.
|
||||
*/
|
||||
uint8_t bin_flush_delay_items[SC_NBINS];
|
||||
/*
|
||||
* The start of the allocation containing the dynamic allocation for
|
||||
* either the cache bins alone, or the cache bin memory as well as this
|
||||
* tcache_slow_t and its associated tcache_t.
|
||||
*/
|
||||
void *dyn_alloc;
|
||||
|
||||
/* The associated bins. */
|
||||
tcache_t *tcache;
|
||||
};
|
||||
|
||||
struct tcache_s {
|
||||
tcache_slow_t *tcache_slow;
|
||||
cache_bin_t bins[TCACHE_NBINS_MAX];
|
||||
};
|
||||
|
||||
/* Linkage for list of available (previously used) explicit tcache IDs. */
|
||||
struct tcaches_s {
|
||||
union {
|
||||
tcache_t *tcache;
|
||||
tcaches_t *next;
|
||||
};
|
||||
};
|
||||
|
||||
#endif /* JEMALLOC_INTERNAL_TCACHE_STRUCTS_H */
|
||||
|
|
@ -1,37 +0,0 @@
|
|||
#ifndef JEMALLOC_INTERNAL_TCACHE_TYPES_H
|
||||
#define JEMALLOC_INTERNAL_TCACHE_TYPES_H
|
||||
|
||||
#include "jemalloc/internal/jemalloc_preamble.h"
|
||||
#include "jemalloc/internal/sc.h"
|
||||
|
||||
/* Opaque names for the tcache structures; the bodies are defined separately. */
typedef struct tcache_slow_s tcache_slow_t;
typedef struct tcache_s      tcache_t;
typedef struct tcaches_s     tcaches_t;
|
||||
|
||||
/* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */
#define TCACHE_ZERO_INITIALIZER { 0 }
#define TCACHE_SLOW_ZERO_INITIALIZER { { 0 } }

/* Used in TSD static initializer only. Will be initialized to opt_tcache. */
#define TCACHE_ENABLED_ZERO_INITIALIZER false

/* Used for explicit tcache only. Means flushed but not destroyed. */
/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
#define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1)

/* log2 of the size limit, and the limit itself as a byte count. */
#define TCACHE_LG_MAXCLASS_LIMIT LG_USIZE_GROW_SLOW_THRESHOLD
#define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT)
/*
 * SC_NBINS, plus SC_NGROUP entries for every power of two between
 * SC_LG_LARGE_MINCLASS and TCACHE_LG_MAXCLASS_LIMIT, plus one.
 */
#define TCACHE_NBINS_MAX                                                       \
	(SC_NBINS                                                              \
	    + SC_NGROUP * (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS)    \
	    + 1)
#define TCACHE_GC_NEIGHBOR_LIMIT ((uintptr_t)1 << 21) /* 2M */
#define TCACHE_GC_INTERVAL_NS ((uint64_t)10 * KQU(1000000)) /* 10ms */
/* SC_NBINS / 8 when SC_NBINS exceeds 8, otherwise 1. */
#define TCACHE_GC_SMALL_NBINS_MAX ((SC_NBINS > 8) ? (SC_NBINS >> 3) : 1)
#define TCACHE_GC_LARGE_NBINS_MAX 1
|
||||
|
||||
#endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */
|
||||
|
|
@ -9,11 +9,12 @@
|
|||
#include "jemalloc/internal/tsd_binshards.h"
|
||||
#include "jemalloc/internal/jemalloc_internal_externs.h"
|
||||
#include "jemalloc/internal/peak.h"
|
||||
#include "jemalloc/internal/prof_types.h"
|
||||
#include "jemalloc/internal/rtree_tsd.h"
|
||||
#include "jemalloc/internal/tcache_structs.h"
|
||||
#include "jemalloc/internal/tcache_types.h"
|
||||
#include "jemalloc/internal/tcache.h"
|
||||
#include "jemalloc/internal/thread_event_registry.h"
|
||||
|
||||
/* Forward decl; tsd_internals.h only uses prof_tdata_t as a pointer type. */
|
||||
typedef struct prof_tdata_s prof_tdata_t;
|
||||
#include "jemalloc/internal/tsd_types.h"
|
||||
#include "jemalloc/internal/util.h"
|
||||
#include "jemalloc/internal/witness.h"
|
||||
|
|
|
|||
|
|
@ -3,9 +3,9 @@
|
|||
|
||||
#include "jemalloc/internal/thread_event.h"
|
||||
#include "jemalloc/internal/thread_event_registry.h"
|
||||
#include "jemalloc/internal/tcache_externs.h"
|
||||
#include "jemalloc/internal/tcache.h"
|
||||
#include "jemalloc/internal/peak_event.h"
|
||||
#include "jemalloc/internal/prof_externs.h"
|
||||
#include "jemalloc/internal/prof.h"
|
||||
#include "jemalloc/internal/stats.h"
|
||||
|
||||
static malloc_mutex_t uevents_mu;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue