Mirror of https://github.com/jemalloc/jemalloc.git (synced 2026-04-16 23:51:44 +03:00)
This gives more accurate attribution of bytes and counts to stack traces, without introducing backwards incompatibilities in heap-profile parsing tools. We track the ideal reported (to the end user) number of bytes more carefully inside core jemalloc. When dumping heap profiles, instead of outputting our counts directly, we output counts that will cause parsing tools to give a result close to the value we want. We retain the old version as an opt setting, so that users who track values on a per-component basis can keep their metrics stable until they decide to switch.
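A minimal sketch of the correction the commit describes, assuming the sampling model used in this file (an allocation of size sz is sampled with probability p = 1 - exp(-sz/R), where R = 2^lg_prof_sample). The helper name below is hypothetical and not part of jemalloc; it only illustrates the expected-value math:

#include <math.h>

/*
 * Illustration only (hypothetical helper, not jemalloc API): a sampled
 * allocation of size sz stands for 1/p objects and sz/p bytes in
 * expectation, where p is the probability that an allocation of that size
 * gets sampled at rate "rate".
 */
static double
example_unbiased_bytes(double sz, double rate) {
	double p = 1.0 - exp(-sz / rate);	/* chance this size is sampled */
	return sz / p;				/* expected-value (unbiased) bytes */
}

prof_unbias_map_init() below precomputes these per-size-class values so that the dump path can emit already-corrected counts.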
775 lines · 21 KiB · C
#include "jemalloc/internal/jemalloc_preamble.h"
|
|
#include "jemalloc/internal/jemalloc_internal_includes.h"
|
|
|
|
#include "jemalloc/internal/ctl.h"
|
|
#include "jemalloc/internal/assert.h"
|
|
#include "jemalloc/internal/mutex.h"
|
|
#include "jemalloc/internal/counter.h"
|
|
#include "jemalloc/internal/prof_data.h"
|
|
#include "jemalloc/internal/prof_log.h"
|
|
#include "jemalloc/internal/prof_recent.h"
|
|
#include "jemalloc/internal/prof_sys.h"
|
|
#include "jemalloc/internal/thread_event.h"
|
|
|
|
/*
|
|
* This file implements the profiling "APIs" needed by other parts of jemalloc,
|
|
* and also manages the relevant "operational" data, mainly options and mutexes;
|
|
* the core profiling data structures are encapsulated in prof_data.c.
|
|
*/
|
|
|
|
/******************************************************************************/
|
|
|
|
/* Data. */
|
|
|
|
bool opt_prof = false;
bool opt_prof_active = true;
bool opt_prof_thread_active_init = true;
size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
bool opt_prof_gdump = false;
bool opt_prof_final = false;
bool opt_prof_leak = false;
bool opt_prof_accum = false;
char opt_prof_prefix[PROF_DUMP_FILENAME_LEN];
bool opt_prof_sys_thread_name = false;
bool opt_prof_unbias = true;

/* Accessed via prof_sample_event_handler(). */
static counter_accum_t prof_idump_accumulated;

/*
 * Initialized as opt_prof_active, and accessed via
 * prof_active_[gs]et{_unlocked,}().
 */
bool prof_active;
static malloc_mutex_t prof_active_mtx;

/*
 * Initialized as opt_prof_thread_active_init, and accessed via
 * prof_thread_active_init_[gs]et().
 */
static bool prof_thread_active_init;
static malloc_mutex_t prof_thread_active_init_mtx;

/*
 * Initialized as opt_prof_gdump, and accessed via
 * prof_gdump_[gs]et{_unlocked,}().
 */
bool prof_gdump_val;
static malloc_mutex_t prof_gdump_mtx;

uint64_t prof_interval = 0;

size_t lg_prof_sample;
size_t prof_unbiased_sz[SC_NSIZES];
size_t prof_shifted_unbiased_cnt[SC_NSIZES];

static uint64_t next_thr_uid;
static malloc_mutex_t next_thr_uid_mtx;

/* Do not dump any profiles until bootstrapping is complete. */
bool prof_booted = false;

/******************************************************************************/

void prof_unbias_map_init() {
	/* See the comment in prof_sample_new_event_wait */
#ifdef JEMALLOC_PROF
	for (szind_t i = 0; i < SC_NSIZES; i++) {
		double sz = (double)sz_index2size(i);
		double rate = (double)(ZU(1) << lg_prof_sample);
		double div_val = 1.0 - exp(-sz / rate);
		double unbiased_sz = sz / div_val;
		/*
		 * The "true" right value for the unbiased count is
		 * 1.0/(1 - exp(-sz/rate)). The problem is, we keep the counts
		 * as integers (for a variety of reasons -- rounding errors
		 * could trigger asserts, and not all libcs can properly handle
		 * floating point arithmetic during malloc calls inside libc).
		 * Rounding to an integer, though, can lead to rounding errors
		 * of over 30% for sizes close to the sampling rate. So
		 * instead, we multiply by a constant, dividing the maximum
		 * possible roundoff error by that constant. To avoid overflow
		 * in summing up size_t values, the largest safe constant we can
		 * pick is the size of the smallest allocation.
		 */
		double cnt_shift = (double)(ZU(1) << SC_LG_TINY_MIN);
		double shifted_unbiased_cnt = cnt_shift / div_val;
		prof_unbiased_sz[i] = (size_t)round(unbiased_sz);
		prof_shifted_unbiased_cnt[i] = (size_t)round(
		    shifted_unbiased_cnt);
	}
#else
	unreachable();
#endif
}

/******************************************************************************/

void
prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx) {
	cassert(config_prof);

	if (tsd_reentrancy_level_get(tsd) > 0) {
		assert((uintptr_t)tctx == (uintptr_t)1U);
		return;
	}

	if ((uintptr_t)tctx > (uintptr_t)1U) {
		malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
		tctx->prepared = false;
		prof_tctx_try_destroy(tsd, tctx);
	}
}

void
prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size,
    size_t usize, prof_tctx_t *tctx) {
	if (opt_prof_sys_thread_name) {
		prof_sys_thread_name_fetch(tsd);
	}

	edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global,
	    ptr);
	prof_info_set(tsd, edata, tctx);

	szind_t szind = sz_size2index(size);

	malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
	/*
	 * We need to do these map lookups while holding the lock, to avoid the
	 * possibility of races with prof_reset calls, which update the map and
	 * then acquire the lock. This actually still leaves a data race on the
	 * contents of the unbias map, but we have not yet gone through and
	 * atomic-ified the prof module, and compilers are not yet causing us
	 * issues. The key thing is to make sure that, if we read garbage data,
	 * the prof_reset call is about to mark our tctx as expired before any
	 * dumping of our corrupted output is attempted.
	 */
	size_t shifted_unbiased_cnt = prof_shifted_unbiased_cnt[szind];
	size_t unbiased_bytes = prof_unbiased_sz[szind];
	tctx->cnts.curobjs++;
	tctx->cnts.curobjs_shifted_unbiased += shifted_unbiased_cnt;
	tctx->cnts.curbytes += usize;
	tctx->cnts.curbytes_unbiased += unbiased_bytes;
	if (opt_prof_accum) {
		tctx->cnts.accumobjs++;
		tctx->cnts.accumobjs_shifted_unbiased += shifted_unbiased_cnt;
		tctx->cnts.accumbytes += usize;
		tctx->cnts.accumbytes_unbiased += unbiased_bytes;
	}
	bool record_recent = prof_recent_alloc_prepare(tsd, tctx);
	tctx->prepared = false;
	malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
	if (record_recent) {
		assert(tctx == edata_prof_tctx_get(edata));
		prof_recent_alloc(tsd, edata, size);
	}
}

void
prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info) {
	assert(prof_info != NULL);
	prof_tctx_t *tctx = prof_info->alloc_tctx;
	assert((uintptr_t)tctx > (uintptr_t)1U);

	szind_t szind = sz_size2index(usize);
	malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);

	assert(tctx->cnts.curobjs > 0);
	assert(tctx->cnts.curbytes >= usize);
	/*
	 * It's not correct to do equivalent asserts for unbiased bytes, because
	 * of the potential for races with prof.reset calls. The map contents
	 * should really be atomic, but we have not atomic-ified the prof module
	 * yet.
	 */
	tctx->cnts.curobjs--;
	tctx->cnts.curobjs_shifted_unbiased -= prof_shifted_unbiased_cnt[szind];
	tctx->cnts.curbytes -= usize;
	tctx->cnts.curbytes_unbiased -= prof_unbiased_sz[szind];

	prof_try_log(tsd, usize, prof_info);

	prof_tctx_try_destroy(tsd, tctx);
}

prof_tctx_t *
prof_tctx_create(tsd_t *tsd) {
	if (!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0) {
		return NULL;
	}

	prof_tdata_t *tdata = prof_tdata_get(tsd, true);
	if (tdata == NULL) {
		return NULL;
	}

	prof_bt_t bt;
	bt_init(&bt, tdata->vec);
	prof_backtrace(tsd, &bt);
	return prof_lookup(tsd, &bt);
}

/*
 * The bodies of this function and prof_leakcheck() are compiled out unless heap
 * profiling is enabled, so that it is possible to compile jemalloc with
 * floating point support completely disabled. Avoiding floating point code is
 * important on memory-constrained systems, but it also enables a workaround for
 * versions of glibc that don't properly save/restore floating point registers
 * during dynamic lazy symbol loading (which internally calls into whatever
 * malloc implementation happens to be integrated into the application). Note
 * that some compilers (e.g. gcc 4.8) may use floating point registers for fast
 * memory moves, so jemalloc must be compiled with such optimizations disabled
 * (e.g. -mno-sse) in order for the workaround to be complete.
 */
uint64_t
prof_sample_new_event_wait(tsd_t *tsd) {
#ifdef JEMALLOC_PROF
	if (lg_prof_sample == 0) {
		return TE_MIN_START_WAIT;
	}

	/*
	 * Compute sample interval as a geometrically distributed random
	 * variable with mean (2^lg_prof_sample).
	 *
	 *                       __        __
	 *                      |  log(u)   |                     1
	 * bytes_until_sample = | --------- |, where p = ---------------
	 *                      | log(1-p)  |             lg_prof_sample
	 *                                               2
	 *
	 * For more information on the math, see:
	 *
	 *   Non-Uniform Random Variate Generation
	 *   Luc Devroye
	 *   Springer-Verlag, New York, 1986
	 *   pp 500
	 *   (http://luc.devroye.org/rnbookindex.html)
	 *
	 * In the actual computation, there's a non-zero probability that our
	 * pseudo random number generator generates an exact 0, and to avoid
	 * log(0), we set u to 1.0 in case r is 0. Therefore u effectively is
	 * uniformly distributed in (0, 1] instead of [0, 1). Further, rather
	 * than taking the ceiling, we take the floor and then add 1, since
	 * otherwise bytes_until_sample would be 0 if u is exactly 1.0.
	 */
	uint64_t r = prng_lg_range_u64(tsd_prng_statep_get(tsd), 53);
	double u = (r == 0U) ? 1.0 : (double)r * (1.0/9007199254740992.0L);
	return (uint64_t)(log(u) /
	    log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))
	    + (uint64_t)1U;
#else
	not_reached();
	return TE_MAX_START_WAIT;
#endif
}

uint64_t
prof_sample_postponed_event_wait(tsd_t *tsd) {
	/*
	 * The postponed wait time for prof sample event is computed as if we
	 * want a new wait time (i.e. as if the event were triggered). If we
	 * instead postpone to the immediate next allocation, like how we're
	 * handling the other events, then we can have sampling bias, if e.g.
	 * the allocation immediately following a reentrancy always comes from
	 * the same stack trace.
	 */
	return prof_sample_new_event_wait(tsd);
}

void
prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed) {
	cassert(config_prof);
	assert(elapsed > 0 && elapsed != TE_INVALID_ELAPSED);
	if (prof_interval == 0 || !prof_active_get_unlocked()) {
		return;
	}
	if (counter_accum(tsd_tsdn(tsd), &prof_idump_accumulated, elapsed)) {
		prof_idump(tsd_tsdn(tsd));
	}
}

static void
prof_fdump(void) {
	tsd_t *tsd;

	cassert(config_prof);
	assert(opt_prof_final);

	if (!prof_booted) {
		return;
	}
	tsd = tsd_fetch();
	assert(tsd_reentrancy_level_get(tsd) == 0);

	prof_fdump_impl(tsd);
}

static bool
prof_idump_accum_init(void) {
	cassert(config_prof);

	return counter_accum_init(&prof_idump_accumulated, prof_interval);
}

void
prof_idump(tsdn_t *tsdn) {
	tsd_t *tsd;
	prof_tdata_t *tdata;

	cassert(config_prof);

	if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) {
		return;
	}
	tsd = tsdn_tsd(tsdn);
	if (tsd_reentrancy_level_get(tsd) > 0) {
		return;
	}

	tdata = prof_tdata_get(tsd, true);
	if (tdata == NULL) {
		return;
	}
	if (tdata->enq) {
		tdata->enq_idump = true;
		return;
	}

	prof_idump_impl(tsd);
}

bool
prof_mdump(tsd_t *tsd, const char *filename) {
	cassert(config_prof);
	assert(tsd_reentrancy_level_get(tsd) == 0);

	if (!opt_prof || !prof_booted) {
		return true;
	}

	return prof_mdump_impl(tsd, filename);
}

void
prof_gdump(tsdn_t *tsdn) {
	tsd_t *tsd;
	prof_tdata_t *tdata;

	cassert(config_prof);

	if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) {
		return;
	}
	tsd = tsdn_tsd(tsdn);
	if (tsd_reentrancy_level_get(tsd) > 0) {
		return;
	}

	tdata = prof_tdata_get(tsd, false);
	if (tdata == NULL) {
		return;
	}
	if (tdata->enq) {
		tdata->enq_gdump = true;
		return;
	}

	prof_gdump_impl(tsd);
}

static uint64_t
prof_thr_uid_alloc(tsdn_t *tsdn) {
	uint64_t thr_uid;

	malloc_mutex_lock(tsdn, &next_thr_uid_mtx);
	thr_uid = next_thr_uid;
	next_thr_uid++;
	malloc_mutex_unlock(tsdn, &next_thr_uid_mtx);

	return thr_uid;
}

prof_tdata_t *
prof_tdata_init(tsd_t *tsd) {
	return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0,
	    NULL, prof_thread_active_init_get(tsd_tsdn(tsd)));
}

prof_tdata_t *
prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) {
	uint64_t thr_uid = tdata->thr_uid;
	uint64_t thr_discrim = tdata->thr_discrim + 1;
	char *thread_name = (tdata->thread_name != NULL) ?
	    prof_thread_name_alloc(tsd, tdata->thread_name) : NULL;
	bool active = tdata->active;

	prof_tdata_detach(tsd, tdata);
	return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name,
	    active);
}

void
prof_tdata_cleanup(tsd_t *tsd) {
	prof_tdata_t *tdata;

	if (!config_prof) {
		return;
	}

	tdata = tsd_prof_tdata_get(tsd);
	if (tdata != NULL) {
		prof_tdata_detach(tsd, tdata);
	}
}

bool
prof_active_get(tsdn_t *tsdn) {
	bool prof_active_current;

	prof_active_assert();
	malloc_mutex_lock(tsdn, &prof_active_mtx);
	prof_active_current = prof_active;
	malloc_mutex_unlock(tsdn, &prof_active_mtx);
	return prof_active_current;
}

bool
prof_active_set(tsdn_t *tsdn, bool active) {
	bool prof_active_old;

	prof_active_assert();
	malloc_mutex_lock(tsdn, &prof_active_mtx);
	prof_active_old = prof_active;
	prof_active = active;
	malloc_mutex_unlock(tsdn, &prof_active_mtx);
	prof_active_assert();
	return prof_active_old;
}

const char *
prof_thread_name_get(tsd_t *tsd) {
	assert(tsd_reentrancy_level_get(tsd) == 0);

	prof_tdata_t *tdata;

	tdata = prof_tdata_get(tsd, true);
	if (tdata == NULL) {
		return "";
	}
	return (tdata->thread_name != NULL ? tdata->thread_name : "");
}

int
prof_thread_name_set(tsd_t *tsd, const char *thread_name) {
	if (opt_prof_sys_thread_name) {
		return ENOENT;
	} else {
		return prof_thread_name_set_impl(tsd, thread_name);
	}
}

bool
prof_thread_active_get(tsd_t *tsd) {
	assert(tsd_reentrancy_level_get(tsd) == 0);

	prof_tdata_t *tdata;

	tdata = prof_tdata_get(tsd, true);
	if (tdata == NULL) {
		return false;
	}
	return tdata->active;
}

bool
prof_thread_active_set(tsd_t *tsd, bool active) {
	assert(tsd_reentrancy_level_get(tsd) == 0);

	prof_tdata_t *tdata;

	tdata = prof_tdata_get(tsd, true);
	if (tdata == NULL) {
		return true;
	}
	tdata->active = active;
	return false;
}

bool
prof_thread_active_init_get(tsdn_t *tsdn) {
	bool active_init;

	malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
	active_init = prof_thread_active_init;
	malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
	return active_init;
}

bool
prof_thread_active_init_set(tsdn_t *tsdn, bool active_init) {
	bool active_init_old;

	malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
	active_init_old = prof_thread_active_init;
	prof_thread_active_init = active_init;
	malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
	return active_init_old;
}

bool
prof_gdump_get(tsdn_t *tsdn) {
	bool prof_gdump_current;

	malloc_mutex_lock(tsdn, &prof_gdump_mtx);
	prof_gdump_current = prof_gdump_val;
	malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
	return prof_gdump_current;
}

bool
prof_gdump_set(tsdn_t *tsdn, bool gdump) {
	bool prof_gdump_old;

	malloc_mutex_lock(tsdn, &prof_gdump_mtx);
	prof_gdump_old = prof_gdump_val;
	prof_gdump_val = gdump;
	malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
	return prof_gdump_old;
}

void
prof_boot0(void) {
	cassert(config_prof);

	memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
	    sizeof(PROF_PREFIX_DEFAULT));
}

void
prof_boot1(void) {
	cassert(config_prof);

	/*
	 * opt_prof must be in its final state before any arenas are
	 * initialized, so this function must be executed early.
	 */

	if (opt_prof_leak && !opt_prof) {
		/*
		 * Enable opt_prof, but in such a way that profiles are never
		 * automatically dumped.
		 */
		opt_prof = true;
		opt_prof_gdump = false;
	} else if (opt_prof) {
		if (opt_lg_prof_interval >= 0) {
			prof_interval = (((uint64_t)1U) <<
			    opt_lg_prof_interval);
		}
	}
}

bool
prof_boot2(tsd_t *tsd, base_t *base) {
	cassert(config_prof);

	if (opt_prof) {
		unsigned i;

		lg_prof_sample = opt_lg_prof_sample;
		prof_unbias_map_init();

		prof_active = opt_prof_active;
		if (malloc_mutex_init(&prof_active_mtx, "prof_active",
		    WITNESS_RANK_PROF_ACTIVE, malloc_mutex_rank_exclusive)) {
			return true;
		}

		prof_gdump_val = opt_prof_gdump;
		if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump",
		    WITNESS_RANK_PROF_GDUMP, malloc_mutex_rank_exclusive)) {
			return true;
		}

		prof_thread_active_init = opt_prof_thread_active_init;
		if (malloc_mutex_init(&prof_thread_active_init_mtx,
		    "prof_thread_active_init",
		    WITNESS_RANK_PROF_THREAD_ACTIVE_INIT,
		    malloc_mutex_rank_exclusive)) {
			return true;
		}

		if (prof_data_init(tsd)) {
			return true;
		}

		if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx",
		    WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) {
			return true;
		}

		if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas",
		    WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) {
			return true;
		}

		next_thr_uid = 0;
		if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid",
		    WITNESS_RANK_PROF_NEXT_THR_UID, malloc_mutex_rank_exclusive)) {
			return true;
		}

		if (prof_idump_accum_init()) {
			return true;
		}

		if (malloc_mutex_init(&prof_dump_filename_mtx, "prof_dump_filename",
		    WITNESS_RANK_PROF_DUMP_FILENAME, malloc_mutex_rank_exclusive)) {
			return true;
		}
		if (malloc_mutex_init(&prof_dump_mtx, "prof_dump",
		    WITNESS_RANK_PROF_DUMP, malloc_mutex_rank_exclusive)) {
			return true;
		}

		if (opt_prof_final && opt_prof_prefix[0] != '\0' &&
		    atexit(prof_fdump) != 0) {
			malloc_write("<jemalloc>: Error in atexit()\n");
			if (opt_abort) {
				abort();
			}
		}

		if (prof_log_init(tsd)) {
			return true;
		}

		if (prof_recent_init()) {
			return true;
		}

		prof_base = base;

		gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), base,
		    PROF_NCTX_LOCKS * sizeof(malloc_mutex_t), CACHELINE);
		if (gctx_locks == NULL) {
			return true;
		}
		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
			if (malloc_mutex_init(&gctx_locks[i], "prof_gctx",
			    WITNESS_RANK_PROF_GCTX,
			    malloc_mutex_rank_exclusive)) {
				return true;
			}
		}

		tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), base,
		    PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t), CACHELINE);
		if (tdata_locks == NULL) {
			return true;
		}
		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
			if (malloc_mutex_init(&tdata_locks[i], "prof_tdata",
			    WITNESS_RANK_PROF_TDATA,
			    malloc_mutex_rank_exclusive)) {
				return true;
			}
		}

		prof_unwind_init();
	}
	prof_booted = true;

	return false;
}

void
prof_prefork0(tsdn_t *tsdn) {
	if (config_prof && opt_prof) {
		unsigned i;

		malloc_mutex_prefork(tsdn, &prof_dump_mtx);
		malloc_mutex_prefork(tsdn, &bt2gctx_mtx);
		malloc_mutex_prefork(tsdn, &tdatas_mtx);
		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
			malloc_mutex_prefork(tsdn, &tdata_locks[i]);
		}
		malloc_mutex_prefork(tsdn, &log_mtx);
		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
			malloc_mutex_prefork(tsdn, &gctx_locks[i]);
		}
		malloc_mutex_prefork(tsdn, &prof_recent_dump_mtx);
	}
}

void
prof_prefork1(tsdn_t *tsdn) {
	if (config_prof && opt_prof) {
		counter_prefork(tsdn, &prof_idump_accumulated);
		malloc_mutex_prefork(tsdn, &prof_active_mtx);
		malloc_mutex_prefork(tsdn, &prof_dump_filename_mtx);
		malloc_mutex_prefork(tsdn, &prof_gdump_mtx);
		malloc_mutex_prefork(tsdn, &next_thr_uid_mtx);
		malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx);
		malloc_mutex_prefork(tsdn, &prof_recent_alloc_mtx);
	}
}

void
prof_postfork_parent(tsdn_t *tsdn) {
	if (config_prof && opt_prof) {
		unsigned i;

		malloc_mutex_postfork_parent(tsdn, &prof_recent_alloc_mtx);
		malloc_mutex_postfork_parent(tsdn,
		    &prof_thread_active_init_mtx);
		malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx);
		malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx);
		malloc_mutex_postfork_parent(tsdn, &prof_dump_filename_mtx);
		malloc_mutex_postfork_parent(tsdn, &prof_active_mtx);
		counter_postfork_parent(tsdn, &prof_idump_accumulated);
		malloc_mutex_postfork_parent(tsdn, &prof_recent_dump_mtx);
		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
			malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]);
		}
		malloc_mutex_postfork_parent(tsdn, &log_mtx);
		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
			malloc_mutex_postfork_parent(tsdn, &tdata_locks[i]);
		}
		malloc_mutex_postfork_parent(tsdn, &tdatas_mtx);
		malloc_mutex_postfork_parent(tsdn, &bt2gctx_mtx);
		malloc_mutex_postfork_parent(tsdn, &prof_dump_mtx);
	}
}

void
prof_postfork_child(tsdn_t *tsdn) {
	if (config_prof && opt_prof) {
		unsigned i;

		malloc_mutex_postfork_child(tsdn, &prof_recent_alloc_mtx);
		malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx);
		malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx);
		malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx);
		malloc_mutex_postfork_child(tsdn, &prof_dump_filename_mtx);
		malloc_mutex_postfork_child(tsdn, &prof_active_mtx);
		counter_postfork_child(tsdn, &prof_idump_accumulated);
		malloc_mutex_postfork_child(tsdn, &prof_recent_dump_mtx);
		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
			malloc_mutex_postfork_child(tsdn, &gctx_locks[i]);
		}
		malloc_mutex_postfork_child(tsdn, &log_mtx);
		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
			malloc_mutex_postfork_child(tsdn, &tdata_locks[i]);
		}
		malloc_mutex_postfork_child(tsdn, &tdatas_mtx);
		malloc_mutex_postfork_child(tsdn, &bt2gctx_mtx);
		malloc_mutex_postfork_child(tsdn, &prof_dump_mtx);
	}
}

/******************************************************************************/