From b55419f9b99ab416f035179593370401af8d213f Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 19 Nov 2019 16:24:57 -0800 Subject: [PATCH] Restructure profiling Develop a new data structure and code logic for holding profiling-related information stored in the extent that may be needed after the extent is released, which in particular is the case for the reallocation code path (e.g. in `rallocx()` and `xallocx()`). The data structure is a generalization of `prof_tctx_t`: previously we only copied out the `prof_tctx` before the extent was released, but additional fields may be needed. Currently the only additional field is the allocation time field, but there may be more fields in the future. The restructuring also resolves a bug: `prof_realloc()` mistakenly passed the new `ptr` to `prof_free_sampled_object()`, whereas passing in the `old_ptr` would have crashed because it has already been released by then. Now the essential profiling information is collectively copied out early and safely passed to `prof_free_sampled_object()` after the extent is released. 
--- include/jemalloc/internal/arena_inlines_b.h | 44 +++++++++------------ include/jemalloc/internal/extent.h | 15 +++---- include/jemalloc/internal/large_externs.h | 5 +-- include/jemalloc/internal/prof_externs.h | 5 +-- include/jemalloc/internal/prof_inlines_b.h | 34 +++++++--------- include/jemalloc/internal/prof_structs.h | 7 ++++ include/jemalloc/internal/prof_types.h | 1 + src/jemalloc.c | 27 ++++++------- src/large.c | 12 ++---- src/prof.c | 9 +++-- src/prof_log.c | 5 ++- test/unit/prof_tctx.c | 10 ++--- 12 files changed, 82 insertions(+), 92 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 7ac2f942..dd743cea 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -40,23 +40,31 @@ arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { return arena_choose(tsd, NULL); } -JEMALLOC_ALWAYS_INLINE prof_tctx_t * -arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { +JEMALLOC_ALWAYS_INLINE void +arena_prof_info_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx, + prof_info_t *prof_info) { cassert(config_prof); assert(ptr != NULL); + assert(prof_info != NULL); + + const extent_t *extent; + bool is_slab; /* Static check. */ if (alloc_ctx == NULL) { - const extent_t *extent = iealloc(tsdn, ptr); - if (unlikely(!extent_slab_get(extent))) { - return large_prof_tctx_get(tsdn, extent); - } - } else { - if (unlikely(!alloc_ctx->slab)) { - return large_prof_tctx_get(tsdn, iealloc(tsdn, ptr)); - } + extent = iealloc(tsdn, ptr); + is_slab = extent_slab_get(extent); + } else if (!unlikely(is_slab = alloc_ctx->slab)) { + extent = iealloc(tsdn, ptr); + } + + if (unlikely(!is_slab)) { + /* extent must have been initialized at this point. 
*/ + large_prof_info_get(tsdn, extent, prof_info); + } else { + memset(prof_info, 0, sizeof(prof_info_t)); + prof_info->prof_tctx = (prof_tctx_t *)(uintptr_t)1U; } - return (prof_tctx_t *)(uintptr_t)1U; } JEMALLOC_ALWAYS_INLINE void @@ -89,20 +97,6 @@ arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { large_prof_tctx_reset(tsdn, extent); } -JEMALLOC_ALWAYS_INLINE nstime_t -arena_prof_alloc_time_get(tsdn_t *tsdn, const void *ptr) { - cassert(config_prof); - assert(ptr != NULL); - - extent_t *extent = iealloc(tsdn, ptr); - /* - * Unlike arena_prof_prof_tctx_{get, set}, we only call this once we're - * sure we have a sampled allocation. - */ - assert(!extent_slab_get(extent)); - return large_prof_alloc_time_get(extent); -} - JEMALLOC_ALWAYS_INLINE void arena_prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, nstime_t t) { cassert(config_prof); diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 92c34aec..c47beafd 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -333,15 +333,12 @@ extent_slab_data_get_const(const extent_t *extent) { return &extent->e_slab_data; } -static inline prof_tctx_t * -extent_prof_tctx_get(const extent_t *extent) { - return (prof_tctx_t *)atomic_load_p(&extent->e_prof_tctx, - ATOMIC_ACQUIRE); -} - -static inline nstime_t -extent_prof_alloc_time_get(const extent_t *extent) { - return extent->e_alloc_time; +static inline void +extent_prof_info_get(const extent_t *extent, prof_info_t *prof_info) { + assert(prof_info != NULL); + prof_info->prof_tctx = (prof_tctx_t *)atomic_load_p( + &extent->e_prof_tctx, ATOMIC_ACQUIRE); + prof_info->alloc_time = extent->e_alloc_time; } static inline void diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index a05019e8..9a1ff160 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -22,11 +22,10 @@ void 
large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent); void large_dalloc_finish(tsdn_t *tsdn, extent_t *extent); void large_dalloc(tsdn_t *tsdn, extent_t *extent); size_t large_salloc(tsdn_t *tsdn, const extent_t *extent); -prof_tctx_t *large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent); +void large_prof_info_get(tsdn_t *tsdn, const extent_t *extent, + prof_info_t *prof_info); void large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx); void large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent); - -nstime_t large_prof_alloc_time_get(const extent_t *extent); void large_prof_alloc_time_set(extent_t *extent, nstime_t time); #endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index fd18ac48..47e47ba6 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -51,8 +51,7 @@ void prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize); void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); -void prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, - prof_tctx_t *tctx); +void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info); void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); @@ -102,7 +101,7 @@ void prof_postfork_parent(tsdn_t *tsdn); void prof_postfork_child(tsdn_t *tsdn); void prof_sample_threshold_update(tsd_t *tsd); -void prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx); +void prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info); bool prof_log_start(tsdn_t *tsdn, const char *filename); bool prof_log_stop(tsdn_t *tsdn); bool prof_log_init(tsd_t *tsdn); diff --git a/include/jemalloc/internal/prof_inlines_b.h 
b/include/jemalloc/internal/prof_inlines_b.h index 388537e6..5acb4ca1 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -39,12 +39,14 @@ prof_tdata_get(tsd_t *tsd, bool create) { return tdata; } -JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { +JEMALLOC_ALWAYS_INLINE void +prof_info_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx, + prof_info_t *prof_info) { cassert(config_prof); assert(ptr != NULL); + assert(prof_info != NULL); - return arena_prof_tctx_get(tsdn, ptr, alloc_ctx); + arena_prof_info_get(tsdn, ptr, alloc_ctx, prof_info); } JEMALLOC_ALWAYS_INLINE void @@ -64,14 +66,6 @@ prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { arena_prof_tctx_reset(tsdn, ptr, tctx); } -JEMALLOC_ALWAYS_INLINE nstime_t -prof_alloc_time_get(tsdn_t *tsdn, const void *ptr) { - cassert(config_prof); - assert(ptr != NULL); - - return arena_prof_alloc_time_get(tsdn, ptr); -} - JEMALLOC_ALWAYS_INLINE void prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, nstime_t t) { cassert(config_prof); @@ -152,7 +146,7 @@ prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx, JEMALLOC_ALWAYS_INLINE void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr, size_t old_usize, - prof_tctx_t *old_tctx) { + prof_info_t *old_prof_info) { bool sampled, old_sampled, moved; cassert(config_prof); @@ -174,7 +168,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, } sampled = ((uintptr_t)tctx > (uintptr_t)1U); - old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); + old_sampled = ((uintptr_t)old_prof_info->prof_tctx > (uintptr_t)1U); moved = (ptr != old_ptr); if (unlikely(sampled)) { @@ -191,8 +185,9 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, */ prof_tctx_reset(tsd_tsdn(tsd), ptr, tctx); } else { - 
assert((uintptr_t)prof_tctx_get(tsd_tsdn(tsd), ptr, NULL) == - (uintptr_t)1U); + prof_info_t prof_info; + prof_info_get(tsd_tsdn(tsd), ptr, NULL, &prof_info); + assert((uintptr_t)prof_info.prof_tctx == (uintptr_t)1U); } /* @@ -203,19 +198,20 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, * counters. */ if (unlikely(old_sampled)) { - prof_free_sampled_object(tsd, ptr, old_usize, old_tctx); + prof_free_sampled_object(tsd, old_usize, old_prof_info); } } JEMALLOC_ALWAYS_INLINE void prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) { - prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr, alloc_ctx); + prof_info_t prof_info; + prof_info_get(tsd_tsdn(tsd), ptr, alloc_ctx, &prof_info); cassert(config_prof); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); - if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { - prof_free_sampled_object(tsd, ptr, usize, tctx); + if (unlikely((uintptr_t)prof_info.prof_tctx > (uintptr_t)1U)) { + prof_free_sampled_object(tsd, usize, &prof_info); } } diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 9a00a189..17a56508 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -96,6 +96,13 @@ struct prof_tctx_s { }; typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; +struct prof_info_s { + /* Points to the prof_tctx_t corresponding to the allocation. */ + prof_tctx_t *prof_tctx; + /* Time when the allocation was made. */ + nstime_t alloc_time; +}; + struct prof_gctx_s { /* Protects nlimbo, cnt_summed, and tctxs. 
*/ malloc_mutex_t *lock; diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index a50653bb..7a34385b 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -5,6 +5,7 @@ typedef struct prof_bt_s prof_bt_t; typedef struct prof_accum_s prof_accum_t; typedef struct prof_cnt_s prof_cnt_t; typedef struct prof_tctx_s prof_tctx_t; +typedef struct prof_info_s prof_info_t; typedef struct prof_gctx_s prof_gctx_t; typedef struct prof_tdata_s prof_tdata_t; diff --git a/src/jemalloc.c b/src/jemalloc.c index e8ac2fc9..17709923 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3009,13 +3009,11 @@ JEMALLOC_ALWAYS_INLINE void * irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, size_t alignment, size_t *usize, bool zero, tcache_t *tcache, arena_t *arena, alloc_ctx_t *alloc_ctx, hook_ralloc_args_t *hook_args) { + prof_info_t old_prof_info; + prof_info_get(tsd_tsdn(tsd), old_ptr, alloc_ctx, &old_prof_info); + bool prof_active = prof_active_get_unlocked(); + prof_tctx_t *tctx = prof_alloc_prep(tsd, *usize, prof_active, false); void *p; - bool prof_active; - prof_tctx_t *old_tctx, *tctx; - - prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr, alloc_ctx); - tctx = prof_alloc_prep(tsd, *usize, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize, *usize, alignment, zero, tcache, arena, tctx, hook_args); @@ -3040,7 +3038,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, *usize = isalloc(tsd_tsdn(tsd), p); } prof_realloc(tsd, p, *usize, tctx, prof_active, false, old_ptr, - old_usize, old_tctx); + old_usize, &old_prof_info); return p; } @@ -3262,18 +3260,15 @@ ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, JEMALLOC_ALWAYS_INLINE size_t ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, size_t 
extra, size_t alignment, bool zero, alloc_ctx_t *alloc_ctx) { - size_t usize_max, usize; - bool prof_active; - prof_tctx_t *old_tctx, *tctx; - - prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd_tsdn(tsd), ptr, alloc_ctx); + prof_info_t old_prof_info; + prof_info_get(tsd_tsdn(tsd), ptr, alloc_ctx, &old_prof_info); /* * usize isn't knowable before ixalloc() returns when extra is non-zero. * Therefore, compute its maximum possible value and use that in * prof_alloc_prep() to decide whether to capture a backtrace. * prof_realloc() will use the actual usize to decide whether to sample. */ + size_t usize_max; if (alignment == 0) { usize_max = sz_s2u(size+extra); assert(usize_max > 0 @@ -3292,8 +3287,10 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, } } thread_event(tsd, usize_max); - tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); + bool prof_active = prof_active_get_unlocked(); + prof_tctx_t *tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); + size_t usize; if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { usize = ixallocx_prof_sample(tsd_tsdn(tsd), ptr, old_usize, size, extra, alignment, zero, tctx); @@ -3318,7 +3315,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, return usize; } prof_realloc(tsd, ptr, usize, tctx, prof_active, false, ptr, old_usize, - old_tctx); + &old_prof_info); return usize; } diff --git a/src/large.c b/src/large.c index 8aaa3ce2..6eeb7f49 100644 --- a/src/large.c +++ b/src/large.c @@ -367,9 +367,10 @@ large_salloc(tsdn_t *tsdn, const extent_t *extent) { return extent_usize_get(extent); } -prof_tctx_t * -large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent) { - return extent_prof_tctx_get(extent); +void +large_prof_info_get(tsdn_t *tsdn, const extent_t *extent, + prof_info_t *prof_info) { + extent_prof_info_get(extent, prof_info); } void @@ -382,11 +383,6 @@ large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent) { large_prof_tctx_set(tsdn, extent, 
(prof_tctx_t *)(uintptr_t)1U); } -nstime_t -large_prof_alloc_time_get(const extent_t *extent) { - return extent_prof_alloc_time_get(extent); -} - void large_prof_alloc_time_set(extent_t *extent, nstime_t t) { extent_prof_alloc_time_set(extent, t); diff --git a/src/prof.c b/src/prof.c index 0590482c..ccac3c0f 100644 --- a/src/prof.c +++ b/src/prof.c @@ -187,8 +187,11 @@ prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, } void -prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, - prof_tctx_t *tctx) { +prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { + assert(prof_info != NULL); + prof_tctx_t *tctx = prof_info->prof_tctx; + assert((uintptr_t)tctx > (uintptr_t)1U); + malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); assert(tctx->cnts.curobjs > 0); @@ -196,7 +199,7 @@ prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, tctx->cnts.curobjs--; tctx->cnts.curbytes -= usize; - prof_try_log(tsd, ptr, usize, tctx); + prof_try_log(tsd, usize, prof_info); if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) { prof_tctx_destroy(tsd, tctx); diff --git a/src/prof_log.c b/src/prof_log.c index 73ca7417..5747c8db 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -199,7 +199,8 @@ prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { } void -prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { +prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { + prof_tctx_t *tctx = prof_info->prof_tctx; malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); prof_tdata_t *cons_tdata = prof_tdata_get(tsd, false); @@ -229,7 +230,7 @@ prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { log_tables_initialized = true; } - nstime_t alloc_time = prof_alloc_time_get(tsd_tsdn(tsd), ptr); + nstime_t alloc_time = prof_info->alloc_time; nstime_t free_time = NSTIME_ZERO_INITIALIZER; nstime_update(&free_time); diff --git a/test/unit/prof_tctx.c 
b/test/unit/prof_tctx.c index ff3b2b0c..30df71b2 100644 --- a/test/unit/prof_tctx.c +++ b/test/unit/prof_tctx.c @@ -4,7 +4,7 @@ TEST_BEGIN(test_prof_realloc) { tsdn_t *tsdn; int flags; void *p, *q; - prof_tctx_t *tctx_p, *tctx_q; + prof_info_t prof_info_p, prof_info_q; uint64_t curobjs_0, curobjs_1, curobjs_2, curobjs_3; test_skip_if(!config_prof); @@ -15,8 +15,8 @@ TEST_BEGIN(test_prof_realloc) { prof_cnt_all(&curobjs_0, NULL, NULL, NULL); p = mallocx(1024, flags); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - tctx_p = prof_tctx_get(tsdn, p, NULL); - assert_ptr_ne(tctx_p, (prof_tctx_t *)(uintptr_t)1U, + prof_info_get(tsdn, p, NULL, &prof_info_p); + assert_ptr_ne(prof_info_p.prof_tctx, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); prof_cnt_all(&curobjs_1, NULL, NULL, NULL); assert_u64_eq(curobjs_0 + 1, curobjs_1, @@ -25,8 +25,8 @@ TEST_BEGIN(test_prof_realloc) { q = rallocx(p, 2048, flags); assert_ptr_ne(p, q, "Expected move"); assert_ptr_not_null(p, "Unexpected rmallocx() failure"); - tctx_q = prof_tctx_get(tsdn, q, NULL); - assert_ptr_ne(tctx_q, (prof_tctx_t *)(uintptr_t)1U, + prof_info_get(tsdn, q, NULL, &prof_info_q); + assert_ptr_ne(prof_info_q.prof_tctx, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); prof_cnt_all(&curobjs_2, NULL, NULL, NULL); assert_u64_eq(curobjs_1, curobjs_2,