Replace the separate prof tctx / alloc-time getters with one prof_info_t getter.

Introduces struct prof_info_s (prof_tctx + alloc_time) and replaces the
prof/arena_prof/large_prof/extent_prof *_tctx_get and *_alloc_time_get
accessors with *_prof_info_get, which fills a caller-provided prof_info_t.
prof_free_sampled_object() and prof_try_log() now take the prof_info_t
instead of (ptr, tctx), so the allocation time is read from the struct
rather than being looked up again from ptr.

Review notes on the added lines of arena_prof_info_get():
  * the branch hint wraps the negated test, unlikely(!(is_slab = ...)), so
    it agrees with the unlikely(!is_slab) check that follows it;
  * extent starts out as NULL so it is never read while indeterminate.

diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h
index 7ac2f942..dd743cea 100644
--- a/include/jemalloc/internal/arena_inlines_b.h
+++ b/include/jemalloc/internal/arena_inlines_b.h
@@ -40,23 +40,31 @@ arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) {
 	return arena_choose(tsd, NULL);
 }
 
-JEMALLOC_ALWAYS_INLINE prof_tctx_t *
-arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) {
+JEMALLOC_ALWAYS_INLINE void
+arena_prof_info_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx,
+    prof_info_t *prof_info) {
 	cassert(config_prof);
 	assert(ptr != NULL);
+	assert(prof_info != NULL);
+
+	const extent_t *extent = NULL;
+	bool is_slab;
 
 	/* Static check. */
 	if (alloc_ctx == NULL) {
-		const extent_t *extent = iealloc(tsdn, ptr);
-		if (unlikely(!extent_slab_get(extent))) {
-			return large_prof_tctx_get(tsdn, extent);
-		}
-	} else {
-		if (unlikely(!alloc_ctx->slab)) {
-			return large_prof_tctx_get(tsdn, iealloc(tsdn, ptr));
-		}
+		extent = iealloc(tsdn, ptr);
+		is_slab = extent_slab_get(extent);
+	} else if (unlikely(!(is_slab = alloc_ctx->slab))) {
+		extent = iealloc(tsdn, ptr);
+	}
+
+	if (unlikely(!is_slab)) {
+		/* extent must have been initialized at this point. */
+		large_prof_info_get(tsdn, extent, prof_info);
+	} else {
+		memset(prof_info, 0, sizeof(prof_info_t));
+		prof_info->prof_tctx = (prof_tctx_t *)(uintptr_t)1U;
 	}
-	return (prof_tctx_t *)(uintptr_t)1U;
 }
 
 JEMALLOC_ALWAYS_INLINE void
@@ -89,20 +97,6 @@ arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) {
 	large_prof_tctx_reset(tsdn, extent);
 }
 
-JEMALLOC_ALWAYS_INLINE nstime_t
-arena_prof_alloc_time_get(tsdn_t *tsdn, const void *ptr) {
-	cassert(config_prof);
-	assert(ptr != NULL);
-
-	extent_t *extent = iealloc(tsdn, ptr);
-	/*
-	 * Unlike arena_prof_prof_tctx_{get, set}, we only call this once we're
-	 * sure we have a sampled allocation.
-	 */
-	assert(!extent_slab_get(extent));
-	return large_prof_alloc_time_get(extent);
-}
-
 JEMALLOC_ALWAYS_INLINE void
 arena_prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, nstime_t t) {
 	cassert(config_prof);
diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h
index 92c34aec..c47beafd 100644
--- a/include/jemalloc/internal/extent.h
+++ b/include/jemalloc/internal/extent.h
@@ -333,15 +333,12 @@ extent_slab_data_get_const(const extent_t *extent) {
 	return &extent->e_slab_data;
 }
 
-static inline prof_tctx_t *
-extent_prof_tctx_get(const extent_t *extent) {
-	return (prof_tctx_t *)atomic_load_p(&extent->e_prof_tctx,
-	    ATOMIC_ACQUIRE);
-}
-
-static inline nstime_t
-extent_prof_alloc_time_get(const extent_t *extent) {
-	return extent->e_alloc_time;
+static inline void
+extent_prof_info_get(const extent_t *extent, prof_info_t *prof_info) {
+	assert(prof_info != NULL);
+	prof_info->prof_tctx = (prof_tctx_t *)atomic_load_p(
+	    &extent->e_prof_tctx, ATOMIC_ACQUIRE);
+	prof_info->alloc_time = extent->e_alloc_time;
 }
 
 static inline void
diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h
index a05019e8..9a1ff160 100644
--- a/include/jemalloc/internal/large_externs.h
+++ b/include/jemalloc/internal/large_externs.h
@@ -22,11 +22,10 @@ void large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent);
 void large_dalloc_finish(tsdn_t *tsdn, extent_t *extent);
 void large_dalloc(tsdn_t *tsdn, extent_t *extent);
 size_t large_salloc(tsdn_t *tsdn, const extent_t *extent);
-prof_tctx_t *large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent);
+void large_prof_info_get(tsdn_t *tsdn, const extent_t *extent,
+    prof_info_t *prof_info);
 void large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx);
 void large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent);
-
-nstime_t large_prof_alloc_time_get(const extent_t *extent);
 void large_prof_alloc_time_set(extent_t *extent, nstime_t time);
 
 #endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */
diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h
index fd18ac48..47e47ba6 100644
--- a/include/jemalloc/internal/prof_externs.h
+++ b/include/jemalloc/internal/prof_externs.h
@@ -51,8 +51,7 @@ void prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize);
 void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
 void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
     prof_tctx_t *tctx);
-void prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize,
-    prof_tctx_t *tctx);
+void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info);
 void bt_init(prof_bt_t *bt, void **vec);
 void prof_backtrace(tsd_t *tsd, prof_bt_t *bt);
 prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt);
@@ -102,7 +101,7 @@ void prof_postfork_parent(tsdn_t *tsdn);
 void prof_postfork_child(tsdn_t *tsdn);
 void prof_sample_threshold_update(tsd_t *tsd);
 
-void prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx);
+void prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info);
 bool prof_log_start(tsdn_t *tsdn, const char *filename);
 bool prof_log_stop(tsdn_t *tsdn);
 bool prof_log_init(tsd_t *tsdn);
diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h
index 388537e6..5acb4ca1 100644
--- a/include/jemalloc/internal/prof_inlines_b.h
+++ b/include/jemalloc/internal/prof_inlines_b.h
@@ -39,12 +39,14 @@ prof_tdata_get(tsd_t *tsd, bool create) {
 	return tdata;
 }
 
-JEMALLOC_ALWAYS_INLINE prof_tctx_t *
-prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) {
+JEMALLOC_ALWAYS_INLINE void
+prof_info_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx,
+    prof_info_t *prof_info) {
 	cassert(config_prof);
 	assert(ptr != NULL);
+	assert(prof_info != NULL);
 
-	return arena_prof_tctx_get(tsdn, ptr, alloc_ctx);
+	arena_prof_info_get(tsdn, ptr, alloc_ctx, prof_info);
 }
 
 JEMALLOC_ALWAYS_INLINE void
@@ -64,14 +66,6 @@ prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) {
 	arena_prof_tctx_reset(tsdn, ptr, tctx);
 }
 
-JEMALLOC_ALWAYS_INLINE nstime_t
-prof_alloc_time_get(tsdn_t *tsdn, const void *ptr) {
-	cassert(config_prof);
-	assert(ptr != NULL);
-
-	return arena_prof_alloc_time_get(tsdn, ptr);
-}
-
 JEMALLOC_ALWAYS_INLINE void
 prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, nstime_t t) {
 	cassert(config_prof);
@@ -152,7 +146,7 @@ prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx,
 JEMALLOC_ALWAYS_INLINE void
 prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
     bool prof_active, bool updated, const void *old_ptr, size_t old_usize,
-    prof_tctx_t *old_tctx) {
+    prof_info_t *old_prof_info) {
 	bool sampled, old_sampled, moved;
 
 	cassert(config_prof);
@@ -174,7 +168,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
 	}
 
 	sampled = ((uintptr_t)tctx > (uintptr_t)1U);
-	old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U);
+	old_sampled = ((uintptr_t)old_prof_info->prof_tctx > (uintptr_t)1U);
 	moved = (ptr != old_ptr);
 
 	if (unlikely(sampled)) {
@@ -191,8 +185,9 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
 		 */
 		prof_tctx_reset(tsd_tsdn(tsd), ptr, tctx);
 	} else {
-		assert((uintptr_t)prof_tctx_get(tsd_tsdn(tsd), ptr, NULL) ==
-		    (uintptr_t)1U);
+		prof_info_t prof_info;
+		prof_info_get(tsd_tsdn(tsd), ptr, NULL, &prof_info);
+		assert((uintptr_t)prof_info.prof_tctx == (uintptr_t)1U);
 	}
 
 	/*
@@ -203,19 +198,20 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
 	 * counters.
 	 */
 	if (unlikely(old_sampled)) {
-		prof_free_sampled_object(tsd, ptr, old_usize, old_tctx);
+		prof_free_sampled_object(tsd, old_usize, old_prof_info);
 	}
 }
 
 JEMALLOC_ALWAYS_INLINE void
 prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) {
-	prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr, alloc_ctx);
+	prof_info_t prof_info;
+	prof_info_get(tsd_tsdn(tsd), ptr, alloc_ctx, &prof_info);
 
 	cassert(config_prof);
 	assert(usize == isalloc(tsd_tsdn(tsd), ptr));
 
-	if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) {
-		prof_free_sampled_object(tsd, ptr, usize, tctx);
+	if (unlikely((uintptr_t)prof_info.prof_tctx > (uintptr_t)1U)) {
+		prof_free_sampled_object(tsd, usize, &prof_info);
 	}
 }
 
diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h
index 9a00a189..17a56508 100644
--- a/include/jemalloc/internal/prof_structs.h
+++ b/include/jemalloc/internal/prof_structs.h
@@ -96,6 +96,13 @@ struct prof_tctx_s {
 };
 typedef rb_tree(prof_tctx_t) prof_tctx_tree_t;
 
+struct prof_info_s {
+	/* Points to the prof_tctx_t corresponding to the allocation. */
+	prof_tctx_t		*prof_tctx;
+	/* Time when the allocation was made. */
+	nstime_t		alloc_time;
+};
+
 struct prof_gctx_s {
 	/* Protects nlimbo, cnt_summed, and tctxs. */
 	malloc_mutex_t		*lock;
diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h
index a50653bb..7a34385b 100644
--- a/include/jemalloc/internal/prof_types.h
+++ b/include/jemalloc/internal/prof_types.h
@@ -5,6 +5,7 @@ typedef struct prof_bt_s prof_bt_t;
 typedef struct prof_accum_s prof_accum_t;
 typedef struct prof_cnt_s prof_cnt_t;
 typedef struct prof_tctx_s prof_tctx_t;
+typedef struct prof_info_s prof_info_t;
 typedef struct prof_gctx_s prof_gctx_t;
 typedef struct prof_tdata_s prof_tdata_t;
 
diff --git a/src/jemalloc.c b/src/jemalloc.c
index e8ac2fc9..17709923 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -3009,13 +3009,11 @@ JEMALLOC_ALWAYS_INLINE void *
 irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size,
     size_t alignment, size_t *usize, bool zero, tcache_t *tcache,
     arena_t *arena, alloc_ctx_t *alloc_ctx, hook_ralloc_args_t *hook_args) {
+	prof_info_t old_prof_info;
+	prof_info_get(tsd_tsdn(tsd), old_ptr, alloc_ctx, &old_prof_info);
+	bool prof_active = prof_active_get_unlocked();
+	prof_tctx_t *tctx = prof_alloc_prep(tsd, *usize, prof_active, false);
 	void *p;
-	bool prof_active;
-	prof_tctx_t *old_tctx, *tctx;
-
-	prof_active = prof_active_get_unlocked();
-	old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr, alloc_ctx);
-	tctx = prof_alloc_prep(tsd, *usize, prof_active, false);
 	if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) {
 		p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize,
 		    *usize, alignment, zero, tcache, arena, tctx, hook_args);
@@ -3040,7 +3038,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size,
 		*usize = isalloc(tsd_tsdn(tsd), p);
 	}
 	prof_realloc(tsd, p, *usize, tctx, prof_active, false, old_ptr,
-	    old_usize, old_tctx);
+	    old_usize, &old_prof_info);
 
 	return p;
 }
@@ -3262,18 +3260,15 @@ ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size,
 JEMALLOC_ALWAYS_INLINE size_t
 ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
     size_t extra, size_t alignment, bool zero, alloc_ctx_t *alloc_ctx) {
-	size_t usize_max, usize;
-	bool prof_active;
-	prof_tctx_t *old_tctx, *tctx;
-
-	prof_active = prof_active_get_unlocked();
-	old_tctx = prof_tctx_get(tsd_tsdn(tsd), ptr, alloc_ctx);
+	prof_info_t old_prof_info;
+	prof_info_get(tsd_tsdn(tsd), ptr, alloc_ctx, &old_prof_info);
 	/*
 	 * usize isn't knowable before ixalloc() returns when extra is non-zero.
 	 * Therefore, compute its maximum possible value and use that in
 	 * prof_alloc_prep() to decide whether to capture a backtrace.
 	 * prof_realloc() will use the actual usize to decide whether to sample.
 	 */
+	size_t usize_max;
 	if (alignment == 0) {
 		usize_max = sz_s2u(size+extra);
 		assert(usize_max > 0
@@ -3292,8 +3287,10 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
 		}
 	}
 	thread_event(tsd, usize_max);
-	tctx = prof_alloc_prep(tsd, usize_max, prof_active, false);
+	bool prof_active = prof_active_get_unlocked();
+	prof_tctx_t *tctx = prof_alloc_prep(tsd, usize_max, prof_active, false);
 
+	size_t usize;
 	if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) {
 		usize = ixallocx_prof_sample(tsd_tsdn(tsd), ptr, old_usize,
 		    size, extra, alignment, zero, tctx);
@@ -3318,7 +3315,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
 		return usize;
 	}
 	prof_realloc(tsd, ptr, usize, tctx, prof_active, false, ptr, old_usize,
-	    old_tctx);
+	    &old_prof_info);
 
 	return usize;
 }
diff --git a/src/large.c b/src/large.c
index 8aaa3ce2..6eeb7f49 100644
--- a/src/large.c
+++ b/src/large.c
@@ -367,9 +367,10 @@ large_salloc(tsdn_t *tsdn, const extent_t *extent) {
 	return extent_usize_get(extent);
 }
 
-prof_tctx_t *
-large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent) {
-	return extent_prof_tctx_get(extent);
+void
+large_prof_info_get(tsdn_t *tsdn, const extent_t *extent,
+    prof_info_t *prof_info) {
+	extent_prof_info_get(extent, prof_info);
 }
 
 void
@@ -382,11 +383,6 @@ large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent) {
 	large_prof_tctx_set(tsdn, extent, (prof_tctx_t *)(uintptr_t)1U);
 }
 
-nstime_t
-large_prof_alloc_time_get(const extent_t *extent) {
-	return extent_prof_alloc_time_get(extent);
-}
-
 void
 large_prof_alloc_time_set(extent_t *extent, nstime_t t) {
 	extent_prof_alloc_time_set(extent, t);
diff --git a/src/prof.c b/src/prof.c
index 0590482c..ccac3c0f 100644
--- a/src/prof.c
+++ b/src/prof.c
@@ -187,8 +187,11 @@ prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
 }
 
 void
-prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize,
-    prof_tctx_t *tctx) {
+prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info) {
+	assert(prof_info != NULL);
+	prof_tctx_t *tctx = prof_info->prof_tctx;
+	assert((uintptr_t)tctx > (uintptr_t)1U);
+
 	malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
 
 	assert(tctx->cnts.curobjs > 0);
@@ -196,7 +199,7 @@ prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize,
 	tctx->cnts.curobjs--;
 	tctx->cnts.curbytes -= usize;
 
-	prof_try_log(tsd, ptr, usize, tctx);
+	prof_try_log(tsd, usize, prof_info);
 
 	if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) {
 		prof_tctx_destroy(tsd, tctx);
diff --git a/src/prof_log.c b/src/prof_log.c
index 73ca7417..5747c8db 100644
--- a/src/prof_log.c
+++ b/src/prof_log.c
@@ -199,7 +199,8 @@ prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) {
 }
 
 void
-prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) {
+prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) {
+	prof_tctx_t *tctx = prof_info->prof_tctx;
 	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
 
 	prof_tdata_t *cons_tdata = prof_tdata_get(tsd, false);
@@ -229,7 +230,7 @@ prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) {
 		log_tables_initialized = true;
 	}
 
-	nstime_t alloc_time = prof_alloc_time_get(tsd_tsdn(tsd), ptr);
+	nstime_t alloc_time = prof_info->alloc_time;
 	nstime_t free_time = NSTIME_ZERO_INITIALIZER;
 	nstime_update(&free_time);
 
diff --git a/test/unit/prof_tctx.c b/test/unit/prof_tctx.c
index ff3b2b0c..30df71b2 100644
--- a/test/unit/prof_tctx.c
+++ b/test/unit/prof_tctx.c
@@ -4,7 +4,7 @@ TEST_BEGIN(test_prof_realloc) {
 	tsdn_t *tsdn;
 	int flags;
 	void *p, *q;
-	prof_tctx_t *tctx_p, *tctx_q;
+	prof_info_t prof_info_p, prof_info_q;
 	uint64_t curobjs_0, curobjs_1, curobjs_2, curobjs_3;
 
 	test_skip_if(!config_prof);
@@ -15,8 +15,8 @@ TEST_BEGIN(test_prof_realloc) {
 	prof_cnt_all(&curobjs_0, NULL, NULL, NULL);
 	p = mallocx(1024, flags);
 	assert_ptr_not_null(p, "Unexpected mallocx() failure");
-	tctx_p = prof_tctx_get(tsdn, p, NULL);
-	assert_ptr_ne(tctx_p, (prof_tctx_t *)(uintptr_t)1U,
+	prof_info_get(tsdn, p, NULL, &prof_info_p);
+	assert_ptr_ne(prof_info_p.prof_tctx, (prof_tctx_t *)(uintptr_t)1U,
 	    "Expected valid tctx");
 	prof_cnt_all(&curobjs_1, NULL, NULL, NULL);
 	assert_u64_eq(curobjs_0 + 1, curobjs_1,
@@ -25,8 +25,8 @@ TEST_BEGIN(test_prof_realloc) {
 	q = rallocx(p, 2048, flags);
 	assert_ptr_ne(p, q, "Expected move");
 	assert_ptr_not_null(p, "Unexpected rmallocx() failure");
-	tctx_q = prof_tctx_get(tsdn, q, NULL);
-	assert_ptr_ne(tctx_q, (prof_tctx_t *)(uintptr_t)1U,
+	prof_info_get(tsdn, q, NULL, &prof_info_q);
+	assert_ptr_ne(prof_info_q.prof_tctx, (prof_tctx_t *)(uintptr_t)1U,
 	    "Expected valid tctx");
 	prof_cnt_all(&curobjs_2, NULL, NULL, NULL);
 	assert_u64_eq(curobjs_1, curobjs_2,