From d01d5b8f4ab8d75250b235f1ee481a382946c8a4 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Mon, 1 Apr 2024 16:49:34 -0700 Subject: [PATCH] Change structs use when freeing to avoid using index2size for large sizes. 1. Change the definition of emap_alloc_ctx_t 2. Change the read of both from edata_t. 3. Change the assignment and usage of emap_alloc_ctx_t. 4. Change other callsites of index2size. Note for the changes in the data structure, i.e., emap_alloc_ctx_t, will be used when the build-time config (--enable-limit-usize-gap) is enabled but they will store the same value as index2size(szind) if the runtime option (opt_limit_usize_gap) is not enabled. --- include/jemalloc/internal/arena_inlines_b.h | 49 +++++++++++------ include/jemalloc/internal/edata.h | 45 +++++++++++++++- include/jemalloc/internal/emap.h | 54 ++++++++++++++++--- .../internal/jemalloc_internal_inlines_c.h | 5 +- src/arena.c | 6 ++- src/emap.c | 5 ++ src/jemalloc.c | 39 +++++++++----- src/tcache.c | 3 +- test/unit/arena_reset.c | 3 +- 9 files changed, 167 insertions(+), 42 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index ea246cc5..099893eb 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -51,7 +51,7 @@ arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { } JEMALLOC_ALWAYS_INLINE bool -large_dalloc_safety_checks(edata_t *edata, const void *ptr, szind_t szind) { +large_dalloc_safety_checks(edata_t *edata, const void *ptr, size_t input_size) { if (!config_opt_safety_checks) { return false; } @@ -68,7 +68,6 @@ large_dalloc_safety_checks(edata_t *edata, const void *ptr, szind_t szind) { "possibly caused by double free bugs.", ptr); return true; } - size_t input_size = sz_index2size(szind); if (unlikely(input_size != edata_usize_get(edata))) { safety_check_fail_sized_dealloc(/* current_dealloc */ true, ptr, /* true_size */ edata_usize_get(edata), 
input_size); @@ -101,9 +100,10 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, if (unlikely(!is_slab)) { /* edata must have been initialized at this point. */ assert(edata != NULL); + size_t usize = (alloc_ctx == NULL)? edata_usize_get(edata): + emap_alloc_ctx_usize_get(alloc_ctx); if (reset_recent && - large_dalloc_safety_checks(edata, ptr, - edata_szind_get(edata))) { + large_dalloc_safety_checks(edata, ptr, usize)) { prof_info->alloc_tctx = PROF_TCTX_SENTINEL; return; } @@ -225,7 +225,7 @@ arena_salloc(tsdn_t *tsdn, const void *ptr) { emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); - return sz_index2size(alloc_ctx.szind); + return emap_alloc_ctx_usize_get(&alloc_ctx); } JEMALLOC_ALWAYS_INLINE size_t @@ -256,17 +256,19 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { assert(full_alloc_ctx.szind != SC_NSIZES); - return sz_index2size(full_alloc_ctx.szind); + return config_limit_usize_gap? edata_usize_get(full_alloc_ctx.edata): + sz_index2size(full_alloc_ctx.szind); } static inline void -arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) { +arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind, + size_t usize) { if (config_prof && unlikely(szind < SC_NBINS)) { arena_dalloc_promoted(tsdn, ptr, NULL, true); } else { edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); - if (large_dalloc_safety_checks(edata, ptr, szind)) { + if (large_dalloc_safety_checks(edata, ptr, usize)) { /* See the comment in isfree. */ return; } @@ -287,19 +289,22 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.szind < SC_NSIZES); assert(alloc_ctx.slab == edata_slab_get(edata)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) == + edata_usize_get(edata)); } if (likely(alloc_ctx.slab)) { /* Small allocation. 
*/ arena_dalloc_small(tsdn, ptr); } else { - arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind); + arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind, + emap_alloc_ctx_usize_get(&alloc_ctx)); } } JEMALLOC_ALWAYS_INLINE void arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, - bool slow_path) { + size_t usize, bool slow_path) { assert (!tsdn_null(tsdn) && tcache != NULL); bool is_sample_promoted = config_prof && szind < SC_NBINS; if (unlikely(is_sample_promoted)) { @@ -313,7 +318,7 @@ arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, } else { edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); - if (large_dalloc_safety_checks(edata, ptr, szind)) { + if (large_dalloc_safety_checks(edata, ptr, usize)) { /* See the comment in isfree. */ return; } @@ -396,6 +401,8 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.szind < SC_NSIZES); assert(alloc_ctx.slab == edata_slab_get(edata)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) == + edata_usize_get(edata)); } if (likely(alloc_ctx.slab)) { @@ -407,7 +414,7 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx.szind, slow_path); } else { arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind, - slow_path); + emap_alloc_ctx_usize_get(&alloc_ctx), slow_path); } } @@ -422,8 +429,9 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { * There is no risk of being confused by a promoted sampled * object, so base szind and slab on the given size. */ - alloc_ctx.szind = sz_size2index(size); - alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); + szind_t szind = sz_size2index(size); + emap_alloc_ctx_set(&alloc_ctx, szind, (szind < SC_NBINS), + size); } if ((config_prof && opt_prof) || config_debug) { @@ -446,7 +454,8 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { /* Small allocation. 
*/ arena_dalloc_small(tsdn, ptr); } else { - arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind); + arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind, + emap_alloc_ctx_usize_get(&alloc_ctx)); } } @@ -469,6 +478,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind == sz_size2index(size)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) == size); } else { alloc_ctx = *caller_alloc_ctx; } @@ -486,6 +496,11 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.slab == edata_slab_get(edata)); + emap_alloc_ctx_set(&alloc_ctx, alloc_ctx.szind, alloc_ctx.slab, + sz_s2u(size)); + assert(!config_limit_usize_gap || + emap_alloc_ctx_usize_get(&alloc_ctx) == + edata_usize_get(edata)); } if (likely(alloc_ctx.slab)) { @@ -496,8 +511,10 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, alloc_ctx.szind, slow_path); } else { + emap_alloc_ctx_set(&alloc_ctx, alloc_ctx.szind, alloc_ctx.slab, + sz_s2u(size)); arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind, - slow_path); + emap_alloc_ctx_usize_get(&alloc_ctx), slow_path); } } diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index aeed7482..6d9967b9 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -288,10 +288,53 @@ edata_szind_get(const edata_t *edata) { } static inline size_t -edata_usize_get(const edata_t *edata) { +edata_usize_get_from_ind_unsafe(const edata_t *edata) { return sz_index2size(edata_szind_get(edata)); } +static inline size_t +edata_usize_get_from_ind(const edata_t *edata) { + size_t usize = edata_usize_get_from_ind_unsafe(edata); + assert(!sz_limit_usize_gap_enabled() || usize < SC_LARGE_MINCLASS); + return usize; +} + +static inline size_t 
+edata_usize_get_from_size(const edata_t *edata) { + size_t size = (edata->e_size_esn & EDATA_SIZE_MASK); + assert(size > sz_large_pad); + size_t usize = size - sz_large_pad; + /* + * Whether limit-usize-gap is enabled or not, usize retrieved here is + * not accurate when smaller than SC_LARGE_MINCLASS. + */ + assert(usize >= SC_LARGE_MINCLASS); + return usize; +} + +static inline size_t +edata_usize_get(const edata_t *edata) { + /* + * When sz_limit_usize_gap_enabled() is true, two cases: + * 1. if usize_from_ind is not smaller than SC_LARGE_MINCLASS, + * usize_from_size is accurate; + * 2. otherwise, usize_from_ind is accurate. + * + * When sz_limit_usize_gap_enabled() is not true, the two should be the + * same when usize_from_ind is not smaller than SC_LARGE_MINCLASS. + */ + size_t usize_from_ind = edata_usize_get_from_ind_unsafe(edata); + if (!sz_limit_usize_gap_enabled() || + usize_from_ind < SC_LARGE_MINCLASS) { + assert(sz_limit_usize_gap_enabled() || + usize_from_ind < SC_LARGE_MINCLASS || + usize_from_ind == edata_usize_get_from_size(edata)); + return usize_from_ind; + } + + return edata_usize_get_from_size(edata); +} + static inline unsigned edata_binshard_get(const edata_t *edata) { unsigned binshard = (unsigned)((edata->e_bits & diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 7ac0ae95..1be12358 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -20,10 +20,11 @@ struct emap_s { }; /* Used to pass rtree lookup context down the path. 
*/ -typedef struct emap_alloc_ctx_t emap_alloc_ctx_t; -struct emap_alloc_ctx_t { +typedef struct emap_alloc_ctx_s emap_alloc_ctx_t; +struct emap_alloc_ctx_s { szind_t szind; bool slab; + size_t usize; }; typedef struct emap_full_alloc_ctx_s emap_full_alloc_ctx_t; @@ -230,16 +231,53 @@ emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { return rtree_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr).edata; } +JEMALLOC_ALWAYS_INLINE void +emap_alloc_ctx_set(emap_alloc_ctx_t *alloc_ctx, szind_t szind, bool slab, + size_t usize) { + alloc_ctx->szind = szind; + alloc_ctx->slab = slab; + /* + * When config_limit_usize_gap is disabled, alloc_ctx->usize + * should not be accessed. + */ + if (config_limit_usize_gap) { + alloc_ctx->usize = usize; + assert(sz_limit_usize_gap_enabled() || + usize == sz_index2size(szind)); + } +} + +JEMALLOC_ALWAYS_INLINE size_t +emap_alloc_ctx_usize_get(emap_alloc_ctx_t *alloc_ctx) { + assert(alloc_ctx->szind < SC_NSIZES); + if (alloc_ctx->slab || !config_limit_usize_gap) { + assert(!config_limit_usize_gap || + alloc_ctx->usize == sz_index2size(alloc_ctx->szind)); + return sz_index2size(alloc_ctx->szind); + } + assert(sz_limit_usize_gap_enabled() || + alloc_ctx->usize == sz_index2size(alloc_ctx->szind)); + return alloc_ctx->usize; +} + /* Fills in alloc_ctx with the info in the map. 
*/ JEMALLOC_ALWAYS_INLINE void emap_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { EMAP_DECLARE_RTREE_CTX; - rtree_metadata_t metadata = rtree_metadata_read(tsdn, &emap->rtree, - rtree_ctx, (uintptr_t)ptr); - alloc_ctx->szind = metadata.szind; - alloc_ctx->slab = metadata.slab; + if (config_limit_usize_gap) { + rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, + rtree_ctx, (uintptr_t)ptr); + assert(contents.edata != NULL); + emap_alloc_ctx_set(alloc_ctx, contents.metadata.szind, + contents.metadata.slab, edata_usize_get(contents.edata)); + } else { + rtree_metadata_t metadata = rtree_metadata_read(tsdn, + &emap->rtree, rtree_ctx, (uintptr_t)ptr); + /* alloc_ctx->usize will not be read or written in this case. */ + emap_alloc_ctx_set(alloc_ctx, metadata.szind, metadata.slab, 0); + } } /* The pointer must be mapped. */ @@ -293,6 +331,10 @@ emap_alloc_ctx_try_lookup_fast(tsd_t *tsd, emap_t *emap, const void *ptr, if (err) { return true; } + /* + * Small allocs using the fastpath can always use index to get the + * usize. Therefore, do not set alloc_ctx->usize here. 
+ */ alloc_ctx->szind = metadata.szind; alloc_ctx->slab = metadata.slab; return false; diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 854aec1e..c7ef9161 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -425,8 +425,9 @@ maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) { if (alloc_ctx->szind != dbg_ctx.szind) { safety_check_fail_sized_dealloc( /* current_dealloc */ true, ptr, - /* true_size */ sz_index2size(dbg_ctx.szind), - /* input_size */ sz_index2size(alloc_ctx->szind)); + /* true_size */ emap_alloc_ctx_usize_get(&dbg_ctx), + /* input_size */ emap_alloc_ctx_usize_get( + alloc_ctx)); return true; } if (alloc_ctx->slab != dbg_ctx.slab) { diff --git a/src/arena.c b/src/arena.c index a66486b0..4b8d8170 100644 --- a/src/arena.c +++ b/src/arena.c @@ -822,7 +822,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { assert(alloc_ctx.szind != SC_NSIZES); if (config_stats || (config_prof && opt_prof)) { - usize = sz_index2size(alloc_ctx.szind); + usize = emap_alloc_ctx_usize_get(&alloc_ctx); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); } /* Remove large allocation from prof sample set. */ @@ -1366,7 +1366,9 @@ arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, assert(sz_can_use_slab(size)); return arena_malloc_small(tsdn, arena, ind, zero); } else { - return large_malloc(tsdn, arena, sz_index2size(ind), zero); + size_t usize = sz_limit_usize_gap_enabled()? 
sz_s2u(size): + sz_index2size(ind); + return large_malloc(tsdn, arena, usize, zero); } } diff --git a/src/emap.c b/src/emap.c index f7d5c25a..63216c87 100644 --- a/src/emap.c +++ b/src/emap.c @@ -73,6 +73,11 @@ emap_try_acquire_edata_neighbor_impl(tsdn_t *tsdn, emap_t *emap, edata_t *edata, return NULL; } + /* + * Info in neighbor_contents may be inaccurate before + * extent_can_acquire_neighbor below confirms the neighbor can be + * acquired. Set dependent to false to avoid reading usize. + */ rtree_contents_t neighbor_contents = rtree_leaf_elm_read(tsdn, &emap->rtree, elm, /* dependent */ false); if (!extent_can_acquire_neighbor(edata, neighbor_contents, pai, diff --git a/src/jemalloc.c b/src/jemalloc.c index 01c770aa..94949cc0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2203,6 +2203,7 @@ malloc_init_hard(void) { if (config_limit_usize_gap) { assert(TCACHE_MAXCLASS_LIMIT <= USIZE_GROW_SLOW_THRESHOLD); + assert(SC_LOOKUP_MAXCLASS <= USIZE_GROW_SLOW_THRESHOLD); } #if defined(_WIN32) && _WIN32_WINNT < 0x0600 _init_init_lock(); @@ -2384,7 +2385,8 @@ aligned_usize_get(size_t size, size_t alignment, size_t *usize, szind_t *ind, if (unlikely(*ind >= SC_NSIZES)) { return true; } - *usize = sz_index2size(*ind); + *usize = sz_limit_usize_gap_enabled()? 
sz_s2u(size): + sz_index2size(*ind); assert(*usize > 0 && *usize <= SC_LARGE_MAXCLASS); return false; } @@ -2932,7 +2934,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); - size_t usize = sz_index2size(alloc_ctx.szind); + size_t usize = emap_alloc_ctx_usize_get(&alloc_ctx); if (config_prof && opt_prof) { prof_free(tsd, ptr, usize, &alloc_ctx); } @@ -2964,35 +2966,38 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { assert(malloc_initialized() || IS_INITIALIZER); emap_alloc_ctx_t alloc_ctx; + szind_t szind = sz_size2index(usize); if (!config_prof) { - alloc_ctx.szind = sz_size2index(usize); - alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); + emap_alloc_ctx_set(&alloc_ctx, szind, (szind < SC_NBINS), + usize); } else { if (likely(!prof_sample_aligned(ptr))) { /* * When the ptr is not page aligned, it was not sampled. * usize can be trusted to determine szind and slab. */ - alloc_ctx.szind = sz_size2index(usize); - alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); + emap_alloc_ctx_set(&alloc_ctx, szind, + (szind < SC_NBINS), usize); } else if (opt_prof) { emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); if (config_opt_safety_checks) { /* Small alloc may have !slab (sampled). 
*/ + size_t true_size = + emap_alloc_ctx_usize_get(&alloc_ctx); if (unlikely(alloc_ctx.szind != sz_size2index(usize))) { safety_check_fail_sized_dealloc( /* current_dealloc */ true, ptr, - /* true_size */ sz_index2size( - alloc_ctx.szind), + /* true_size */ true_size, /* input_size */ usize); } } } else { - alloc_ctx.szind = sz_size2index(usize); - alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); + szind_t szind = sz_size2index(usize); + emap_alloc_ctx_set(&alloc_ctx, szind, + (szind < SC_NBINS), usize); } } bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); @@ -3494,7 +3499,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); - old_usize = sz_index2size(alloc_ctx.szind); + old_usize = emap_alloc_ctx_usize_get(&alloc_ctx); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); if (aligned_usize_get(size, alignment, &usize, NULL, false)) { goto label_oom; @@ -3716,7 +3721,15 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, prof_info_get(tsd, ptr, alloc_ctx, &prof_info); prof_alloc_rollback(tsd, tctx); } else { - prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info); + /* + * Need to retrieve the new alloc_ctx since the modification + * to edata has already been done. 
+ */ + emap_alloc_ctx_t new_alloc_ctx; + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, + &new_alloc_ctx); + prof_info_get_and_reset_recent(tsd, ptr, &new_alloc_ctx, + &prof_info); assert(usize <= usize_max); sample_event = te_prof_sample_event_lookahead(tsd, usize); prof_realloc(tsd, ptr, size, usize, tctx, prof_active, ptr, @@ -3756,7 +3769,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); - old_usize = sz_index2size(alloc_ctx.szind); + old_usize = emap_alloc_ctx_usize_get(&alloc_ctx); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); /* * The API explicitly absolves itself of protecting against (size + diff --git a/src/tcache.c b/src/tcache.c index 15da14da..270d38ac 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -1047,7 +1047,8 @@ tcache_bin_flush_impl_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin ndeferred++; continue; } - if (large_dalloc_safety_checks(edata, ptr, binind)) { + if (large_dalloc_safety_checks(edata, ptr, + sz_index2size(binind))) { /* See the comment in isfree. */ continue; } diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 8ef0786c..09536b29 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -78,7 +78,8 @@ vsalloc(tsdn_t *tsdn, const void *ptr) { return 0; } - return sz_index2size(full_alloc_ctx.szind); + return config_limit_usize_gap? edata_usize_get(full_alloc_ctx.edata): + sz_index2size(full_alloc_ctx.szind); } static unsigned