diff --git a/configure.ac b/configure.ac index b01ff56b..a55a5a08 100644 --- a/configure.ac +++ b/configure.ac @@ -2732,6 +2732,24 @@ if test "x${have_pthread}" = "x1" -a "x${je_cv_os_unfair_lock}" != "xyes" -a \ AC_DEFINE([JEMALLOC_BACKGROUND_THREAD], [ ], [ ]) fi +dnl ============================================================================ +dnl Limit the gap between two contiguous usizes to be at most PAGE. +AC_ARG_ENABLE([limit_usize_gap], + [AS_HELP_STRING([--enable-limit-usize-gap], + [Limit the gap between two contiguous usizes])], +[if test "x$limit_usize_gap" = "xno" ; then + limit_usize_gap="0" +else + limit_usize_gap="1" +fi +], +[limit_usize_gap="0"] +) +if test "x$limit_usize_gap" = "x1" ; then + AC_DEFINE([LIMIT_USIZE_GAP], [ ]) +fi +AC_SUBST([limit_usize_gap]) + dnl ============================================================================ dnl Check for glibc malloc hooks @@ -2997,4 +3015,5 @@ AC_MSG_RESULT([cxx : ${enable_cxx}]) AC_MSG_RESULT([dss : ${enable_dss}]) AC_MSG_RESULT([tsan : ${enable_tsan}]) AC_MSG_RESULT([ubsan : ${enable_ubsan}]) +AC_MSG_RESULT([limit-usize-gap : ${limit_usize_gap}]) AC_MSG_RESULT([===============================================================================]) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index ea246cc5..108493f2 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -51,7 +51,7 @@ arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { } JEMALLOC_ALWAYS_INLINE bool -large_dalloc_safety_checks(edata_t *edata, const void *ptr, szind_t szind) { +large_dalloc_safety_checks(edata_t *edata, const void *ptr, size_t input_size) { if (!config_opt_safety_checks) { return false; } @@ -68,7 +68,6 @@ large_dalloc_safety_checks(edata_t *edata, const void *ptr, szind_t szind) { "possibly caused by double free bugs.", ptr); return true; } - size_t input_size = sz_index2size(szind); if 
(unlikely(input_size != edata_usize_get(edata))) { safety_check_fail_sized_dealloc(/* current_dealloc */ true, ptr, /* true_size */ edata_usize_get(edata), input_size); @@ -101,9 +100,10 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, if (unlikely(!is_slab)) { /* edata must have been initialized at this point. */ assert(edata != NULL); + size_t usize = (alloc_ctx == NULL)? edata_usize_get(edata): + emap_alloc_ctx_usize_get(alloc_ctx); if (reset_recent && - large_dalloc_safety_checks(edata, ptr, - edata_szind_get(edata))) { + large_dalloc_safety_checks(edata, ptr, usize)) { prof_info->alloc_tctx = PROF_TCTX_SENTINEL; return; } @@ -225,7 +225,7 @@ arena_salloc(tsdn_t *tsdn, const void *ptr) { emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); - return sz_index2size(alloc_ctx.szind); + return emap_alloc_ctx_usize_get(&alloc_ctx); } JEMALLOC_ALWAYS_INLINE size_t @@ -256,17 +256,24 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { assert(full_alloc_ctx.szind != SC_NSIZES); - return sz_index2size(full_alloc_ctx.szind); + return edata_usize_get(full_alloc_ctx.edata); } static inline void -arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) { +arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind, + size_t usize) { + /* + * szind is still needed in this function mainly because + * szind < SC_NBINS determines not only if this is a small alloc, + * but also if szind is valid (an inactive extent would have + * szind == SC_NSIZES). + */ if (config_prof && unlikely(szind < SC_NBINS)) { arena_dalloc_promoted(tsdn, ptr, NULL, true); } else { edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); - if (large_dalloc_safety_checks(edata, ptr, szind)) { + if (large_dalloc_safety_checks(edata, ptr, usize)) { /* See the comment in isfree. 
*/ return; } @@ -287,19 +294,22 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.szind < SC_NSIZES); assert(alloc_ctx.slab == edata_slab_get(edata)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) == + edata_usize_get(edata)); } if (likely(alloc_ctx.slab)) { /* Small allocation. */ arena_dalloc_small(tsdn, ptr); } else { - arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind); + arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind, + emap_alloc_ctx_usize_get(&alloc_ctx)); } } JEMALLOC_ALWAYS_INLINE void arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, - bool slow_path) { + size_t usize, bool slow_path) { assert (!tsdn_null(tsdn) && tcache != NULL); bool is_sample_promoted = config_prof && szind < SC_NBINS; if (unlikely(is_sample_promoted)) { @@ -313,7 +323,7 @@ arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, } else { edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); - if (large_dalloc_safety_checks(edata, ptr, szind)) { + if (large_dalloc_safety_checks(edata, ptr, usize)) { /* See the comment in isfree. */ return; } @@ -396,6 +406,8 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.szind < SC_NSIZES); assert(alloc_ctx.slab == edata_slab_get(edata)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) == + edata_usize_get(edata)); } if (likely(alloc_ctx.slab)) { @@ -407,7 +419,7 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx.szind, slow_path); } else { arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind, - slow_path); + emap_alloc_ctx_usize_get(&alloc_ctx), slow_path); } } @@ -422,8 +434,9 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { * There is no risk of being confused by a promoted sampled * object, so base szind and slab on the given size. 
*/ - alloc_ctx.szind = sz_size2index(size); - alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); + szind_t szind = sz_size2index(size); + emap_alloc_ctx_init(&alloc_ctx, szind, (szind < SC_NBINS), + size); } if ((config_prof && opt_prof) || config_debug) { @@ -446,7 +459,8 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { /* Small allocation. */ arena_dalloc_small(tsdn, ptr); } else { - arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind); + arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind, + emap_alloc_ctx_usize_get(&alloc_ctx)); } } @@ -469,6 +483,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind == sz_size2index(size)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) == size); } else { alloc_ctx = *caller_alloc_ctx; } @@ -486,6 +501,11 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.slab == edata_slab_get(edata)); + emap_alloc_ctx_init(&alloc_ctx, alloc_ctx.szind, alloc_ctx.slab, + sz_s2u(size)); + assert(!config_limit_usize_gap || + emap_alloc_ctx_usize_get(&alloc_ctx) == + edata_usize_get(edata)); } if (likely(alloc_ctx.slab)) { @@ -497,7 +517,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, alloc_ctx.szind, slow_path); } else { arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind, - slow_path); + sz_s2u(size), slow_path); } } diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 3d512630..7f075114 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -14,12 +14,18 @@ JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS typedef struct arena_stats_large_s arena_stats_large_t; struct arena_stats_large_s { /* - * Total number of allocation/deallocation requests served directly by - * the arena. 
+ * Total number of large allocation/deallocation requests served directly + * by the arena. */ locked_u64_t nmalloc; locked_u64_t ndalloc; + /* + * Total large active bytes (allocated - deallocated) served directly + * by the arena. + */ + locked_u64_t active_bytes; + /* * Number of allocation requests that correspond to this size class. * This includes requests served by tcache, though tcache only diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 2381ccbc..b087ea31 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -21,6 +21,14 @@ */ #define EDATA_ALIGNMENT 128 +/* + * Defines how many nodes are visited when enumerating the heap to search for + * qualified extents. More nodes visited may result in better choices at + * the cost of longer search time. This size should not exceed 2^16 - 1 + * because we use uint16_t for accessing the queue needed for enumeration. + */ +#define ESET_ENUMERATE_MAX_NUM 32 + enum extent_state_e { extent_state_active = 0, extent_state_dirty = 1, @@ -89,8 +97,8 @@ struct edata_cmp_summary_s { /* Extent (span of pages). Use accessor functions for e_* fields. */ typedef struct edata_s edata_t; -ph_structs(edata_avail, edata_t); -ph_structs(edata_heap, edata_t); +ph_structs(edata_avail, edata_t, ESET_ENUMERATE_MAX_NUM); +ph_structs(edata_heap, edata_t, ESET_ENUMERATE_MAX_NUM); struct edata_s { /* * Bitfield containing several fields: @@ -281,7 +289,54 @@ edata_szind_get(const edata_t *edata) { static inline size_t edata_usize_get(const edata_t *edata) { - return sz_index2size(edata_szind_get(edata)); + assert(edata != NULL); + /* + * When sz_limit_usize_gap_enabled() is true, two cases: + * 1. if usize_from_ind is not smaller than SC_LARGE_MINCLASS, + * usize_from_size is accurate; + * 2. otherwise, usize_from_ind is accurate. + * + * When sz_limit_usize_gap_enabled() is not true, the two should be the + * same when usize_from_ind is not smaller than SC_LARGE_MINCLASS. 
+ * + * Note sampled small allocs will be promoted. Their extent size is + * recorded in edata_size_get(edata), while their szind reflects the + * true usize. Thus, usize retrieved here is still accurate for + * sampled small allocs. + */ + szind_t szind = edata_szind_get(edata); +#ifdef JEMALLOC_JET + /* + * Double free is invalid and results in undefined behavior. However, + * for double free tests to end gracefully, return an invalid usize + * when szind shows the edata is not active, i.e., szind == SC_NSIZES. + */ + if (unlikely(szind == SC_NSIZES)) { + return SC_LARGE_MAXCLASS + 1; + } +#endif + + if (!sz_limit_usize_gap_enabled() || szind < SC_NBINS) { + size_t usize_from_ind = sz_index2size(szind); + if (!sz_limit_usize_gap_enabled() && + usize_from_ind >= SC_LARGE_MINCLASS) { + size_t size = (edata->e_size_esn & EDATA_SIZE_MASK); + assert(size > sz_large_pad); + size_t usize_from_size = size - sz_large_pad; + assert(usize_from_ind == usize_from_size); + } + return usize_from_ind; + } + + size_t size = (edata->e_size_esn & EDATA_SIZE_MASK); + assert(size > sz_large_pad); + size_t usize_from_size = size - sz_large_pad; + /* + * No matter whether limit-usize-gap is enabled or not, usize retrieved + * from size is not accurate when smaller than SC_LARGE_MINCLASS. + */ + assert(usize_from_size >= SC_LARGE_MINCLASS); + return usize_from_size; } static inline unsigned diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 7ac0ae95..5885daa6 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -20,8 +20,9 @@ struct emap_s { }; /* Used to pass rtree lookup context down the path. 
*/ -typedef struct emap_alloc_ctx_t emap_alloc_ctx_t; -struct emap_alloc_ctx_t { +typedef struct emap_alloc_ctx_s emap_alloc_ctx_t; +struct emap_alloc_ctx_s { + size_t usize; szind_t szind; bool slab; }; @@ -230,16 +231,66 @@ emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { return rtree_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr).edata; } +JEMALLOC_ALWAYS_INLINE void +emap_alloc_ctx_init(emap_alloc_ctx_t *alloc_ctx, szind_t szind, bool slab, + size_t usize) { + alloc_ctx->szind = szind; + alloc_ctx->slab = slab; + /* + * When config_limit_usize_gap disabled, alloc_ctx->usize + * should not be accessed. + */ + if (config_limit_usize_gap) { + alloc_ctx->usize = usize; + assert(sz_limit_usize_gap_enabled() || + usize == sz_index2size(szind)); + } else if (config_debug) { + alloc_ctx->usize = SC_LARGE_MAXCLASS + 1; + } +} + +JEMALLOC_ALWAYS_INLINE size_t +emap_alloc_ctx_usize_get(emap_alloc_ctx_t *alloc_ctx) { + assert(alloc_ctx->szind < SC_NSIZES); + if (!config_limit_usize_gap || alloc_ctx->slab) { + assert(!config_limit_usize_gap || + alloc_ctx->usize == sz_index2size(alloc_ctx->szind)); + return sz_index2size(alloc_ctx->szind); + } + assert(sz_limit_usize_gap_enabled() || + alloc_ctx->usize == sz_index2size(alloc_ctx->szind)); + assert(alloc_ctx->usize <= SC_LARGE_MAXCLASS); + return alloc_ctx->usize; +} + /* Fills in alloc_ctx with the info in the map. */ JEMALLOC_ALWAYS_INLINE void emap_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { EMAP_DECLARE_RTREE_CTX; - rtree_metadata_t metadata = rtree_metadata_read(tsdn, &emap->rtree, - rtree_ctx, (uintptr_t)ptr); - alloc_ctx->szind = metadata.szind; - alloc_ctx->slab = metadata.slab; + if (config_limit_usize_gap) { + rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, + rtree_ctx, (uintptr_t)ptr); + /* + * If the alloc is invalid, do not calculate usize since edata + * could be corrupted. 
+ */ + if (contents.metadata.szind == SC_NSIZES || + contents.edata == NULL) { + emap_alloc_ctx_init(alloc_ctx, contents.metadata.szind, + contents.metadata.slab, 0); + return; + } + emap_alloc_ctx_init(alloc_ctx, contents.metadata.szind, + contents.metadata.slab, edata_usize_get(contents.edata)); + } else { + rtree_metadata_t metadata = rtree_metadata_read(tsdn, + &emap->rtree, rtree_ctx, (uintptr_t)ptr); + /* alloc_ctx->usize will not be read/write in this case. */ + emap_alloc_ctx_init(alloc_ctx, metadata.szind, metadata.slab, + SC_LARGE_MAXCLASS + 1); + } } /* The pointer must be mapped. */ @@ -293,8 +344,15 @@ emap_alloc_ctx_try_lookup_fast(tsd_t *tsd, emap_t *emap, const void *ptr, if (err) { return true; } + /* + * Small allocs using the fastpath can always use index to get the + * usize. Therefore, do not set alloc_ctx->usize here. + */ alloc_ctx->szind = metadata.szind; alloc_ctx->slab = metadata.slab; + if (config_debug) { + alloc_ctx->usize = SC_LARGE_MAXCLASS + 1; + } return false; } diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 7ba92112..a8a845ec 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -20,8 +20,14 @@ * an observable property of any given region of address space). It's just * hugepage-sized and hugepage-aligned; it's *potentially* huge. */ + +/* + * The max enumeration num should not exceed 2^16 - 1, see comments in edata.h + * for ESET_ENUMERATE_MAX_NUM for more details. 
+ */ +#define PSSET_ENUMERATE_MAX_NUM 32 typedef struct hpdata_s hpdata_t; -ph_structs(hpdata_age_heap, hpdata_t); +ph_structs(hpdata_age_heap, hpdata_t, PSSET_ENUMERATE_MAX_NUM); struct hpdata_s { /* * We likewise follow the edata convention of mangling names and forcing diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 742d599d..e76eaaf4 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -475,6 +475,12 @@ /* If defined, use __int128 for optimization. */ #undef JEMALLOC_HAVE_INT128 +/* + * If defined, the gap between any two contiguous usizes should not exceed + * PAGE. + */ +#undef LIMIT_USIZE_GAP + #include "jemalloc/internal/jemalloc_internal_overrides.h" #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 2c6b58f7..8c6df450 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -39,6 +39,7 @@ extern atomic_zu_t zero_realloc_count; extern bool opt_cache_oblivious; extern unsigned opt_debug_double_free_max_scan; extern size_t opt_calloc_madvise_threshold; +extern bool opt_limit_usize_gap; extern const char *opt_malloc_conf_symlink; extern const char *opt_malloc_conf_env_var; diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 854aec1e..c7ef9161 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -425,8 +425,9 @@ maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) { if (alloc_ctx->szind != dbg_ctx.szind) { safety_check_fail_sized_dealloc( /* current_dealloc */ true, ptr, - /* true_size */ sz_index2size(dbg_ctx.szind), - /* input_size */ 
sz_index2size(alloc_ctx->szind)); + /* true_size */ emap_alloc_ctx_usize_get(&dbg_ctx), + /* input_size */ emap_alloc_ctx_usize_get( + alloc_ctx)); return true; } if (alloc_ctx->slab != dbg_ctx.slab) { diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index a59c3489..ef637a2d 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -276,4 +276,12 @@ static const bool have_memcntl = #endif ; +static const bool config_limit_usize_gap = +#ifdef LIMIT_USIZE_GAP + true +#else + false +#endif + ; + #endif /* JEMALLOC_PREAMBLE_H */ diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index ef9634be..05376004 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -75,6 +75,16 @@ struct ph_s { size_t auxcount; }; +typedef struct ph_enumerate_vars_s ph_enumerate_vars_t; +struct ph_enumerate_vars_s { + uint16_t front; + uint16_t rear; + uint16_t queue_size; + uint16_t visited_num; + uint16_t max_visit_num; + uint16_t max_queue_size; +}; + JEMALLOC_ALWAYS_INLINE phn_link_t * phn_link_get(void *phn, size_t offset) { return (phn_link_t *)(((char *)phn) + offset); @@ -414,14 +424,98 @@ ph_remove(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { } } -#define ph_structs(a_prefix, a_type) \ +JEMALLOC_ALWAYS_INLINE void +ph_enumerate_vars_init(ph_enumerate_vars_t *vars, uint16_t max_visit_num, + uint16_t max_queue_size) { + vars->queue_size = 0; + vars->visited_num = 0; + vars->front = 0; + vars->rear = 0; + vars->max_visit_num = max_visit_num; + vars->max_queue_size = max_queue_size; + assert(vars->max_visit_num > 0); + /* + * max_queue_size must be able to support max_visit_num, which means + * the queue will not overflow before reaching max_visit_num. 
*/ + assert(vars->max_queue_size >= (vars->max_visit_num + 1)/2); +} + +JEMALLOC_ALWAYS_INLINE void +ph_enumerate_queue_push(void *phn, void **bfs_queue, + ph_enumerate_vars_t *vars) { + assert(vars->queue_size < vars->max_queue_size); + bfs_queue[vars->rear] = phn; + vars->rear = (vars->rear + 1) % vars->max_queue_size; + (vars->queue_size) ++; +} + +JEMALLOC_ALWAYS_INLINE void * +ph_enumerate_queue_pop(void **bfs_queue, ph_enumerate_vars_t *vars) { + assert(vars->queue_size > 0); + assert(vars->queue_size <= vars->max_queue_size); + void *ret = bfs_queue[vars->front]; + vars->front = (vars->front + 1) % vars->max_queue_size; + (vars->queue_size) --; + return ret; +} + + +/* + * The two functions below offer a solution to enumerate the pairing heap. + * When enumerating, always call ph_enumerate_prepare first to prepare the queue + * needed for BFS. Next, call ph_enumerate_next to get the next element in + * the enumeration. When enumeration ends, ph_enumerate_next returns NULL and + * should not be called again. Enumeration ends when all elements in the heap + * have been enumerated or the number of visited elements exceeds + * max_visit_num. 
+ */ +JEMALLOC_ALWAYS_INLINE void +ph_enumerate_prepare(ph_t *ph, void **bfs_queue, ph_enumerate_vars_t *vars, + uint16_t max_visit_num, uint16_t max_queue_size) { + ph_enumerate_vars_init(vars, max_visit_num, max_queue_size); + ph_enumerate_queue_push(ph->root, bfs_queue, vars); +} + +JEMALLOC_ALWAYS_INLINE void * +ph_enumerate_next(ph_t *ph, size_t offset, void **bfs_queue, + ph_enumerate_vars_t *vars) { + if (vars->queue_size == 0) { + return NULL; + } + + (vars->visited_num) ++; + if (vars->visited_num > vars->max_visit_num) { + return NULL; + } + + void *ret = ph_enumerate_queue_pop(bfs_queue, vars); + assert(ret != NULL); + void *left = phn_lchild_get(ret, offset); + void *right = phn_next_get(ret, offset); + if (left) { + ph_enumerate_queue_push(left, bfs_queue, vars); + } + if (right) { + ph_enumerate_queue_push(right, bfs_queue, vars); + } + return ret; +} + +#define ph_structs(a_prefix, a_type, a_max_queue_size) \ typedef struct { \ phn_link_t link; \ } a_prefix##_link_t; \ \ typedef struct { \ ph_t ph; \ -} a_prefix##_t; +} a_prefix##_t; \ + \ +typedef struct { \ + void *bfs_queue[a_max_queue_size]; \ + ph_enumerate_vars_t vars; \ +} a_prefix##_enumerate_helper_t; + /* * The ph_proto() macro generates function prototypes that correspond to the @@ -436,7 +530,12 @@ a_attr a_type *a_prefix##_any(a_prefix##_t *ph); \ a_attr void a_prefix##_insert(a_prefix##_t *ph, a_type *phn); \ a_attr a_type *a_prefix##_remove_first(a_prefix##_t *ph); \ a_attr void a_prefix##_remove(a_prefix##_t *ph, a_type *phn); \ -a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph); +a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph); \ +a_attr void a_prefix##_enumerate_prepare(a_prefix##_t *ph, \ + a_prefix##_enumerate_helper_t *helper, uint16_t max_visit_num, \ + uint16_t max_queue_size); \ +a_attr a_type *a_prefix##_enumerate_next(a_prefix##_t *ph, \ + a_prefix##_enumerate_helper_t *helper); /* The ph_gen() macro generates a type-specific pairing heap implementation. 
*/ #define ph_gen(a_attr, a_prefix, a_type, a_field, a_cmp) \ @@ -491,6 +590,21 @@ a_prefix##_remove_any(a_prefix##_t *ph) { \ a_prefix##_remove(ph, ret); \ } \ return ret; \ +} \ + \ +a_attr void \ +a_prefix##_enumerate_prepare(a_prefix##_t *ph, \ + a_prefix##_enumerate_helper_t *helper, uint16_t max_visit_num, \ + uint16_t max_queue_size) { \ + ph_enumerate_prepare(&ph->ph, helper->bfs_queue, &helper->vars, \ + max_visit_num, max_queue_size); \ +} \ + \ +a_attr a_type * \ +a_prefix##_enumerate_next(a_prefix##_t *ph, \ + a_prefix##_enumerate_helper_t *helper) { \ + return ph_enumerate_next(&ph->ph, offsetof(a_type, a_field), \ + helper->bfs_queue, &helper->vars); \ } #endif /* JEMALLOC_INTERNAL_PH_H */ diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 770835cc..098e47b7 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -286,6 +286,24 @@ # endif #endif +/* + * When config_limit_usize_gap is enabled, the gaps between two contiguous + * size classes should not exceed PAGE. This means there should be no concept + * of size classes for sizes > SC_SMALL_MAXCLASS (or >= SC_LARGE_MINCLASS). + * However, between SC_LARGE_MINCLASS (SC_NGROUP * PAGE) and + * 2 * SC_NGROUP * PAGE, the size class also happens to be aligned with PAGE. + * Since tcache relies on size classes to work and it greatly increases the + * perf of allocs & deallocs, we extend the existence of size class to + * 2 * SC_NGROUP * PAGE ONLY for the tcache module. This means for all other + * modules, there is no size class for sizes >= SC_LARGE_MINCLASS. Yet for + * tcache, the threshold is moved up to 2 * SC_NGROUP * PAGE, which is + * USIZE_GROW_SLOW_THRESHOLD defined below. With the default SC_NGROUP being + * 2, and PAGE being 4KB, the threshold for tcache (USIZE_GROW_SLOW_THRESHOLD) + * is 32KB. 
+ */ +#define LG_USIZE_GROW_SLOW_THRESHOLD (SC_LG_NGROUP + LG_PAGE + 1) +#define USIZE_GROW_SLOW_THRESHOLD (1U << LG_USIZE_GROW_SLOW_THRESHOLD) + #define SC_SLAB_MAXREGS (1U << SC_LG_SLAB_MAXREGS) typedef struct sc_s sc_t; diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index a2d2debc..6c0a1f0c 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -54,6 +54,15 @@ extern size_t sz_large_pad; extern void sz_boot(const sc_data_t *sc_data, bool cache_oblivious); +JEMALLOC_ALWAYS_INLINE bool +sz_limit_usize_gap_enabled() { +#ifdef LIMIT_USIZE_GAP + return opt_limit_usize_gap; +#else + return false; +#endif +} + JEMALLOC_ALWAYS_INLINE pszind_t sz_psz2ind(size_t psz) { assert(psz > 0); @@ -257,11 +266,34 @@ sz_index2size_lookup(szind_t index) { } JEMALLOC_ALWAYS_INLINE size_t -sz_index2size(szind_t index) { +sz_index2size_unsafe(szind_t index) { assert(index < SC_NSIZES); return sz_index2size_lookup(index); } +JEMALLOC_ALWAYS_INLINE size_t +sz_index2size(szind_t index) { + assert(!sz_limit_usize_gap_enabled() || + index <= sz_size2index(USIZE_GROW_SLOW_THRESHOLD)); + size_t size = sz_index2size_unsafe(index); + /* + * With limit_usize_gap enabled, the usize above + * SC_LARGE_MINCLASS should grow by PAGE. However, for sizes + * in [SC_LARGE_MINCLASS, USIZE_GROW_SLOW_THRESHOLD], the + * usize would not change because the size class gap in this + * range is just the same as PAGE. Although we use + * SC_LARGE_MINCLASS as the threshold in most places, we + * allow tcache and sec to cache up to + * USIZE_GROW_SLOW_THRESHOLD to minimize the side effect of + * not having size classes for larger sizes. Thus, we assert + * the size is no larger than USIZE_GROW_SLOW_THRESHOLD here + * instead of SC_LARGE_MINCLASS. 
+ */ + assert(!sz_limit_usize_gap_enabled() || + size <= USIZE_GROW_SLOW_THRESHOLD); + return size; +} + JEMALLOC_ALWAYS_INLINE void sz_size2index_usize_fastpath(size_t size, szind_t *ind, size_t *usize) { if (util_compile_time_const(size)) { @@ -296,7 +328,7 @@ sz_s2u_compute(size_t size) { (ZU(1) << lg_ceil)); } #endif - { + if (size <= SC_SMALL_MAXCLASS || !sz_limit_usize_gap_enabled()) { size_t x = lg_floor((size<<1)-1); size_t lg_delta = (x < SC_LG_NGROUP + LG_QUANTUM + 1) ? LG_QUANTUM : x - SC_LG_NGROUP - 1; @@ -304,11 +336,22 @@ sz_s2u_compute(size_t size) { size_t delta_mask = delta - 1; size_t usize = (size + delta_mask) & ~delta_mask; return usize; + } else { + /* + * With sz_limit_usize_gap_enabled() == true, usize of a large + * allocation is calculated by ceiling size to the smallest + * multiple of PAGE to minimize the memory overhead, especially + * when using hugepages. + */ + size_t usize = PAGE_CEILING(size); + assert(usize - size < PAGE); + return usize; } } JEMALLOC_ALWAYS_INLINE size_t sz_s2u_lookup(size_t size) { + assert(!config_limit_usize_gap || size < SC_LARGE_MINCLASS); size_t ret = sz_index2size_lookup(sz_size2index_lookup(size)); assert(ret == sz_s2u_compute(size)); diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index eebad79f..f13ff748 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -19,7 +19,11 @@ typedef struct tcaches_s tcaches_t; /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ #define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1) -#define TCACHE_LG_MAXCLASS_LIMIT 23 /* tcache_max = 8M */ +#ifdef LIMIT_USIZE_GAP + #define TCACHE_LG_MAXCLASS_LIMIT LG_USIZE_GROW_SLOW_THRESHOLD +#else + #define TCACHE_LG_MAXCLASS_LIMIT 23 /* tcache_max = 8M */ +#endif #define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT) #define TCACHE_NBINS_MAX (SC_NBINS + SC_NGROUP * \ (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) + 1) 
diff --git a/src/arena.c b/src/arena.c index ab6006d7..54ecc403 100644 --- a/src/arena.c +++ b/src/arena.c @@ -145,8 +145,18 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, assert(nmalloc - ndalloc <= SIZE_T_MAX); size_t curlextents = (size_t)(nmalloc - ndalloc); lstats[i].curlextents += curlextents; - astats->allocated_large += - curlextents * sz_index2size(SC_NBINS + i); + + if (config_limit_usize_gap) { + uint64_t active_bytes = locked_read_u64(tsdn, + LOCKEDINT_MTX(arena->stats.mtx), + &arena->stats.lstats[i].active_bytes); + locked_inc_u64_unsynchronized( + &lstats[i].active_bytes, active_bytes); + astats->allocated_large += active_bytes; + } else { + astats->allocated_large += + curlextents * sz_index2size(SC_NBINS + i); + } } pa_shard_stats_merge(tsdn, &arena->pa_shard, &astats->pa_shard_stats, @@ -315,6 +325,11 @@ arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[hindex].nmalloc, 1); + if (config_limit_usize_gap) { + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + &arena->stats.lstats[hindex].active_bytes, + usize); + } LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } @@ -338,6 +353,11 @@ arena_large_dalloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[hindex].ndalloc, 1); + if (config_limit_usize_gap) { + locked_dec_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + &arena->stats.lstats[hindex].active_bytes, + usize); + } LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } @@ -802,7 +822,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { assert(alloc_ctx.szind != SC_NSIZES); if (config_stats || (config_prof && opt_prof)) { - usize = sz_index2size(alloc_ctx.szind); + usize = emap_alloc_ctx_usize_get(&alloc_ctx); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); } /* 
Remove large allocation from prof sample set. */ @@ -1346,7 +1366,7 @@ arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, assert(sz_can_use_slab(size)); return arena_malloc_small(tsdn, arena, ind, zero); } else { - return large_malloc(tsdn, arena, sz_index2size(ind), zero); + return large_malloc(tsdn, arena, sz_s2u(size), zero); } } diff --git a/src/ctl.c b/src/ctl.c index 1ebcbf8e..73d4cb66 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -168,6 +168,7 @@ CTL_PROTO(opt_prof_sys_thread_name) CTL_PROTO(opt_prof_time_res) CTL_PROTO(opt_lg_san_uaf_align) CTL_PROTO(opt_zero_realloc) +CTL_PROTO(opt_limit_usize_gap) CTL_PROTO(opt_malloc_conf_symlink) CTL_PROTO(opt_malloc_conf_env_var) CTL_PROTO(opt_malloc_conf_global_var) @@ -557,6 +558,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("zero_realloc"), CTL(opt_zero_realloc)}, {NAME("debug_double_free_max_scan"), CTL(opt_debug_double_free_max_scan)}, + {NAME("limit_usize_gap"), CTL(opt_limit_usize_gap)}, {NAME("malloc_conf"), CHILD(named, opt_malloc_conf)} }; @@ -2341,6 +2343,8 @@ CTL_RO_NL_CGEN(config_uaf_detection, opt_lg_san_uaf_align, opt_lg_san_uaf_align, ssize_t) CTL_RO_NL_GEN(opt_zero_realloc, zero_realloc_mode_names[opt_zero_realloc_action], const char *) +CTL_RO_NL_CGEN(config_limit_usize_gap, opt_limit_usize_gap, opt_limit_usize_gap, + bool) /* malloc_conf options */ CTL_RO_NL_CGEN(opt_malloc_conf_symlink, opt_malloc_conf_symlink, @@ -3364,8 +3368,8 @@ arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, } CTL_RO_NL_GEN(arenas_nlextents, SC_NSIZES - SC_NBINS, unsigned) -CTL_RO_NL_GEN(arenas_lextent_i_size, sz_index2size(SC_NBINS+(szind_t)mib[2]), - size_t) +CTL_RO_NL_GEN(arenas_lextent_i_size, + sz_index2size_unsafe(SC_NBINS+(szind_t)mib[2]), size_t) static const ctl_named_node_t * arenas_lextent_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { diff --git a/src/eset.c b/src/eset.c index 6f8f335e..7dc9cce7 100644 --- a/src/eset.c +++ b/src/eset.c @@ -155,6 +155,71 @@ 
eset_remove(eset_t *eset, edata_t *edata) { cur_extents_npages - (size >> LG_PAGE), ATOMIC_RELAXED); } +edata_t * +eset_enumerate_alignment_search(eset_t *eset, size_t size, pszind_t bin_ind, + size_t alignment) { + if (edata_heap_empty(&eset->bins[bin_ind].heap)) { + return NULL; + } + + edata_t *edata = NULL; + edata_heap_enumerate_helper_t helper; + edata_heap_enumerate_prepare(&eset->bins[bin_ind].heap, &helper, + ESET_ENUMERATE_MAX_NUM, sizeof(helper.bfs_queue)/sizeof(void *)); + while ((edata = + edata_heap_enumerate_next(&eset->bins[bin_ind].heap, &helper)) != + NULL) { + uintptr_t base = (uintptr_t)edata_base_get(edata); + size_t candidate_size = edata_size_get(edata); + if (candidate_size < size) { + continue; + } + + uintptr_t next_align = ALIGNMENT_CEILING((uintptr_t)base, + PAGE_CEILING(alignment)); + if (base > next_align || base + candidate_size <= next_align) { + /* Overflow or not crossing the next alignment. */ + continue; + } + + size_t leadsize = next_align - base; + if (candidate_size - leadsize >= size) { + return edata; + } + } + + return NULL; +} + +edata_t * +eset_enumerate_search(eset_t *eset, size_t size, pszind_t bin_ind, + bool exact_only, edata_cmp_summary_t *ret_summ) { + if (edata_heap_empty(&eset->bins[bin_ind].heap)) { + return NULL; + } + + edata_t *ret = NULL, *edata = NULL; + edata_heap_enumerate_helper_t helper; + edata_heap_enumerate_prepare(&eset->bins[bin_ind].heap, &helper, + ESET_ENUMERATE_MAX_NUM, sizeof(helper.bfs_queue)/sizeof(void *)); + while ((edata = + edata_heap_enumerate_next(&eset->bins[bin_ind].heap, &helper)) != + NULL) { + if ((!exact_only && edata_size_get(edata) >= size) || + (exact_only && edata_size_get(edata) == size)) { + edata_cmp_summary_t temp_summ = + edata_cmp_summary_get(edata); + if (ret == NULL || edata_cmp_summary_comp(temp_summ, + *ret_summ) < 0) { + ret = edata; + *ret_summ = temp_summ; + } + } + } + + return ret; +} + /* * Find an extent with size [min_size, max_size) to satisfy the alignment 
* requirement. For each size, try only the first extent in the heap. @@ -162,8 +227,19 @@ eset_remove(eset_t *eset, edata_t *edata) { static edata_t * eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, size_t alignment) { - pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(min_size)); - pszind_t pind_max = sz_psz2ind(sz_psz_quantize_ceil(max_size)); + pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(min_size)); + pszind_t pind_max = sz_psz2ind(sz_psz_quantize_ceil(max_size)); + + /* See comments in eset_first_fit for why we enumerate search below. */ + pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(min_size)); + if (sz_limit_usize_gap_enabled() && pind != pind_prev) { + edata_t *ret = NULL; + ret = eset_enumerate_alignment_search(eset, min_size, pind_prev, + alignment); + if (ret != NULL) { + return ret; + } + } for (pszind_t i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)pind); @@ -211,8 +287,43 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only, pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); if (exact_only) { + if (sz_limit_usize_gap_enabled()) { + pszind_t pind_prev = + sz_psz2ind(sz_psz_quantize_floor(size)); + return eset_enumerate_search(eset, size, pind_prev, + /* exact_only */ true, &ret_summ); + } else { + return edata_heap_empty(&eset->bins[pind].heap) ? NULL: + edata_heap_first(&eset->bins[pind].heap); + } + } + + /* + * Each element in the eset->bins is a heap corresponding to a size + * class. When sz_limit_usize_gap_enabled() is false, all heaps after + * pind (including pind itself) will surely satisfy the requests while + * heaps before pind cannot satisfy the request because usize is + * calculated based on size classes then. However, when + * sz_limit_usize_gap_enabled() is true, usize is calculated by ceiling + * user requested size to the closest multiple of PAGE. 
This means in + * the heap before pind, i.e., pind_prev, there may exist extents able + * to satisfy the request and we should enumerate the heap when + * pind_prev != pind. + * + * For example, when PAGE=4KB and the user requested size is 1MB + 4KB, + * usize would be 1.25MB when sz_limit_usize_gap_enabled() is false. + * pind points to the heap containing extents ranging in + * [1.25MB, 1.5MB). Thus, searching starting from pind will not miss + * any candidates. When sz_limit_usize_gap_enabled() is true, the + * usize would be 1MB + 4KB and pind still points to the same heap. + * In this case, the heap pind_prev points to, which contains extents + * in the range [1MB, 1.25MB), may contain candidates satisfying the + * usize and thus should be enumerated. + */ + pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(size)); + if (sz_limit_usize_gap_enabled() && pind != pind_prev){ + ret = eset_enumerate_search(eset, size, pind_prev, + /* exact_only */ false, &ret_summ); } for (pszind_t i = diff --git a/src/hpa.c b/src/hpa.c index 932cf201..2a5d7e1f 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -706,7 +706,7 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *deferred_work_generated) { assert(size <= HUGEPAGE); assert(size <= shard->opts.slab_max_alloc || - size == sz_index2size(sz_size2index(size))); + size == sz_s2u(size)); bool oom = false; size_t nsuccess = hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom, diff --git a/src/jemalloc.c b/src/jemalloc.c index 31d4cb27..67456bb7 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -123,6 +123,13 @@ zero_realloc_action_t opt_zero_realloc_action = atomic_zu_t zero_realloc_count = ATOMIC_INIT(0); +bool opt_limit_usize_gap = +#ifdef LIMIT_USIZE_GAP + true; +#else + false; +#endif + const char *const zero_realloc_mode_names[] = { "alloc", "free", @@ -1578,8 +1585,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "hpa_sec_nshards", 0, 0, CONF_CHECK_MIN, 
CONF_DONT_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_alloc, - "hpa_sec_max_alloc", PAGE, 0, CONF_CHECK_MIN, - CONF_DONT_CHECK_MAX, true); + "hpa_sec_max_alloc", PAGE, USIZE_GROW_SLOW_THRESHOLD, + CONF_CHECK_MIN, CONF_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_bytes, "hpa_sec_max_bytes", PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); @@ -1763,6 +1770,11 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "san_guard_large", 0, SIZE_T_MAX, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false) + if (config_limit_usize_gap) { + CONF_HANDLE_BOOL(opt_limit_usize_gap, + "limit_usize_gap"); + } + CONF_ERROR("Invalid conf pair", k, klen, v, vlen); #undef CONF_ERROR #undef CONF_CONTINUE @@ -2182,6 +2194,17 @@ static bool malloc_init_hard(void) { tsd_t *tsd; + if (config_limit_usize_gap) { + assert(TCACHE_MAXCLASS_LIMIT <= USIZE_GROW_SLOW_THRESHOLD); + assert(SC_LOOKUP_MAXCLASS <= USIZE_GROW_SLOW_THRESHOLD); + /* + * This asserts an extreme case where TINY_MAXCLASS is larger + * than LARGE_MINCLASS. It could only happen if some constants + * are configured miserably wrong. + */ + assert(SC_LG_TINY_MAXCLASS <= + (size_t)1ULL << (LG_PAGE + SC_LG_NGROUP)); + } #if defined(_WIN32) && _WIN32_WINNT < 0x0600 _init_init_lock(); #endif @@ -2376,7 +2399,8 @@ aligned_usize_get(size_t size, size_t alignment, size_t *usize, szind_t *ind, if (unlikely(*ind >= SC_NSIZES)) { return true; } - *usize = sz_index2size(*ind); + *usize = sz_limit_usize_gap_enabled()? 
sz_s2u(size): + sz_index2size(*ind); assert(*usize > 0 && *usize <= SC_LARGE_MAXCLASS); return false; } @@ -2924,7 +2948,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); - size_t usize = sz_index2size(alloc_ctx.szind); + size_t usize = emap_alloc_ctx_usize_get(&alloc_ctx); if (config_prof && opt_prof) { prof_free(tsd, ptr, usize, &alloc_ctx); } @@ -2956,35 +2980,41 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { assert(malloc_initialized() || IS_INITIALIZER); emap_alloc_ctx_t alloc_ctx; + szind_t szind = sz_size2index(usize); if (!config_prof) { - alloc_ctx.szind = sz_size2index(usize); - alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); + emap_alloc_ctx_init(&alloc_ctx, szind, (szind < SC_NBINS), + usize); } else { if (likely(!prof_sample_aligned(ptr))) { /* * When the ptr is not page aligned, it was not sampled. * usize can be trusted to determine szind and slab. */ - alloc_ctx.szind = sz_size2index(usize); - alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); + emap_alloc_ctx_init(&alloc_ctx, szind, + (szind < SC_NBINS), usize); } else if (opt_prof) { + /* + * Small sampled allocs promoted can still get correct + * usize here. Check comments in edata_usize_get. + */ emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); if (config_opt_safety_checks) { /* Small alloc may have !slab (sampled). 
*/ + size_t true_size = + emap_alloc_ctx_usize_get(&alloc_ctx); if (unlikely(alloc_ctx.szind != sz_size2index(usize))) { safety_check_fail_sized_dealloc( /* current_dealloc */ true, ptr, - /* true_size */ sz_index2size( - alloc_ctx.szind), + /* true_size */ true_size, /* input_size */ usize); } } } else { - alloc_ctx.szind = sz_size2index(usize); - alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); + emap_alloc_ctx_init(&alloc_ctx, szind, + (szind < SC_NBINS), usize); } } bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); @@ -3486,7 +3516,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); - old_usize = sz_index2size(alloc_ctx.szind); + old_usize = emap_alloc_ctx_usize_get(&alloc_ctx); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); if (aligned_usize_get(size, alignment, &usize, NULL, false)) { goto label_oom; @@ -3756,7 +3786,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); - old_usize = sz_index2size(alloc_ctx.szind); + old_usize = emap_alloc_ctx_usize_get(&alloc_ctx); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); /* * The API explicitly absolves itself of protecting against (size + diff --git a/src/prof_data.c b/src/prof_data.c index 39af0c90..437673ee 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -513,7 +513,13 @@ void prof_unbias_map_init(void) { /* See the comment in prof_sample_new_event_wait */ #ifdef JEMALLOC_PROF for (szind_t i = 0; i < SC_NSIZES; i++) { - double sz = (double)sz_index2size(i); + /* + * When limit_usize_gap is enabled, the unbiased calculation + * here is not as accurate as it was because usize now changes + * in a finer grain while the unbiased_sz is still calculated + * using the old way. 
+ */ + double sz = (double)sz_index2size_unsafe(i); double rate = (double)(ZU(1) << lg_prof_sample); double div_val = 1.0 - exp(-sz / rate); double unbiased_sz = sz / div_val; diff --git a/src/psset.c b/src/psset.c index 9a833193..e617f426 100644 --- a/src/psset.c +++ b/src/psset.c @@ -337,18 +337,50 @@ psset_update_end(psset_t *psset, hpdata_t *ps) { hpdata_assert_consistent(ps); } +hpdata_t * +psset_enumerate_search(psset_t *psset, pszind_t pind, size_t size) { + if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { + return NULL; + } + + hpdata_t *ps = NULL; + hpdata_age_heap_enumerate_helper_t helper; + hpdata_age_heap_enumerate_prepare(&psset->pageslabs[pind], &helper, + PSSET_ENUMERATE_MAX_NUM, sizeof(helper.bfs_queue) / sizeof(void *)); + + while ((ps = hpdata_age_heap_enumerate_next(&psset->pageslabs[pind], + &helper))) { + if (hpdata_longest_free_range_get(ps) >= size) { + return ps; + } + } + + return NULL; +} + hpdata_t * psset_pick_alloc(psset_t *psset, size_t size) { assert((size & PAGE_MASK) == 0); assert(size <= HUGEPAGE); pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size)); + hpdata_t *ps = NULL; + + /* See comments in eset_first_fit for why we enumerate search below. 
*/ + pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(size)); + if (sz_limit_usize_gap_enabled() && pind_prev < min_pind) { + ps = psset_enumerate_search(psset, pind_prev, size); + if (ps != NULL) { + return ps; + } + } + pszind_t pind = (pszind_t)fb_ffs(psset->pageslab_bitmap, PSSET_NPSIZES, (size_t)min_pind); if (pind == PSSET_NPSIZES) { return hpdata_empty_list_first(&psset->empty); } - hpdata_t *ps = hpdata_age_heap_first(&psset->pageslabs[pind]); + ps = hpdata_age_heap_first(&psset->pageslabs[pind]); if (ps == NULL) { return NULL; } diff --git a/src/sec.c b/src/sec.c index 19d69ff4..8827d1bd 100644 --- a/src/sec.c +++ b/src/sec.c @@ -24,6 +24,13 @@ bool sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, const sec_opts_t *opts) { assert(opts->max_alloc >= PAGE); + /* + * Same as tcache, sec do not cache allocs/dallocs larger than + * USIZE_GROW_SLOW_THRESHOLD because the usize above this increases + * by PAGE and the number of usizes is too large. + */ + assert(!sz_limit_usize_gap_enabled() || + opts->max_alloc <= USIZE_GROW_SLOW_THRESHOLD); size_t max_alloc = PAGE_FLOOR(opts->max_alloc); pszind_t npsizes = sz_psz2ind(max_alloc) + 1; diff --git a/src/tcache.c b/src/tcache.c index 15da14da..270d38ac 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -1047,7 +1047,8 @@ tcache_bin_flush_impl_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin ndeferred++; continue; } - if (large_dalloc_safety_checks(edata, ptr, binind)) { + if (large_dalloc_safety_checks(edata, ptr, + sz_index2size(binind))) { /* See the comment in isfree. 
*/ continue; } diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index 68b8f381..85d9238b 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -49,7 +49,7 @@ TEST_BEGIN(test_grow_and_shrink) { size_t tsz; #define NCYCLES 3 unsigned i, j; -#define NSZS 1024 +#define NSZS 64 size_t szs[NSZS]; #define MAXSZ ZU(12 * 1024 * 1024) diff --git a/test/test.sh.in b/test/test.sh.in index b4fbb355..a4ee9396 100644 --- a/test/test.sh.in +++ b/test/test.sh.in @@ -43,6 +43,7 @@ for t in $@; do # per test shell script to ignore the @JEMALLOC_CPREFIX@ detail). enable_fill=@enable_fill@ \ enable_prof=@enable_prof@ \ + limit_usize_gap=@limit_usize_gap@ \ . @srcroot@${t}.sh && \ export_malloc_conf && \ $JEMALLOC_TEST_PREFIX ${t}@exe@ @abs_srcroot@ @abs_objroot@ diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 8ef0786c..09536b29 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -78,7 +78,8 @@ vsalloc(tsdn_t *tsdn, const void *ptr) { return 0; } - return sz_index2size(full_alloc_ctx.szind); + return config_limit_usize_gap? 
edata_usize_get(full_alloc_ctx.edata): + sz_index2size(full_alloc_ctx.szind); } static unsigned diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 50b96a87..6c42729a 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -5,7 +5,7 @@ #define SHARD_IND 111 -#define ALLOC_MAX (HUGEPAGE / 4) +#define ALLOC_MAX (HUGEPAGE) typedef struct test_data_s test_data_t; struct test_data_s { diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 02fedaa7..296b7bff 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -332,6 +332,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof_sys_thread_name, prof); TEST_MALLCTL_OPT(ssize_t, lg_san_uaf_align, uaf_detection); TEST_MALLCTL_OPT(unsigned, debug_double_free_max_scan, always); + TEST_MALLCTL_OPT(bool, limit_usize_gap, limit_usize_gap); #undef TEST_MALLCTL_OPT } diff --git a/test/unit/ph.c b/test/unit/ph.c index 28f5e488..0339f993 100644 --- a/test/unit/ph.c +++ b/test/unit/ph.c @@ -2,8 +2,9 @@ #include "jemalloc/internal/ph.h" +#define BFS_ENUMERATE_MAX 30 typedef struct node_s node_t; -ph_structs(heap, node_t); +ph_structs(heap, node_t, BFS_ENUMERATE_MAX); struct node_s { #define NODE_MAGIC 0x9823af7e @@ -239,6 +240,22 @@ TEST_BEGIN(test_ph_random) { expect_false(heap_empty(&heap), "Heap should not be empty"); + /* Enumerate nodes. */ + heap_enumerate_helper_t helper; + uint16_t max_queue_size = sizeof(helper.bfs_queue) + / sizeof(void *); + expect_u_eq(max_queue_size, BFS_ENUMERATE_MAX, + "Incorrect bfs queue length initialized"); + assert(max_queue_size == BFS_ENUMERATE_MAX); + heap_enumerate_prepare(&heap, &helper, + BFS_ENUMERATE_MAX, max_queue_size); + size_t node_count = 0; + while(heap_enumerate_next(&heap, &helper)) { + node_count ++; + } + expect_lu_eq(node_count, j, + "Unexpected enumeration results."); + /* Remove nodes. 
*/ switch (i % 6) { case 0: diff --git a/test/unit/sec.c b/test/unit/sec.c index 0b5e1c31..cfef043f 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -412,7 +412,8 @@ TEST_BEGIN(test_expand_shrink_delegate) { bool deferred_work_generated = false; - test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 10 * PAGE, + test_sec_init(&sec, &ta.pai, /* nshards */ 1, + /* max_alloc */ USIZE_GROW_SLOW_THRESHOLD, /* max_bytes */ 1000 * PAGE); edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, /* guarded */ false, /* frequent_reuse */ false, diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index 9e8a408f..24913803 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -26,7 +26,8 @@ TEST_BEGIN(test_size_classes) { size_t size_class, max_size_class; szind_t index, gen_index, max_index; - max_size_class = get_max_size_class(); + max_size_class = sz_limit_usize_gap_enabled()? SC_SMALL_MAXCLASS: + get_max_size_class(); max_index = sz_size2index(max_size_class); for (index = 0, size_class = sz_index2size(index); index < max_index || @@ -79,6 +80,40 @@ TEST_BEGIN(test_size_classes) { } TEST_END +TEST_BEGIN(test_grow_slow_size_classes) { + test_skip_if(!sz_limit_usize_gap_enabled()); + + size_t size = SC_LARGE_MINCLASS; + size_t target_usize = SC_LARGE_MINCLASS; + size_t max_size = get_max_size_class(); + size_t increase[3] = {PAGE - 1, 1, 1}; + while (size <= max_size) { + size_t usize = sz_s2u(size); + expect_zu_eq(usize, target_usize, + "sz_s2u() does not generate usize as expected."); + size += increase[0]; + usize = sz_s2u(size); + target_usize += PAGE; + expect_zu_eq(usize, target_usize, + "sz_s2u() does not generate usize as expected."); + size += increase[1]; + usize = sz_s2u(size); + expect_zu_eq(usize, target_usize, + "sz_s2u() does not generate usize as expected."); + size += increase[2]; + usize = sz_s2u(size); + target_usize += PAGE; + expect_zu_eq(usize, target_usize, + "sz_s2u() does not generate usize 
as expected."); + if (target_usize << 1 < target_usize) { + break; + } + target_usize = target_usize << 1; + size = target_usize; + } +} +TEST_END + TEST_BEGIN(test_psize_classes) { size_t size_class, max_psz; pszind_t pind, max_pind; @@ -182,6 +217,7 @@ int main(void) { return test( test_size_classes, + test_grow_slow_size_classes, test_psize_classes, test_overflow); } diff --git a/test/unit/size_classes.sh b/test/unit/size_classes.sh new file mode 100644 index 00000000..93d5e8d1 --- /dev/null +++ b/test/unit/size_classes.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${limit_usize_gap}" = "x1" ] ; then + export MALLOC_CONF="limit_usize_gap:true" +fi diff --git a/test/unit/stats.c b/test/unit/stats.c index 203a71b5..584a582f 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -202,17 +202,22 @@ TEST_END TEST_BEGIN(test_stats_arenas_large) { void *p; - size_t sz, allocated; + size_t sz, allocated, allocated_before; uint64_t epoch, nmalloc, ndalloc; + size_t malloc_size = (1U << (SC_LG_LARGE_MINCLASS + 1)) + 1; int expected = config_stats ? 
0 : ENOENT; - p = mallocx((1U << SC_LG_LARGE_MINCLASS), MALLOCX_ARENA(0)); + sz = sizeof(size_t); + expect_d_eq(mallctl("stats.arenas.0.large.allocated", + (void *)&allocated_before, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); + + p = mallocx(malloc_size, MALLOCX_ARENA(0)); expect_ptr_not_null(p, "Unexpected mallocx() failure"); expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); - sz = sizeof(size_t); expect_d_eq(mallctl("stats.arenas.0.large.allocated", (void *)&allocated, &sz, NULL, 0), expected, "Unexpected mallctl() result"); @@ -223,8 +228,10 @@ TEST_BEGIN(test_stats_arenas_large) { &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { - expect_zu_gt(allocated, 0, + expect_zu_ge(allocated_before, 0, "allocated should be greater than zero"); + expect_zu_ge(allocated - allocated_before, sz_s2u(malloc_size), + "the diff between allocated should be greater than the allocation made"); expect_u64_gt(nmalloc, 0, "nmalloc should be greater than zero"); expect_u64_ge(nmalloc, ndalloc,