From 8347f1045aaf975192b06c3168a40a05ae8c206a Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Wed, 16 Apr 2025 11:57:55 -0700 Subject: [PATCH] Renaming limit_usize_gap to disable_large_size_classes --- include/jemalloc/internal/edata.h | 12 +++---- include/jemalloc/internal/emap.h | 4 +-- .../internal/jemalloc_internal_externs.h | 2 +- include/jemalloc/internal/sc.h | 10 +++--- include/jemalloc/internal/sz.h | 14 ++++---- src/ctl.c | 6 ++-- src/eset.c | 22 ++++++------- src/jemalloc.c | 19 ++++++++--- src/pac.c | 33 ++++++++++--------- src/prof_data.c | 2 +- src/psset.c | 2 +- src/sec.c | 2 +- src/stats.c | 2 +- test/test.sh.in | 2 +- test/unit/arena_decay.c | 4 +-- test/unit/mallctl.c | 2 +- test/unit/size_classes.c | 4 +-- test/unit/size_classes.sh | 4 +-- 18 files changed, 78 insertions(+), 68 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index b087ea31..e41e4efa 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -291,12 +291,12 @@ static inline size_t edata_usize_get(const edata_t *edata) { assert(edata != NULL); /* - * When sz_limit_usize_gap_enabled() is true, two cases: + * When sz_large_size_classes_disabled() is true, two cases: * 1. if usize_from_ind is not smaller than SC_LARGE_MINCLASS, * usize_from_size is accurate; * 2. otherwise, usize_from_ind is accurate. * - * When sz_limit_usize_gap_enabled() is not true, the two should be the + * When sz_large_size_classes_disabled() is not true, the two should be the * same when usize_from_ind is not smaller than SC_LARGE_MINCLASS. * * Note sampled small allocs will be promoted. 
Their extent size is @@ -316,9 +316,9 @@ edata_usize_get(const edata_t *edata) { } #endif - if (!sz_limit_usize_gap_enabled() || szind < SC_NBINS) { + if (!sz_large_size_classes_disabled() || szind < SC_NBINS) { size_t usize_from_ind = sz_index2size(szind); - if (!sz_limit_usize_gap_enabled() && + if (!sz_large_size_classes_disabled() && usize_from_ind >= SC_LARGE_MINCLASS) { size_t size = (edata->e_size_esn & EDATA_SIZE_MASK); assert(size > sz_large_pad); @@ -332,8 +332,8 @@ edata_usize_get(const edata_t *edata) { assert(size > sz_large_pad); size_t usize_from_size = size - sz_large_pad; /* - * no matter limit-usize-gap enabled or not, usize retrieved from size - * is not accurate when smaller than SC_LARGE_MINCLASS. + * no matter large size classes disabled or not, usize retrieved from + * size is not accurate when smaller than SC_LARGE_MINCLASS. */ assert(usize_from_size >= SC_LARGE_MINCLASS); return usize_from_size; diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 06ed5d32..fba46abe 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -237,7 +237,7 @@ emap_alloc_ctx_init(emap_alloc_ctx_t *alloc_ctx, szind_t szind, bool slab, alloc_ctx->szind = szind; alloc_ctx->slab = slab; alloc_ctx->usize = usize; - assert(sz_limit_usize_gap_enabled() || + assert(sz_large_size_classes_disabled() || usize == sz_index2size(szind)); } @@ -248,7 +248,7 @@ emap_alloc_ctx_usize_get(emap_alloc_ctx_t *alloc_ctx) { assert(alloc_ctx->usize == sz_index2size(alloc_ctx->szind)); return sz_index2size(alloc_ctx->szind); } - assert(sz_limit_usize_gap_enabled() || + assert(sz_large_size_classes_disabled() || alloc_ctx->usize == sz_index2size(alloc_ctx->szind)); assert(alloc_ctx->usize <= SC_LARGE_MAXCLASS); return alloc_ctx->usize; diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 83a37baf..3b42f833 100644 --- 
a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -39,7 +39,7 @@ extern atomic_zu_t zero_realloc_count; extern bool opt_cache_oblivious; extern unsigned opt_debug_double_free_max_scan; extern size_t opt_calloc_madvise_threshold; -extern bool opt_limit_usize_gap; +extern bool opt_disable_large_size_classes; extern const char *opt_malloc_conf_symlink; extern const char *opt_malloc_conf_env_var; diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 3b9280d8..97956e7a 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -287,11 +287,11 @@ #endif /* - * When limit_usize_gap is enabled, the gaps between two contiguous - * size classes should not exceed PAGE. This means there should be no concept - * of size classes for sizes > SC_SMALL_MAXCLASS (or >= SC_LARGE_MINCLASS). - * However, between SC_LARGE_MINCLASS (SC_NGROUP * PAGE) and - * 2 * SC_NGROUP * PAGE, the size class also happens to be aligned with PAGE. + * When large size classes are disabled, there is no concept of size classes + * for sizes > SC_SMALLMAXCLASS (or >= SC_LARGE_MINCLASS). This ensures that + * the overhead between the usable size and the user request size will not + * exceed PAGE. Between SC_LARGE_MINCLASS (SC_NGROUP * PAGE) and + * 2 * SC_NGROUP * PAGE, the size classes also happen to be aligned with PAGE. * Since tcache relies on size classes to work and it greatly increases the * perf of allocs & deallocs, we extend the existence of size class to * 2 * SC_NGROUP * PAGE ONLY for the tcache module. 
This means for all other diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 6f161260..e6cfa6a9 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -55,8 +55,8 @@ extern size_t sz_large_pad; extern void sz_boot(const sc_data_t *sc_data, bool cache_oblivious); JEMALLOC_ALWAYS_INLINE bool -sz_limit_usize_gap_enabled() { - return opt_limit_usize_gap; +sz_large_size_classes_disabled() { + return opt_disable_large_size_classes; } JEMALLOC_ALWAYS_INLINE pszind_t @@ -269,11 +269,11 @@ sz_index2size_unsafe(szind_t index) { JEMALLOC_ALWAYS_INLINE size_t sz_index2size(szind_t index) { - assert(!sz_limit_usize_gap_enabled() || + assert(!sz_large_size_classes_disabled() || index <= sz_size2index(USIZE_GROW_SLOW_THRESHOLD)); size_t size = sz_index2size_unsafe(index); /* - * With limit_usize_gap enabled, the usize above + * With large size classes disabled, the usize above * SC_LARGE_MINCLASS should grow by PAGE. However, for sizes * in [SC_LARGE_MINCLASS, USIZE_GROW_SLOW_THRESHOLD], the * usize would not change because the size class gap in this @@ -285,7 +285,7 @@ sz_index2size(szind_t index) { * the size is no larger than USIZE_GROW_SLOW_THRESHOLD here * instead of SC_LARGE_MINCLASS. */ - assert(!sz_limit_usize_gap_enabled() || + assert(!sz_large_size_classes_disabled() || size <= USIZE_GROW_SLOW_THRESHOLD); return size; } @@ -335,11 +335,11 @@ sz_s2u_compute(size_t size) { (ZU(1) << lg_ceil)); } #endif - if (size <= SC_SMALL_MAXCLASS || !sz_limit_usize_gap_enabled()) { + if (size <= SC_SMALL_MAXCLASS || !sz_large_size_classes_disabled()) { return sz_s2u_compute_using_delta(size); } else { /* - * With sz_limit_usize_gap_enabled() == true, usize of a large + * With sz_large_size_classes_disabled() == true, usize of a large * allocation is calculated by ceiling size to the smallest * multiple of PAGE to minimize the memory overhead, especially * when using hugepages. 
diff --git a/src/ctl.c b/src/ctl.c index 49820af6..92d254c1 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -170,7 +170,7 @@ CTL_PROTO(opt_prof_sys_thread_name) CTL_PROTO(opt_prof_time_res) CTL_PROTO(opt_lg_san_uaf_align) CTL_PROTO(opt_zero_realloc) -CTL_PROTO(opt_limit_usize_gap) +CTL_PROTO(opt_disable_large_size_classes) CTL_PROTO(opt_process_madvise_max_batch) CTL_PROTO(opt_malloc_conf_symlink) CTL_PROTO(opt_malloc_conf_env_var) @@ -564,7 +564,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("zero_realloc"), CTL(opt_zero_realloc)}, {NAME("debug_double_free_max_scan"), CTL(opt_debug_double_free_max_scan)}, - {NAME("limit_usize_gap"), CTL(opt_limit_usize_gap)}, + {NAME("disable_large_size_classes"), CTL(opt_disable_large_size_classes)}, {NAME("process_madvise_max_batch"), CTL(opt_process_madvise_max_batch)}, {NAME("malloc_conf"), CHILD(named, opt_malloc_conf)} }; @@ -2355,7 +2355,7 @@ CTL_RO_NL_CGEN(config_uaf_detection, opt_lg_san_uaf_align, opt_lg_san_uaf_align, ssize_t) CTL_RO_NL_GEN(opt_zero_realloc, zero_realloc_mode_names[opt_zero_realloc_action], const char *) -CTL_RO_NL_GEN(opt_limit_usize_gap, opt_limit_usize_gap, bool) +CTL_RO_NL_GEN(opt_disable_large_size_classes, opt_disable_large_size_classes, bool) /* malloc_conf options */ CTL_RO_NL_CGEN(opt_malloc_conf_symlink, opt_malloc_conf_symlink, diff --git a/src/eset.c b/src/eset.c index 7dc9cce7..677162ff 100644 --- a/src/eset.c +++ b/src/eset.c @@ -232,7 +232,7 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, /* See comments in eset_first_fit for why we enumerate search below. 
*/ pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(min_size)); - if (sz_limit_usize_gap_enabled() && pind != pind_prev) { + if (sz_large_size_classes_disabled() && pind != pind_prev) { edata_t *ret = NULL; ret = eset_enumerate_alignment_search(eset, min_size, pind_prev, alignment); @@ -287,7 +287,7 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only, pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); if (exact_only) { - if (sz_limit_usize_gap_enabled()) { + if (sz_large_size_classes_disabled()) { pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(size)); return eset_enumerate_search(eset, size, pind_prev, @@ -300,28 +300,28 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only, /* * Each element in the eset->bins is a heap corresponding to a size - * class. When sz_limit_usize_gap_enabled() is false, all heaps after + * class. When sz_large_size_classes_disabled() is false, all heaps after * pind (including pind itself) will surely satisfy the rquests while * heaps before pind cannot satisfy the request because usize is * calculated based on size classes then. However, when - * sz_limit_usize_gap_enabled() is true, usize is calculated by ceiling - * user requested size to the closest multiple of PAGE. This means in - * the heap before pind, i.e., pind_prev, there may exist extents able - * to satisfy the request and we should enumerate the heap when - * pind_prev != pind. + * sz_large_size_classes_disabled() is true, usize is calculated by + * ceiling user requested size to the closest multiple of PAGE. This + * means in the heap before pind, i.e., pind_prev, there may exist + * extents able to satisfy the request and we should enumerate the heap + * when pind_prev != pind. * * For example, when PAGE=4KB and the user requested size is 1MB + 4KB, - * usize would be 1.25MB when sz_limit_usize_gap_enabled() is false. + * usize would be 1.25MB when sz_large_size_classes_disabled() is false. 
* pind points to the heap containing extents ranging in * [1.25MB, 1.5MB). Thus, searching starting from pind will not miss - * any candidates. When sz_limit_usize_gap_enabled() is true, the + * any candidates. When sz_large_size_classes_disabled() is true, the * usize would be 1MB + 4KB and pind still points to the same heap. * In this case, the heap pind_prev points to, which contains extents * in the range [1MB, 1.25MB), may contain candidates satisfying the * usize and thus should be enumerated. */ pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(size)); - if (sz_limit_usize_gap_enabled() && pind != pind_prev){ + if (sz_large_size_classes_disabled() && pind != pind_prev){ ret = eset_enumerate_search(eset, size, pind_prev, /* exact_only */ false, &ret_summ); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 445955b0..360635a8 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -123,7 +123,12 @@ zero_realloc_action_t opt_zero_realloc_action = atomic_zu_t zero_realloc_count = ATOMIC_INIT(0); -bool opt_limit_usize_gap = true; +/* + * Disabling large size classes is now the default behavior in jemalloc. + * Although it is configurable in MALLOC_CONF, this is mainly for debugging + * purposes and should not be tuned. + */ +bool opt_disable_large_size_classes = true; const char *const zero_realloc_mode_names[] = { "alloc", @@ -1780,8 +1785,14 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "san_guard_large", 0, SIZE_T_MAX, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false) - CONF_HANDLE_BOOL(opt_limit_usize_gap, - "limit_usize_gap"); + /* + * Disabling large size classes is now the default + * behavior in jemalloc. Although it is configurable + * in MALLOC_CONF, this is mainly for debugging + * purposes and should not be tuned. 
+ */ + CONF_HANDLE_BOOL(opt_disable_large_size_classes, + "disable_large_size_classes"); CONF_ERROR("Invalid conf pair", k, klen, v, vlen); #undef CONF_ERROR @@ -2406,7 +2417,7 @@ aligned_usize_get(size_t size, size_t alignment, size_t *usize, szind_t *ind, if (unlikely(*ind >= SC_NSIZES)) { return true; } - *usize = sz_limit_usize_gap_enabled()? sz_s2u(size): + *usize = sz_large_size_classes_disabled()? sz_s2u(size): sz_index2size(*ind); assert(*usize > 0 && *usize <= SC_LARGE_MAXCLASS); return false; diff --git a/src/pac.c b/src/pac.c index 12c1e444..e9ba7957 100644 --- a/src/pac.c +++ b/src/pac.c @@ -143,25 +143,26 @@ pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, } /* - * We batched allocate a larger extent when limit_usize_gap is enabled + * We batched allocate a larger extent with large size classes disabled * because the reuse of extents in the dirty pool is worse without size - * classes for large allocs. For instance, when limit_usize_gap is not - * enabled, 1.1MB, 1.15MB, and 1.2MB allocs will all be ceiled to - * 1.25MB and can reuse the same buffer if they are alloc & dalloc - * sequentially. However, with limit_usize_gap enabled, they cannot - * reuse the same buffer and their sequential allocs & dallocs will - * result in three different extents. Thus, we cache extra mergeable - * extents in the dirty pool to improve the reuse. We skip this - * optimization if both maps_coalesce and opt_retain are disabled - * because VM is not cheap enough to be used aggressively and extents - * cannot be merged at will (only extents from the same VirtualAlloc - * can be merged). Note that it could still be risky to cache more - * extents when either mpas_coalesce or opt_retain is enabled. Yet - * doing so is still beneficial in improving the reuse of extents - * with some limits. This choice should be reevaluated if + * classes for large allocs. 
For instance, when + disable_large_size_classes is false, 1.1MB, 1.15MB, and 1.2MB allocs + will all be ceiled to 1.25MB and can reuse the same buffer if they + are alloc & dalloc sequentially. However, with + disable_large_size_classes being true, they cannot reuse the same + buffer and their sequential allocs & dallocs will result in three + different extents. Thus, we cache extra mergeable extents in the + dirty pool to improve the reuse. We skip this optimization if both + maps_coalesce and opt_retain are disabled because VM is not cheap + enough in such cases to be used aggressively and extents cannot be + merged at will (only extents from the same VirtualAlloc can be + merged). Note that it could still be risky to cache more extents + when either maps_coalesce or opt_retain is enabled. Yet doing + so is still beneficial in improving the reuse of extents with some + limits. This choice should be reevaluated if + pac_alloc_retained_batched_size is changed to be more aggressive. */ - if (sz_limit_usize_gap_enabled() && edata == NULL && + if (sz_large_size_classes_disabled() && edata == NULL && (maps_coalesce || opt_retain)) { size_t batched_size = pac_alloc_retained_batched_size(size); /* diff --git a/src/prof_data.c b/src/prof_data.c index 437673ee..edc5c558 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -514,7 +514,7 @@ void prof_unbias_map_init(void) { #ifdef JEMALLOC_PROF for (szind_t i = 0; i < SC_NSIZES; i++) { /* - * When limit_usize_gap is enabled, the unbiased calculation + * With large size classes disabled, the unbiased calculation * here is not as accurate as it was because usize now changes * in a finer grain while the unbiased_sz is still calculated * using the old way. diff --git a/src/psset.c b/src/psset.c index e617f426..97694301 100644 --- a/src/psset.c +++ b/src/psset.c @@ -368,7 +368,7 @@ psset_pick_alloc(psset_t *psset, size_t size) { /* See comments in eset_first_fit for why we enumerate search below. 
*/ pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(size)); - if (sz_limit_usize_gap_enabled() && pind_prev < min_pind) { + if (sz_large_size_classes_disabled() && pind_prev < min_pind) { ps = psset_enumerate_search(psset, pind_prev, size); if (ps != NULL) { return ps; diff --git a/src/sec.c b/src/sec.c index 8827d1bd..67585a71 100644 --- a/src/sec.c +++ b/src/sec.c @@ -29,7 +29,7 @@ sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, * USIZE_GROW_SLOW_THRESHOLD because the usize above this increases * by PAGE and the number of usizes is too large. */ - assert(!sz_limit_usize_gap_enabled() || + assert(!sz_large_size_classes_disabled() || opts->max_alloc <= USIZE_GROW_SLOW_THRESHOLD); size_t max_alloc = PAGE_FLOOR(opts->max_alloc); diff --git a/src/stats.c b/src/stats.c index db9b9f43..d3127483 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1730,7 +1730,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_CHAR_P("stats_interval_opts") OPT_WRITE_CHAR_P("zero_realloc") OPT_WRITE_SIZE_T("process_madvise_max_batch") - OPT_WRITE_BOOL("limit_usize_gap") + OPT_WRITE_BOOL("disable_large_size_classes") emitter_dict_end(emitter); /* Close "opt". */ diff --git a/test/test.sh.in b/test/test.sh.in index a4ee9396..dc13bc28 100644 --- a/test/test.sh.in +++ b/test/test.sh.in @@ -43,7 +43,7 @@ for t in $@; do # per test shell script to ignore the @JEMALLOC_CPREFIX@ detail). enable_fill=@enable_fill@ \ enable_prof=@enable_prof@ \ - limit_usize_gap=@limit_usize_gap@ \ + disable_large_size_classes=@disable_large_size_classes@ \ . 
@srcroot@${t}.sh && \ export_malloc_conf && \ $JEMALLOC_TEST_PREFIX ${t}@exe@ @abs_srcroot@ @abs_objroot@ diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c index 00a38326..177ba505 100644 --- a/test/unit/arena_decay.c +++ b/test/unit/arena_decay.c @@ -411,11 +411,11 @@ TEST_BEGIN(test_decay_never) { size_t pdirty_prev = get_arena_pdirty(arena_ind); size_t pmuzzy_prev = get_arena_pmuzzy(arena_ind); /* - * With limit_usize_gap enabled, some more extents + * With sz_large_size_classes_disabled() = true, some more extents * are cached in the dirty pool, making the assumption below * not true. */ - if (!sz_limit_usize_gap_enabled()) { + if (!sz_large_size_classes_disabled()) { expect_zu_eq(pdirty_prev, 0, "Unexpected dirty pages"); } expect_zu_eq(pmuzzy_prev, 0, "Unexpected muzzy pages"); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 7d4634e8..cf9b88aa 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -333,7 +333,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof_sys_thread_name, prof); TEST_MALLCTL_OPT(ssize_t, lg_san_uaf_align, uaf_detection); TEST_MALLCTL_OPT(unsigned, debug_double_free_max_scan, always); - TEST_MALLCTL_OPT(bool, limit_usize_gap, always); + TEST_MALLCTL_OPT(bool, disable_large_size_classes, always); TEST_MALLCTL_OPT(size_t, process_madvise_max_batch, always); #undef TEST_MALLCTL_OPT diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index 24913803..c373829c 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -26,7 +26,7 @@ TEST_BEGIN(test_size_classes) { size_t size_class, max_size_class; szind_t index, gen_index, max_index; - max_size_class = sz_limit_usize_gap_enabled()? SC_SMALL_MAXCLASS: + max_size_class = sz_large_size_classes_disabled()? 
SC_SMALL_MAXCLASS: get_max_size_class(); max_index = sz_size2index(max_size_class); @@ -81,7 +81,7 @@ TEST_BEGIN(test_size_classes) { TEST_END TEST_BEGIN(test_grow_slow_size_classes) { - test_skip_if(!sz_limit_usize_gap_enabled()); + test_skip_if(!sz_large_size_classes_disabled()); size_t size = SC_LARGE_MINCLASS; size_t target_usize = SC_LARGE_MINCLASS; diff --git a/test/unit/size_classes.sh b/test/unit/size_classes.sh index 93d5e8d1..54363554 100644 --- a/test/unit/size_classes.sh +++ b/test/unit/size_classes.sh @@ -1,5 +1,3 @@ #!/bin/sh -if [ "x${limit_usize_gap}" = "x1" ] ; then - export MALLOC_CONF="limit_usize_gap:true" -fi +export MALLOC_CONF="disable_large_size_classes:true"