From a0d18fe5d76b04ba568b83134228458db528a7ed Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Thu, 18 Apr 2024 15:42:14 -0700 Subject: [PATCH] Modify usize calculation and corresponding tests. --- include/jemalloc/internal/edata.h | 7 +++++- include/jemalloc/internal/sz.h | 33 ++++++++++++++++++++++-- src/ctl.c | 4 +-- src/hpa.c | 2 +- src/jemalloc.c | 1 + src/prof_data.c | 8 +++++- test/integration/rallocx.c | 2 +- test/unit/size_classes.c | 42 ++++++++++++++++++++++++++++++- 8 files changed, 90 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 6d9967b9..f784805e 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -287,9 +287,14 @@ edata_szind_get(const edata_t *edata) { return szind; } +/* + * Calculating usize based on szind is unsafe because the result can be + * inaccurate when sz_limit_usize_gap_enabled() is true. The result should be + * compared against SC_LARGE_MINCLASS before usage. + */ static inline size_t edata_usize_get_from_ind_unsafe(const edata_t *edata) { - return sz_index2size(edata_szind_get(edata)); + return sz_index2size_unsafe(edata_szind_get(edata)); } static inline size_t diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 5c8e3cb1..56c63123 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -266,11 +266,33 @@ sz_index2size_lookup(szind_t index) { } JEMALLOC_ALWAYS_INLINE size_t -sz_index2size(szind_t index) { +sz_index2size_unsafe(szind_t index) { assert(index < SC_NSIZES); return sz_index2size_lookup(index); } +JEMALLOC_ALWAYS_INLINE size_t +sz_index2size(szind_t index) { + size_t size = sz_index2size_unsafe(index); + if (sz_limit_usize_gap_enabled()) { + /* + * With limit_usize_gap enabled, the usize above + * SC_LARGE_MINCLASS should grow by PAGE. However, for sizes + * in [SC_LARGE_MINCLASS, USIZE_GROW_SLOW_THRESHOLD], the + * usize would not change because the size class gap in this + * range is just the same as PAGE. Although we use + * SC_LARGE_MINCLASS as the threshold in most places, we + * allow tcache and sec to cache up to + * USIZE_GROW_SLOW_THRESHOLD to minimize the side effect of + * not having size classes for larger sizes. Thus, we assert + * the size is no larger than USIZE_GROW_SLOW_THRESHOLD here + * instead of SC_LARGE_MINCLASS. + */ + assert(size <= USIZE_GROW_SLOW_THRESHOLD); + } + return size; +} + JEMALLOC_ALWAYS_INLINE void sz_size2index_usize_fastpath(size_t size, szind_t *ind, size_t *usize) { if (util_compile_time_const(size)) { @@ -305,7 +327,7 @@ sz_s2u_compute(size_t size) { (ZU(1) << lg_ceil)); } #endif - { + if (!sz_limit_usize_gap_enabled() || size <= SC_SMALL_MAXCLASS) { size_t x = lg_floor((size<<1)-1); size_t lg_delta = (x < SC_LG_NGROUP + LG_QUANTUM + 1) ? LG_QUANTUM : x - SC_LG_NGROUP - 1; @@ -313,11 +335,18 @@ sz_s2u_compute(size_t size) { size_t delta_mask = delta - 1; size_t usize = (size + delta_mask) & ~delta_mask; return usize; + } else { + size_t usize = ((size + PAGE - 1) >> LG_PAGE) << LG_PAGE; + assert(usize - size < PAGE); + return usize; } } JEMALLOC_ALWAYS_INLINE size_t sz_s2u_lookup(size_t size) { + if (config_limit_usize_gap) { + assert(size < SC_LARGE_MINCLASS); + } size_t ret = sz_index2size_lookup(sz_size2index_lookup(size)); assert(ret == sz_s2u_compute(size)); diff --git a/src/ctl.c b/src/ctl.c index 4be491e9..73d4cb66 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3368,8 +3368,8 @@ arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, } CTL_RO_NL_GEN(arenas_nlextents, SC_NSIZES - SC_NBINS, unsigned) -CTL_RO_NL_GEN(arenas_lextent_i_size, sz_index2size(SC_NBINS+(szind_t)mib[2]), - size_t) +CTL_RO_NL_GEN(arenas_lextent_i_size, + sz_index2size_unsafe(SC_NBINS+(szind_t)mib[2]), size_t) static const ctl_named_node_t * arenas_lextent_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { diff --git a/src/hpa.c b/src/hpa.c index cb3f978c..59291426 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -707,7 +707,7 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *deferred_work_generated) { assert(size <= HUGEPAGE); assert(size <= shard->opts.slab_max_alloc || - size == sz_index2size(sz_size2index(size))); + size == sz_s2u(size)); bool oom = false; size_t nsuccess = hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom, diff --git a/src/jemalloc.c b/src/jemalloc.c index 32d72392..cf514ed8 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2204,6 +2204,7 @@ malloc_init_hard(void) { if (config_limit_usize_gap) { assert(TCACHE_MAXCLASS_LIMIT <= USIZE_GROW_SLOW_THRESHOLD); assert(SC_LOOKUP_MAXCLASS <= USIZE_GROW_SLOW_THRESHOLD); + assert(SC_LG_TINY_MAXCLASS <= SC_LARGE_MINCLASS); } #if defined(_WIN32) && _WIN32_WINNT < 0x0600 _init_init_lock(); diff --git a/src/prof_data.c b/src/prof_data.c index 39af0c90..437673ee 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -513,7 +513,13 @@ void prof_unbias_map_init(void) { /* See the comment in prof_sample_new_event_wait */ #ifdef JEMALLOC_PROF for (szind_t i = 0; i < SC_NSIZES; i++) { - double sz = (double)sz_index2size(i); + /* + * When limit_usize_gap is enabled, the unbiased calculation + * here is not as accurate as it was because usize now changes + * in a finer grain while the unbiased_sz is still calculated + * using the old way. + */ + double sz = (double)sz_index2size_unsafe(i); double rate = (double)(ZU(1) << lg_prof_sample); double div_val = 1.0 - exp(-sz / rate); double unbiased_sz = sz / div_val; diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index 68b8f381..f7e4998b 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -49,7 +49,7 @@ TEST_BEGIN(test_grow_and_shrink) { size_t tsz; #define NCYCLES 3 unsigned i, j; -#define NSZS 1024 +#define NSZS 75 size_t szs[NSZS]; #define MAXSZ ZU(12 * 1024 * 1024) diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index 9e8a408f..1b1454cf 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -26,7 +26,8 @@ TEST_BEGIN(test_size_classes) { size_t size_class, max_size_class; szind_t index, gen_index, max_index; - max_size_class = get_max_size_class(); + max_size_class = sz_limit_usize_gap_enabled()? SC_SMALL_MAXCLASS: + get_max_size_class(); max_index = sz_size2index(max_size_class); for (index = 0, size_class = sz_index2size(index); index < max_index || @@ -79,6 +80,44 @@ TEST_BEGIN(test_size_classes) { } TEST_END +TEST_BEGIN(test_grow_slow_size_classes) { + test_skip_if(!config_limit_usize_gap); + /* + * Override runtime option for this test to test usize calculation when + * sz_limit_usize_gap_enabled() is true. + */ + opt_limit_usize_gap = true; + size_t size = SC_LARGE_MINCLASS; + size_t target_usize = SC_LARGE_MINCLASS; + size_t max_size = get_max_size_class(); + size_t increase[3] = {PAGE - 1, 1, 1}; + while (size <= max_size) { + size_t usize = sz_s2u(size); + expect_zu_eq(usize, target_usize, + "sz_s2u() does not generate usize as expected."); + size += increase[0]; + usize = sz_s2u(size); + target_usize += PAGE; + expect_zu_eq(usize, target_usize, + "sz_s2u() does not generate usize as expected."); + size += increase[1]; + usize = sz_s2u(size); + expect_zu_eq(usize, target_usize, + "sz_s2u() does not generate usize as expected."); + size += increase[2]; + usize = sz_s2u(size); + target_usize += PAGE; + expect_zu_eq(usize, target_usize, + "sz_s2u() does not generate usize as expected."); + if (target_usize << 1 < target_usize) { + break; + } + target_usize = target_usize << 1; + size = target_usize; + } +} +TEST_END + TEST_BEGIN(test_psize_classes) { size_t size_class, max_psz; pszind_t pind, max_pind; @@ -182,6 +221,7 @@ int main(void) { return test( test_size_classes, + test_grow_slow_size_classes, test_psize_classes, test_overflow); }