diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h
index 6dcffac9..432ec17c 100644
--- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h
+++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h
@@ -496,6 +496,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) {
 	    *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0);
 
 	emap_alloc_ctx_t alloc_ctx;
+	size_t usize;
 	if (!size_hint) {
 		bool err = emap_alloc_ctx_try_lookup_fast(tsd,
 		    &arena_emap_global, ptr, &alloc_ctx);
@@ -507,6 +508,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) {
 			return false;
 		}
 		assert(alloc_ctx.szind != SC_NSIZES);
+		usize = sz_index2size(alloc_ctx.szind);
 	} else {
 		/*
 		 * Check for both sizes that are too large, and for sampled /
@@ -518,7 +520,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) {
 		    /* check_prof */ true))) {
			return false;
 		}
-		alloc_ctx.szind = sz_size2index_lookup(size);
+		sz_size2index_usize_fastpath(size, &alloc_ctx.szind, &usize);
 		/* Max lookup class must be small. */
 		assert(alloc_ctx.szind < SC_NBINS);
 		/* This is a dead store, except when opt size checking is on. */
@@ -534,7 +536,6 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) {
 	uint64_t deallocated, threshold;
 	te_free_fastpath_ctx(tsd, &deallocated, &threshold);
 
-	size_t usize = sz_index2size(alloc_ctx.szind);
 	uint64_t deallocated_after = deallocated + usize;
 	/*
 	 * Check for events and tsd non-nominal (fast_threshold will be set to
diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h
index 955d8ec0..a2d2debc 100644
--- a/include/jemalloc/internal/sz.h
+++ b/include/jemalloc/internal/sz.h
@@ -152,8 +152,8 @@ sz_psz2u(size_t psz) {
 	return usize;
 }
 
-static inline szind_t
-sz_size2index_compute(size_t size) {
+JEMALLOC_ALWAYS_INLINE szind_t
+sz_size2index_compute_inline(size_t size) {
 	if (unlikely(size > SC_LARGE_MAXCLASS)) {
 		return SC_NSIZES;
 	}
@@ -186,6 +186,11 @@ sz_size2index_compute(size_t size) {
 	}
 }
 
+static inline szind_t
+sz_size2index_compute(size_t size) {
+	return sz_size2index_compute_inline(size);
+}
+
 JEMALLOC_ALWAYS_INLINE szind_t
 sz_size2index_lookup_impl(size_t size) {
 	assert(size <= SC_LOOKUP_MAXCLASS);
@@ -208,8 +213,8 @@ sz_size2index(size_t size) {
 	return sz_size2index_compute(size);
 }
 
-static inline size_t
-sz_index2size_compute(szind_t index) {
+JEMALLOC_ALWAYS_INLINE size_t
+sz_index2size_compute_inline(szind_t index) {
 #if (SC_NTINY > 0)
 	if (index < SC_NTINY) {
 		return (ZU(1) << (SC_LG_TINY_MAXCLASS - SC_NTINY + 1 + index));
@@ -234,6 +239,11 @@ sz_index2size_compute(szind_t index) {
 	}
 }
 
+static inline size_t
+sz_index2size_compute(szind_t index) {
+	return sz_index2size_compute_inline(index);
+}
+
 JEMALLOC_ALWAYS_INLINE size_t
 sz_index2size_lookup_impl(szind_t index) {
 	return sz_index2size_tab[index];
@@ -254,8 +264,19 @@ sz_index2size(szind_t index) {
 
 JEMALLOC_ALWAYS_INLINE void
 sz_size2index_usize_fastpath(size_t size, szind_t *ind, size_t *usize) {
-	*ind = sz_size2index_lookup_impl(size);
-	*usize = sz_index2size_lookup_impl(*ind);
+	if (util_compile_time_const(size)) {
+		/*
+		 * When inlined, the size may become known at compile
+		 * time, which allows static computation through LTO.
+		 */
+		*ind = sz_size2index_compute_inline(size);
+		assert(*ind == sz_size2index_lookup_impl(size));
+		*usize = sz_index2size_compute_inline(*ind);
+		assert(*usize == sz_index2size_lookup_impl(*ind));
+	} else {
+		*ind = sz_size2index_lookup_impl(size);
+		*usize = sz_index2size_lookup_impl(*ind);
+	}
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h
index f4035095..24f23629 100644
--- a/include/jemalloc/internal/util.h
+++ b/include/jemalloc/internal/util.h
@@ -79,6 +79,16 @@ get_errno(void) {
 } while(0)
 #endif
 
+/* Allows compiler constant folding on inlined paths. */
+#if defined(__has_builtin)
+#  if __has_builtin(__builtin_constant_p)
+#    define util_compile_time_const(x) __builtin_constant_p(x)
+#  endif
+#endif
+#ifndef util_compile_time_const
+#  define util_compile_time_const(x) (false)
+#endif
+
 /* ptr should be valid. */
 JEMALLOC_ALWAYS_INLINE void
 util_prefetch_read(void *ptr) {
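For reference, below is a minimal standalone sketch of the dispatch pattern this patch introduces: a wrapper over __builtin_constant_p routes compile-time-constant sizes to a branchy but fully foldable compute path, while runtime sizes keep the table lookup. The names used here (toy_compile_time_const, toy_size2index*, the stub table, the loop-based compute) are illustrative only and are not part of jemalloc.

#include <assert.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/* Same guard shape as the util.h hunk: fall back to false without the builtin. */
#if defined(__has_builtin)
#  if __has_builtin(__builtin_constant_p)
#    define toy_compile_time_const(x) __builtin_constant_p(x)
#  endif
#endif
#ifndef toy_compile_time_const
#  define toy_compile_time_const(x) (false)
#endif

/* Stub lookup table standing in for sz_size2index_tab (sizes 0..8). */
static const unsigned toy_size2index_tab[] = {0, 0, 1, 2, 2, 3, 3, 3, 3};

/* Pure computation: more work at runtime, but foldable for constant sizes. */
static inline unsigned
toy_size2index_compute(size_t size) {
	unsigned ind = 0;
	while (((size_t)1 << ind) < size) {
		ind++;
	}
	return ind;
}

static inline unsigned
toy_size2index(size_t size) {
	if (toy_compile_time_const(size)) {
		/* Constant size: the compiler can reduce this to a literal. */
		unsigned ind = toy_size2index_compute(size);
		/* Mirror the patch's asserts: both paths must agree. */
		assert(ind == toy_size2index_tab[size]);
		return ind;
	}
	/* Runtime size: a single table load is cheaper than the loop. */
	return toy_size2index_tab[size];
}

int
main(void) {
	printf("%u\n", toy_size2index(8));    /* constant argument: likely folded */
	size_t s = (size_t)(rand() % 8) + 1;  /* runtime argument: lookup path */
	printf("%u\n", toy_size2index(s));
	return 0;
}

Whether the constant branch actually folds depends on the call being inlined (and, for cross-translation-unit callers, on LTO, as the patch's comment notes), which is presumably why the new sz_size2index_usize_fastpath keeps debug asserts cross-checking the compute and lookup paths against each other.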