diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index f46820f4..c145c91e 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -17,6 +17,9 @@ extern const char *percpu_arena_mode_names[]; extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; extern malloc_mutex_t arenas_lock; +extern size_t opt_huge_threshold; +extern size_t huge_threshold; + void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy); @@ -81,6 +84,8 @@ void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); size_t arena_extent_sn_next(arena_t *arena); arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +bool arena_init_huge(void); +arena_t *arena_choose_huge(tsd_t *tsd); void arena_boot(void); void arena_prefork0(tsdn_t *tsdn, arena_t *arena); void arena_prefork1(tsdn_t *tsdn, arena_t *arena); diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 2b7e77e7..401be758 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -8,6 +8,27 @@ #include "jemalloc/internal/sz.h" #include "jemalloc/internal/ticker.h" +JEMALLOC_ALWAYS_INLINE arena_t * +arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { + if (arena != NULL) { + return arena; + } + + /* + * For huge allocations, use the dedicated huge arena if both are true: + * 1) is using auto arena selection (i.e. arena == NULL), and 2) the + * thread is not assigned to a manual arena. 
+ */ + if (unlikely(size >= huge_threshold)) { + arena_t *tsd_arena = tsd_arena_get(tsd); + if (tsd_arena == NULL || arena_is_auto(tsd_arena)) { + return arena_choose_huge(tsd); + } + } + + return arena_choose(tsd, NULL); +} + JEMALLOC_ALWAYS_INLINE prof_tctx_t * arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { cassert(config_prof); diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index 70001b5f..759713c9 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -40,4 +40,10 @@ typedef enum { #define PERCPU_ARENA_ENABLED(m) ((m) >= percpu_arena_mode_enabled_base) #define PERCPU_ARENA_DEFAULT percpu_arena_disabled +/* + * When allocation_size >= huge_threshold, use the dedicated huge arena (unless + * have explicitly specified arena index). 0 disables the feature. + */ +#define HUGE_THRESHOLD_DEFAULT 0 + #endif /* JEMALLOC_INTERNAL_ARENA_TYPES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index 2e76e5d8..8b0ac462 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -71,7 +71,9 @@ arena_ichoose(tsd_t *tsd, arena_t *arena) { static inline bool arena_is_auto(arena_t *arena) { assert(narenas_auto > 0); - return (arena_ind_get(arena) < narenas_auto); + unsigned offset = (opt_huge_threshold != 0) ? 
1 : 0; + + return (arena_ind_get(arena) < narenas_auto + offset); } JEMALLOC_ALWAYS_INLINE extent_t * diff --git a/src/arena.c b/src/arena.c index b5c3dbec..49d86d2c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -42,6 +42,10 @@ const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { static div_info_t arena_binind_div_info[NBINS]; +size_t opt_huge_threshold = HUGE_THRESHOLD_DEFAULT; +size_t huge_threshold = HUGE_THRESHOLD_DEFAULT; +static unsigned huge_arena_ind; + /******************************************************************************/ /* * Function prototypes for static functions that are referenced prior to @@ -1378,7 +1382,7 @@ arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, assert(!tsdn_null(tsdn) || arena != NULL); if (likely(!tsdn_null(tsdn))) { - arena = arena_choose(tsdn_tsd(tsdn), arena); + arena = arena_choose_maybe_huge(tsdn_tsd(tsdn), arena, size); } if (unlikely(arena == NULL)) { return NULL; @@ -1939,6 +1943,58 @@ label_error: return NULL; } +arena_t * +arena_choose_huge(tsd_t *tsd) { + /* huge_arena_ind can be 0 during init (will use a0). */ + if (huge_arena_ind == 0) { + assert(!malloc_initialized()); + } + + arena_t *huge_arena = arena_get(tsd_tsdn(tsd), huge_arena_ind, false); + if (huge_arena == NULL) { + /* Create the huge arena on demand. */ + assert(huge_arena_ind != 0); + huge_arena = arena_get(tsd_tsdn(tsd), huge_arena_ind, true); + if (huge_arena == NULL) { + return NULL; + } + /* + * Purge eagerly for huge allocations, because: 1) number of + * huge allocations is usually small, which means ticker based + * decay is not reliable; and 2) less immediate reuse is + * expected for huge allocations. 
+ */ + if (arena_dirty_decay_ms_default_get() > 0) { + arena_dirty_decay_ms_set(tsd_tsdn(tsd), huge_arena, 0); + } + if (arena_muzzy_decay_ms_default_get() > 0) { + arena_muzzy_decay_ms_set(tsd_tsdn(tsd), huge_arena, 0); + } + } + + return huge_arena; +} + +bool +arena_init_huge(void) { + bool huge_enabled; + + /* The threshold should be large size class. */ + if (opt_huge_threshold > LARGE_MAXCLASS || + opt_huge_threshold < LARGE_MINCLASS) { + opt_huge_threshold = 0; + huge_threshold = LARGE_MAXCLASS + PAGE; + huge_enabled = false; + } else { + /* Reserve the index for the huge arena. */ + huge_arena_ind = narenas_total_get(); + huge_threshold = opt_huge_threshold; + huge_enabled = true; + } + + return huge_enabled; +} + void arena_boot(void) { arena_dirty_decay_ms_default_set(opt_dirty_decay_ms); diff --git a/src/jemalloc.c b/src/jemalloc.c index 300e8976..594669c3 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -327,7 +327,7 @@ arena_init_locked(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { */ arena = arena_get(tsdn, ind, false); if (arena != NULL) { - assert(ind < narenas_auto); + assert(arena_is_auto(arena)); return arena; } @@ -1142,11 +1142,15 @@ malloc_conf_init(void) { CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc") } CONF_HANDLE_BOOL(opt_tcache, "tcache") + CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, "lg_tcache_max", + -1, (sizeof(size_t) << 3) - 1) + + CONF_HANDLE_SIZE_T(opt_huge_threshold, "huge_threshold", + LARGE_MINCLASS, LARGE_MAXCLASS, yes, yes, false) CONF_HANDLE_SIZE_T(opt_lg_extent_max_active_fit, "lg_extent_max_active_fit", 0, (sizeof(size_t) << 3), yes, yes, false) - CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, "lg_tcache_max", - -1, (sizeof(size_t) << 3) - 1) + if (strncmp("percpu_arena", k, klen) == 0) { bool match = false; for (int i = percpu_arena_mode_names_base; i < @@ -1465,6 +1469,9 @@ malloc_init_narenas(void) { narenas_auto); } narenas_total_set(narenas_auto); + if (arena_init_huge()) { + narenas_total_inc(); + } return false; } diff 
--git a/src/large.c b/src/large.c index 4951f3ee..03eecfad 100644 --- a/src/large.c +++ b/src/large.c @@ -42,7 +42,7 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, */ is_zeroed = zero; if (likely(!tsdn_null(tsdn))) { - arena = arena_choose(tsdn_tsd(tsdn), arena); + arena = arena_choose_maybe_huge(tsdn_tsd(tsdn), arena, usize); } if (unlikely(arena == NULL) || (extent = arena_extent_alloc_large(tsdn, arena, usize, alignment, &is_zeroed)) == NULL) { diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 8a36c0a4..4ecf5bd2 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -341,6 +341,9 @@ TEST_BEGIN(test_thread_arena) { sz = sizeof(unsigned); assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); + if (opt_huge_threshold != 0) { + narenas--; + } assert_u_eq(narenas, opt_narenas, "Number of arenas incorrect"); if (strcmp(opa, "disabled") == 0) {