From a2c52674091c53f6af1ac8b7ef8849bc7797a5ad Mon Sep 17 00:00:00 2001
From: Qi Wang
Date: Tue, 16 Jan 2024 13:07:58 -0800
Subject: [PATCH] HPA: Allow frequently reused allocations to bypass the
 slab_max_alloc limit, as long as they stay within the huge page size.

These requests do not raise internal fragmentation concerns within huge
pages, since the entire allocated range is expected to be accessed.
---
 include/jemalloc/internal/pai.h | 10 ++++++----
 src/hpa.c                       | 25 ++++++++++++++++++++-----
 src/pai.c                       |  7 ++++---
 src/sec.c                       |  7 ++++---
 test/unit/hpa.c                 | 19 ++++++++++++++++---
 test/unit/sec.c                 |  2 +-
 6 files changed, 51 insertions(+), 19 deletions(-)

diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h
index dd64ee59..557d30d1 100644
--- a/include/jemalloc/internal/pai.h
+++ b/include/jemalloc/internal/pai.h
@@ -20,7 +20,7 @@ struct pai_s {
 	 * the results are not necessarily zeroed.
 	 */
 	size_t (*alloc_batch)(tsdn_t *tsdn, pai_t *self, size_t size,
-	    size_t nallocs, edata_list_active_t *results,
+	    size_t nallocs, edata_list_active_t *results, bool frequent_reuse,
 	    bool *deferred_work_generated);
 	bool (*expand)(tsdn_t *tsdn, pai_t *self, edata_t *edata,
 	    size_t old_size, size_t new_size, bool zero,
@@ -50,9 +50,10 @@ pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment,
 
 static inline size_t
 pai_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs,
-    edata_list_active_t *results, bool *deferred_work_generated) {
+    edata_list_active_t *results, bool frequent_reuse,
+    bool *deferred_work_generated) {
 	return self->alloc_batch(tsdn, self, size, nallocs, results,
-	    deferred_work_generated);
+	    frequent_reuse, deferred_work_generated);
 }
 
 static inline bool
@@ -91,7 +92,8 @@ pai_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) {
  * each item in the list.
  */
 size_t pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size,
-    size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated);
+    size_t nallocs, edata_list_active_t *results, bool frequent_reuse,
+    bool *deferred_work_generated);
 /* Ditto, for dalloc. */
 void pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self,
     edata_list_active_t *list, bool *deferred_work_generated);
diff --git a/src/hpa.c b/src/hpa.c
index ee41994f..99d1f033 100644
--- a/src/hpa.c
+++ b/src/hpa.c
@@ -12,7 +12,8 @@ static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size,
     size_t alignment, bool zero, bool guarded, bool frequent_reuse,
     bool *deferred_work_generated);
 static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size,
-    size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated);
+    size_t nallocs, edata_list_active_t *results, bool frequent_reuse,
+    bool *deferred_work_generated);
 static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata,
     size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated);
 static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
@@ -643,7 +644,9 @@ static size_t
 hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
     size_t nallocs, edata_list_active_t *results,
     bool *deferred_work_generated) {
-	assert(size <= shard->opts.slab_max_alloc);
+	assert(size <= HUGEPAGE);
+	assert(size <= shard->opts.slab_max_alloc ||
+	    size == sz_index2size(sz_size2index(size)));
 
 	bool oom = false;
 	size_t nsuccess = hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom,
@@ -712,14 +715,26 @@ hpa_from_pai(pai_t *self) {
 
 static size_t
 hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs,
-    edata_list_active_t *results, bool *deferred_work_generated) {
+    edata_list_active_t *results, bool frequent_reuse,
+    bool *deferred_work_generated) {
 	assert(nallocs > 0);
 	assert((size & PAGE_MASK) == 0);
 	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
 	    WITNESS_RANK_CORE, 0);
 	hpa_shard_t *shard = hpa_from_pai(self);
 
-	if (size > shard->opts.slab_max_alloc) {
+	/*
+	 * frequent_reuse indicates that this request comes from the arena
+	 * bins, in which case it will be split into slabs; there is thus no
+	 * intrinsic slack in the allocation (the entire allocated range
+	 * will be accessed).
+	 *
+	 * In that case, bypass the slab_max_alloc limit (if still within the
+	 * huge page size): such requests do not raise internal fragmentation
+	 * concerns with huge pages (again, the full size will be used).
+	 */
+	if (!(frequent_reuse && size <= HUGEPAGE) &&
+	    (size > shard->opts.slab_max_alloc)) {
 		return 0;
 	}
 
@@ -771,7 +786,7 @@ hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero,
 	edata_list_active_t results;
 	edata_list_active_init(&results);
 	size_t nallocs = hpa_alloc_batch(tsdn, self, size, /* nallocs */ 1,
-	    &results, deferred_work_generated);
+	    &results, frequent_reuse, deferred_work_generated);
 	assert(nallocs == 0 || nallocs == 1);
 	edata_t *edata = edata_list_active_first(&results);
 	return edata;
diff --git a/src/pai.c b/src/pai.c
index 45c87729..e8cddfc3 100644
--- a/src/pai.c
+++ b/src/pai.c
@@ -3,12 +3,13 @@
 
 size_t
 pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs,
-    edata_list_active_t *results, bool *deferred_work_generated) {
+    edata_list_active_t *results, bool frequent_reuse,
+    bool *deferred_work_generated) {
 	for (size_t i = 0; i < nallocs; i++) {
 		bool deferred_by_alloc = false;
 		edata_t *edata = pai_alloc(tsdn, self, size, PAGE,
-		    /* zero */ false, /* guarded */ false,
-		    /* frequent_reuse */ false, &deferred_by_alloc);
+		    /* zero */ false, /* guarded */ false, frequent_reuse,
+		    &deferred_by_alloc);
 		*deferred_work_generated |= deferred_by_alloc;
 		if (edata == NULL) {
 			return i;
diff --git a/src/sec.c b/src/sec.c
index df675590..19d69ff4 100644
--- a/src/sec.c
+++ b/src/sec.c
@@ -174,14 +174,15 @@ sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
 
 static edata_t *
 sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
-    sec_bin_t *bin, size_t size) {
+    sec_bin_t *bin, size_t size, bool frequent_reuse) {
 	malloc_mutex_assert_not_owner(tsdn, &shard->mtx);
 
 	edata_list_active_t result;
 	edata_list_active_init(&result);
 	bool deferred_work_generated = false;
 	size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size,
-	    1 + sec->opts.batch_fill_extra, &result, &deferred_work_generated);
+	    1 + sec->opts.batch_fill_extra, &result, frequent_reuse,
+	    &deferred_work_generated);
 
 	edata_t *ret = edata_list_active_first(&result);
 	if (ret != NULL) {
@@ -251,7 +252,7 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero,
 	if (edata == NULL) {
 		if (do_batch_fill) {
 			edata = sec_batch_fill_and_alloc(tsdn, sec, shard, bin,
-			    size);
+			    size, frequent_reuse);
 		} else {
 			edata = pai_alloc(tsdn, sec->fallback, size, alignment,
 			    zero, /* guarded */ false, frequent_reuse,
diff --git a/test/unit/hpa.c b/test/unit/hpa.c
index 64aef59e..9e3160b4 100644
--- a/test/unit/hpa.c
+++ b/test/unit/hpa.c
@@ -84,12 +84,25 @@ TEST_BEGIN(test_alloc_max) {
 	/* Small max */
 	bool deferred_work_generated = false;
 	edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false, false,
-	    false, &deferred_work_generated);
+	    /* frequent_reuse */ false, &deferred_work_generated);
 	expect_ptr_not_null(edata, "Allocation of small max failed");
+
 	edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX + PAGE, PAGE, false,
-	    false, false, &deferred_work_generated);
+	    false, /* frequent_reuse */ false, &deferred_work_generated);
 	expect_ptr_null(edata, "Allocation of larger than small max succeeded");
 
+	edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false,
+	    false, /* frequent_reuse */ true, &deferred_work_generated);
+	expect_ptr_not_null(edata, "Allocation of frequently reused extent failed");
+
+	edata = pai_alloc(tsdn, &shard->pai, HUGEPAGE, PAGE, false,
+	    false, /* frequent_reuse */ true, &deferred_work_generated);
+	expect_ptr_not_null(edata, "Allocation of frequently reused extent failed");
+
+	edata = pai_alloc(tsdn, &shard->pai, HUGEPAGE + PAGE, PAGE, false,
+	    false, /* frequent_reuse */ true, &deferred_work_generated);
+	expect_ptr_null(edata, "Allocation of larger than hugepage succeeded");
+
 	destroy_test_data(shard);
 }
 TEST_END
@@ -273,7 +286,7 @@ TEST_BEGIN(test_alloc_dalloc_batch) {
 	edata_list_active_t allocs_list;
 	edata_list_active_init(&allocs_list);
 	size_t nsuccess = pai_alloc_batch(tsdn, &shard->pai, PAGE, NALLOCS / 2,
-	    &allocs_list, &deferred_work_generated);
+	    &allocs_list, /* frequent_reuse */ false, &deferred_work_generated);
 	expect_zu_eq(NALLOCS / 2, nsuccess, "Unexpected oom");
 	for (size_t i = NALLOCS / 2; i < NALLOCS; i++) {
 		allocs[i] = edata_list_active_first(&allocs_list);
diff --git a/test/unit/sec.c b/test/unit/sec.c
index f3ec403d..0b5e1c31 100644
--- a/test/unit/sec.c
+++ b/test/unit/sec.c
@@ -73,7 +73,7 @@ pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size,
 
 static inline size_t
 pai_test_allocator_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size,
-    size_t nallocs, edata_list_active_t *results,
+    size_t nallocs, edata_list_active_t *results, bool frequent_reuse,
     bool *deferred_work_generated) {
 	pai_test_allocator_t *ta = (pai_test_allocator_t *)self;
 	if (ta->alloc_fail) {
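
Reviewer note (not part of the patch): below is a minimal standalone sketch of
the gating condition that hpa_alloc_batch() now applies. The HUGEPAGE_SZ and
SLAB_MAX_ALLOC constants and the hpa_size_allowed() helper are hypothetical
stand-ins for HUGEPAGE, shard->opts.slab_max_alloc, and the real HPA code,
shown only to illustrate when the slab_max_alloc limit may be bypassed.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-ins for HUGEPAGE and shard->opts.slab_max_alloc. */
#define HUGEPAGE_SZ ((size_t)2 << 20)      /* 2 MiB huge page. */
#define SLAB_MAX_ALLOC ((size_t)64 << 10)  /* 64 KiB slab_max_alloc. */

/*
 * Mirrors the new check in hpa_alloc_batch(): frequently reused requests
 * (arena-bin slabs, whose entire range will be accessed) may exceed
 * slab_max_alloc, as long as they still fit within one huge page.
 */
static bool
hpa_size_allowed(size_t size, bool frequent_reuse) {
	if (frequent_reuse && size <= HUGEPAGE_SZ) {
		return true;
	}
	return size <= SLAB_MAX_ALLOC;
}

int
main(void) {
	/* Ordinary request above slab_max_alloc: still rejected. */
	printf("%d\n", hpa_size_allowed(SLAB_MAX_ALLOC + 4096, false)); /* 0 */
	/* Frequently reused request within a huge page: now allowed. */
	printf("%d\n", hpa_size_allowed(SLAB_MAX_ALLOC + 4096, true));  /* 1 */
	/* Even frequent reuse cannot exceed the huge page size. */
	printf("%d\n", hpa_size_allowed(HUGEPAGE_SZ + 4096, true));     /* 0 */
	return 0;
}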
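
A second note, on the strengthened assertion in hpa_alloc_batch_psset(): the
round trip sz_index2size(sz_size2index(size)) == size holds exactly when size
sits on a size-class boundary, which the patch expects for any bin-originated
(frequently reused) request that exceeds slab_max_alloc. The toy size-class
table below is a hypothetical stand-in for jemalloc's real sz module, shown
only to illustrate the round-trip check.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical size classes; jemalloc's real sz module is far richer. */
static const size_t size_classes[] = {4096, 8192, 16384, 32768, 65536};
#define NCLASSES (sizeof(size_classes) / sizeof(size_classes[0]))

/* Smallest class that fits the request (a toy sz_size2index analog). */
static size_t
toy_size2index(size_t size) {
	for (size_t i = 0; i < NCLASSES; i++) {
		if (size <= size_classes[i]) {
			return i;
		}
	}
	return NCLASSES - 1;
}

/* Class index back to its usable size (a toy sz_index2size analog). */
static size_t
toy_index2size(size_t idx) {
	return size_classes[idx];
}

int
main(void) {
	/* On a class boundary: the round trip preserves the size. */
	printf("%d\n", toy_index2size(toy_size2index(8192)) == 8192); /* 1 */
	/* Off-boundary: the round trip rounds up, so the check fails. */
	printf("%d\n", toy_index2size(toy_size2index(9000)) == 9000); /* 0 */
	return 0;
}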