HPA: Allow frequently reused allocs to bypass the slab_max_alloc limit, as long as

they stay within the huge page size.  These requests do not raise internal
fragmentation concerns with huge pages, since the entire range is expected to
be accessed.
Qi Wang 2024-01-16 13:07:58 -08:00 committed by Qi Wang
parent b1792c80d2
commit a2c5267409
6 changed files with 51 additions and 19 deletions
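
The core of the change is the admission check in hpa_alloc_batch (see the hpa.c hunk below): a request larger than slab_max_alloc is no longer rejected up front when the caller marks it as frequently reused and it fits within a huge page. The standalone sketch below restates that condition; hpa_would_serve, HUGEPAGE_SZ, and the sample sizes are illustrative stand-ins, not jemalloc identifiers (in jemalloc the values come from HUGEPAGE and shard->opts.slab_max_alloc).

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Stand-in for jemalloc's HUGEPAGE; assume 2 MiB huge pages. */
#define HUGEPAGE_SZ ((size_t)2 << 20)

/*
 * Mirrors the new gate in hpa_alloc_batch: reject oversized requests unless
 * they are frequently reused and still fit within one huge page.
 */
static bool
hpa_would_serve(size_t size, size_t slab_max_alloc, bool frequent_reuse) {
	if (!(frequent_reuse && size <= HUGEPAGE_SZ) &&
	    size > slab_max_alloc) {
		return false;
	}
	return true;
}

int
main(void) {
	size_t slab_max = (size_t)64 << 10;	/* example slab_max_alloc: 64 KiB */
	/* Rejected before and after this commit: not frequently reused, over slab_max_alloc. */
	printf("%d\n", hpa_would_serve((size_t)256 << 10, slab_max, false));
	/* Newly allowed: frequently reused and within the huge page size. */
	printf("%d\n", hpa_would_serve((size_t)256 << 10, slab_max, true));
	/* Still rejected: frequently reused but larger than a huge page. */
	printf("%d\n", hpa_would_serve(HUGEPAGE_SZ + ((size_t)4 << 10), slab_max, true));
	return 0;
}

The test_alloc_max hunk below checks the same boundaries against a real shard: frequently reused requests at ALLOC_MAX and at HUGEPAGE succeed, while HUGEPAGE + PAGE is rejected.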


@@ -20,7 +20,7 @@ struct pai_s {
 	 * the results are not necessarily zeroed.
 	 */
 	size_t (*alloc_batch)(tsdn_t *tsdn, pai_t *self, size_t size,
-	    size_t nallocs, edata_list_active_t *results,
+	    size_t nallocs, edata_list_active_t *results, bool frequent_reuse,
 	    bool *deferred_work_generated);
 	bool (*expand)(tsdn_t *tsdn, pai_t *self, edata_t *edata,
 	    size_t old_size, size_t new_size, bool zero,
@@ -50,9 +50,10 @@ pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment,
 
 static inline size_t
 pai_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs,
-    edata_list_active_t *results, bool *deferred_work_generated) {
+    edata_list_active_t *results, bool frequent_reuse,
+    bool *deferred_work_generated) {
 	return self->alloc_batch(tsdn, self, size, nallocs, results,
-	    deferred_work_generated);
+	    frequent_reuse, deferred_work_generated);
 }
 
 static inline bool
@@ -91,7 +92,8 @@ pai_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) {
  * each item in the list.
  */
 size_t pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size,
-    size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated);
+    size_t nallocs, edata_list_active_t *results, bool frequent_reuse,
+    bool *deferred_work_generated);
 /* Ditto, for dalloc. */
 void pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self,
     edata_list_active_t *list, bool *deferred_work_generated);


@@ -12,7 +12,8 @@ static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size,
     size_t alignment, bool zero, bool guarded, bool frequent_reuse,
     bool *deferred_work_generated);
 static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size,
-    size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated);
+    size_t nallocs, edata_list_active_t *results, bool frequent_reuse,
+    bool *deferred_work_generated);
 static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata,
     size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated);
 static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
@@ -643,7 +644,9 @@ static size_t
 hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
     size_t nallocs, edata_list_active_t *results,
     bool *deferred_work_generated) {
-	assert(size <= shard->opts.slab_max_alloc);
+	assert(size <= HUGEPAGE);
+	assert(size <= shard->opts.slab_max_alloc ||
+	    size == sz_index2size(sz_size2index(size)));
 	bool oom = false;
 	size_t nsuccess = hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom,
@@ -712,14 +715,26 @@ hpa_from_pai(pai_t *self) {
 static size_t
 hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs,
-    edata_list_active_t *results, bool *deferred_work_generated) {
+    edata_list_active_t *results, bool frequent_reuse,
+    bool *deferred_work_generated) {
 	assert(nallocs > 0);
 	assert((size & PAGE_MASK) == 0);
 	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
 	    WITNESS_RANK_CORE, 0);
 	hpa_shard_t *shard = hpa_from_pai(self);
-	if (size > shard->opts.slab_max_alloc) {
+	/*
+	 * frequent_reuse indicates that this request comes from the arena
+	 * bins, in which case it will be split into slabs, and therefore
+	 * there is no intrinsic slack in the allocation (the entire allocated
+	 * range will be accessed).
+	 *
+	 * In this case, bypass the slab_max_alloc limit (as long as the size
+	 * is still within the huge page size).  These requests do not raise
+	 * internal fragmentation concerns with huge pages (again, the full
+	 * size will be used).
+	 */
+	if (!(frequent_reuse && size <= HUGEPAGE) &&
+	    (size > shard->opts.slab_max_alloc)) {
 		return 0;
 	}
@@ -771,7 +786,7 @@ hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero,
 	edata_list_active_t results;
 	edata_list_active_init(&results);
 	size_t nallocs = hpa_alloc_batch(tsdn, self, size, /* nallocs */ 1,
-	    &results, deferred_work_generated);
+	    &results, frequent_reuse, deferred_work_generated);
 	assert(nallocs == 0 || nallocs == 1);
 	edata_t *edata = edata_list_active_first(&results);
 	return edata;


@@ -3,12 +3,13 @@
 size_t
 pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs,
-    edata_list_active_t *results, bool *deferred_work_generated) {
+    edata_list_active_t *results, bool frequent_reuse,
+    bool *deferred_work_generated) {
 	for (size_t i = 0; i < nallocs; i++) {
 		bool deferred_by_alloc = false;
 		edata_t *edata = pai_alloc(tsdn, self, size, PAGE,
-		    /* zero */ false, /* guarded */ false,
-		    /* frequent_reuse */ false, &deferred_by_alloc);
+		    /* zero */ false, /* guarded */ false, frequent_reuse,
+		    &deferred_by_alloc);
 		*deferred_work_generated |= deferred_by_alloc;
 		if (edata == NULL) {
 			return i;


@@ -174,14 +174,15 @@ sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
 static edata_t *
 sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
-    sec_bin_t *bin, size_t size) {
+    sec_bin_t *bin, size_t size, bool frequent_reuse) {
 	malloc_mutex_assert_not_owner(tsdn, &shard->mtx);
 	edata_list_active_t result;
 	edata_list_active_init(&result);
 	bool deferred_work_generated = false;
 	size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size,
-	    1 + sec->opts.batch_fill_extra, &result, &deferred_work_generated);
+	    1 + sec->opts.batch_fill_extra, &result, frequent_reuse,
+	    &deferred_work_generated);
 	edata_t *ret = edata_list_active_first(&result);
 	if (ret != NULL) {
@@ -251,7 +252,7 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero,
 	if (edata == NULL) {
 		if (do_batch_fill) {
 			edata = sec_batch_fill_and_alloc(tsdn, sec, shard, bin,
-			    size);
+			    size, frequent_reuse);
 		} else {
 			edata = pai_alloc(tsdn, sec->fallback, size, alignment,
 			    zero, /* guarded */ false, frequent_reuse,


@@ -84,12 +84,25 @@ TEST_BEGIN(test_alloc_max) {
 	/* Small max */
 	bool deferred_work_generated = false;
 	edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false, false,
-	    false, &deferred_work_generated);
+	    /* frequent_reuse */ false, &deferred_work_generated);
 	expect_ptr_not_null(edata, "Allocation of small max failed");
 	edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX + PAGE, PAGE, false,
-	    false, false, &deferred_work_generated);
+	    false, /* frequent_reuse */ false, &deferred_work_generated);
 	expect_ptr_null(edata, "Allocation of larger than small max succeeded");
+	edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false,
+	    false, /* frequent_reuse */ true, &deferred_work_generated);
+	expect_ptr_not_null(edata, "Allocation of frequently reused failed");
+	edata = pai_alloc(tsdn, &shard->pai, HUGEPAGE, PAGE, false,
+	    false, /* frequent_reuse */ true, &deferred_work_generated);
+	expect_ptr_not_null(edata, "Allocation of frequently reused failed");
+	edata = pai_alloc(tsdn, &shard->pai, HUGEPAGE + PAGE, PAGE, false,
+	    false, /* frequent_reuse */ true, &deferred_work_generated);
+	expect_ptr_null(edata, "Allocation of larger than hugepage succeeded");
 	destroy_test_data(shard);
 }
 TEST_END
@@ -273,7 +286,7 @@ TEST_BEGIN(test_alloc_dalloc_batch) {
 	edata_list_active_t allocs_list;
 	edata_list_active_init(&allocs_list);
 	size_t nsuccess = pai_alloc_batch(tsdn, &shard->pai, PAGE, NALLOCS / 2,
-	    &allocs_list, &deferred_work_generated);
+	    &allocs_list, /* frequent_reuse */ false, &deferred_work_generated);
 	expect_zu_eq(NALLOCS / 2, nsuccess, "Unexpected oom");
 	for (size_t i = NALLOCS / 2; i < NALLOCS; i++) {
 		allocs[i] = edata_list_active_first(&allocs_list);


@@ -73,7 +73,7 @@ pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size,
 static inline size_t
 pai_test_allocator_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size,
-    size_t nallocs, edata_list_active_t *results,
+    size_t nallocs, edata_list_active_t *results, bool frequent_reuse,
     bool *deferred_work_generated) {
 	pai_test_allocator_t *ta = (pai_test_allocator_t *)self;
 	if (ta->alloc_fail) {