diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index ea608213..80c4f79d 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -27,17 +27,6 @@ */ #define PSSET_NHUGE 2 -/* - * We keep two purge lists per page size class; one for hugified hpdatas (at - * index 2*pszind), and one for the non-hugified hpdatas (at index 2*pszind + - * 1). This lets us implement a preference for purging non-hugified hpdatas - * among similarly-dirty ones. - * We reserve the last two indices for empty slabs, in that case purging - * hugified ones (which are definitionally all waste) before non-hugified ones - * (i.e. reversing the order). - */ -#define PSSET_NPURGE_LISTS (2 * PSSET_NPSIZES) - typedef struct psset_bin_stats_s psset_bin_stats_t; struct psset_bin_stats_s { /* How many pageslabs are in this bin? */ @@ -65,11 +54,7 @@ struct psset_stats_s { /* Non-huge and huge slabs. */ psset_bin_stats_t slabs[PSSET_NHUGE]; - /* - * The second index is huge stats; nonfull_slabs[pszind][0] contains - * stats for the non-huge slabs in bucket pszind, while - * nonfull_slabs[pszind][1] contains stats for the huge slabs. - */ + /* Non-full slabs, distinguished for non-huge and huge slabs. */ psset_bin_stats_t nonfull_slabs[PSSET_NPSIZES][PSSET_NHUGE]; /* @@ -88,9 +73,9 @@ struct psset_s { * The pageslabs, quantized by the size class of the largest contiguous * free run of pages in a pageslab. */ - hpdata_age_heap_t pageslabs[PSSET_NPSIZES]; + hpdata_age_heap_t pageslabs[PSSET_NHUGE][PSSET_NPSIZES]; /* Bitmap for which set bits correspond to non-empty heaps. */ - fb_group_t pageslab_bitmap[FB_NGROUPS(PSSET_NPSIZES)]; + fb_group_t pageslab_bitmap[PSSET_NHUGE][FB_NGROUPS(PSSET_NPSIZES)]; psset_stats_t stats; /* * Slabs with no active allocations, but which are allowed to serve new @@ -102,9 +87,9 @@ struct psset_s { * to purge them (with later indices indicating slabs we want to purge * more). 
*/ - hpdata_purge_list_t to_purge[PSSET_NPURGE_LISTS]; + hpdata_purge_list_t to_purge[PSSET_NHUGE][PSSET_NPSIZES]; /* Bitmap for which set bits correspond to non-empty purge lists. */ - fb_group_t purge_bitmap[FB_NGROUPS(PSSET_NPURGE_LISTS)]; + fb_group_t purge_bitmap[PSSET_NHUGE][FB_NGROUPS(PSSET_NPSIZES)]; /* Slabs which are available to be hugified. */ hpdata_hugify_list_t to_hugify; }; diff --git a/src/psset.c b/src/psset.c index 9a833193..d3423f9d 100644 --- a/src/psset.c +++ b/src/psset.c @@ -5,18 +5,32 @@ #include "jemalloc/internal/fb.h" +static void +psset_init_pageslabs(hpdata_age_heap_t *pageslabs) { + for (int i = 0; i < PSSET_NPSIZES; i++) { + hpdata_age_heap_new(&pageslabs[i]); + } +} + +static void +psset_init_to_purge(hpdata_purge_list_t *to_purge) { + for (int i = 0; i < PSSET_NPSIZES; i++) { + hpdata_purge_list_init(&to_purge[i]); + } +} + void psset_init(psset_t *psset) { - for (unsigned i = 0; i < PSSET_NPSIZES; i++) { - hpdata_age_heap_new(&psset->pageslabs[i]); + for (int huge = 0; huge < PSSET_NHUGE; huge++) { + psset_init_pageslabs(psset->pageslabs[huge]); + fb_init(psset->pageslab_bitmap[huge], PSSET_NPSIZES); } - fb_init(psset->pageslab_bitmap, PSSET_NPSIZES); memset(&psset->stats, 0, sizeof(psset->stats)); hpdata_empty_list_init(&psset->empty); - for (int i = 0; i < PSSET_NPURGE_LISTS; i++) { - hpdata_purge_list_init(&psset->to_purge[i]); + for (int huge = 0; huge < PSSET_NHUGE; huge++) { + psset_init_to_purge(psset->to_purge[huge]); + fb_init(psset->purge_bitmap[huge], PSSET_NPSIZES); } - fb_init(psset->purge_bitmap, PSSET_NPURGE_LISTS); hpdata_hugify_list_init(&psset->to_hugify); } @@ -45,6 +59,11 @@ psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) { } } +static size_t +psset_hpdata_huge_index(const hpdata_t *ps) { + return (size_t)hpdata_huge_get(ps); +} + /* * The stats maintenance strategy is to remove a pageslab's contribution to the * stats when we call psset_update_begin, and re-add it (to a potentially new @@ -70,7 +89,7 
@@ psset_slab_stats_insert_remove(psset_stats_t *stats, return; } - size_t huge_idx = (size_t)hpdata_huge_get(ps); + size_t huge_idx = psset_hpdata_huge_index(ps); stats->slabs[huge_idx].npageslabs += mul * 1; stats->slabs[huge_idx].nactive += mul * nactive; @@ -136,20 +155,26 @@ psset_hpdata_heap_index(const hpdata_t *ps) { static void psset_hpdata_heap_remove(psset_t *psset, hpdata_t *ps) { + size_t huge_idx = psset_hpdata_huge_index(ps); pszind_t pind = psset_hpdata_heap_index(ps); - hpdata_age_heap_remove(&psset->pageslabs[pind], ps); - if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { - fb_unset(psset->pageslab_bitmap, PSSET_NPSIZES, (size_t)pind); + hpdata_age_heap_t *heap = &psset->pageslabs[huge_idx][pind]; + hpdata_age_heap_remove(heap, ps); + if (hpdata_age_heap_empty(heap)) { + fb_unset(psset->pageslab_bitmap[huge_idx], PSSET_NPSIZES, + (size_t)pind); } } static void psset_hpdata_heap_insert(psset_t *psset, hpdata_t *ps) { + size_t huge_idx = psset_hpdata_huge_index(ps); pszind_t pind = psset_hpdata_heap_index(ps); - if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { - fb_set(psset->pageslab_bitmap, PSSET_NPSIZES, (size_t)pind); + hpdata_age_heap_t *heap = &psset->pageslabs[huge_idx][pind]; + if (hpdata_age_heap_empty(heap)) { + fb_set(psset->pageslab_bitmap[huge_idx], PSSET_NPSIZES, + (size_t)pind); } - hpdata_age_heap_insert(&psset->pageslabs[pind], ps); + hpdata_age_heap_insert(heap, ps); } static void @@ -227,32 +252,18 @@ psset_purge_list_ind(hpdata_t *ps) { assert(ndirty > 0); /* * Higher indices correspond to lists we'd like to purge earlier; make - * the two highest indices correspond to empty lists, which we attempt + * the highest index correspond to empty list, which we attempt * to purge before purging any non-empty list. This has two advantages: * - Empty page slabs are the least likely to get reused (we'll only * pick them for an allocation if we have no other choice). 
* - Empty page slabs can purge every dirty page they contain in a * single call, which is not usually the case. - * - * We purge hugeified empty slabs before nonhugeified ones, on the basis - * that they are fully dirty, while nonhugified slabs might not be, so - * we free up more pages more easily. */ if (hpdata_nactive_get(ps) == 0) { - if (hpdata_huge_get(ps)) { - return PSSET_NPURGE_LISTS - 1; - } else { - return PSSET_NPURGE_LISTS - 2; - } + return PSSET_NPSIZES - 1; } - pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(ndirty << LG_PAGE)); - /* - * For non-empty slabs, we may reuse them again. Prefer purging - * non-hugeified slabs before hugeified ones then, among pages of - * similar dirtiness. We still get some benefit from the hugification. - */ - return (size_t)pind * 2 + (hpdata_huge_get(ps) ? 0 : 1); + return sz_psz2ind(sz_psz_quantize_floor(ndirty << LG_PAGE)); } static void @@ -264,11 +275,13 @@ psset_maybe_remove_purge_list(psset_t *psset, hpdata_t *ps) { * purge LRU within a given dirtiness bucket. 
*/ if (hpdata_purge_allowed_get(ps)) { + size_t huge = psset_hpdata_huge_index(ps); size_t ind = psset_purge_list_ind(ps); - hpdata_purge_list_t *purge_list = &psset->to_purge[ind]; + hpdata_purge_list_t *purge_list = &psset->to_purge[huge][ind]; hpdata_purge_list_remove(purge_list, ps); if (hpdata_purge_list_empty(purge_list)) { - fb_unset(psset->purge_bitmap, PSSET_NPURGE_LISTS, ind); + fb_unset(psset->purge_bitmap[huge], PSSET_NPSIZES, + ind); } } } @@ -276,10 +289,11 @@ psset_maybe_remove_purge_list(psset_t *psset, hpdata_t *ps) { static void psset_maybe_insert_purge_list(psset_t *psset, hpdata_t *ps) { if (hpdata_purge_allowed_get(ps)) { + size_t huge = psset_hpdata_huge_index(ps); size_t ind = psset_purge_list_ind(ps); - hpdata_purge_list_t *purge_list = &psset->to_purge[ind]; + hpdata_purge_list_t *purge_list = &psset->to_purge[huge][ind]; if (hpdata_purge_list_empty(purge_list)) { - fb_set(psset->purge_bitmap, PSSET_NPURGE_LISTS, ind); + fb_set(psset->purge_bitmap[huge], PSSET_NPSIZES, ind); } hpdata_purge_list_append(purge_list, ps); } @@ -343,33 +357,70 @@ psset_pick_alloc(psset_t *psset, size_t size) { assert(size <= HUGEPAGE); pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size)); - pszind_t pind = (pszind_t)fb_ffs(psset->pageslab_bitmap, PSSET_NPSIZES, - (size_t)min_pind); - if (pind == PSSET_NPSIZES) { - return hpdata_empty_list_first(&psset->empty); - } - hpdata_t *ps = hpdata_age_heap_first(&psset->pageslabs[pind]); - if (ps == NULL) { - return NULL; + + /* + * Try to place allocation on already hugified page first if possible + * to better utilize them. 
+ */ + for (int huge = PSSET_NHUGE - 1; huge >= 0; --huge) { + pszind_t pind = (pszind_t)fb_ffs(psset->pageslab_bitmap[huge], + PSSET_NPSIZES, (size_t)min_pind); + if (pind == PSSET_NPSIZES) { + continue; + } + hpdata_t *ps = hpdata_age_heap_first( + &psset->pageslabs[huge][pind]); + if (ps == NULL) { + continue; + } + hpdata_assert_consistent(ps); + return ps; + } - hpdata_assert_consistent(ps); - - return ps; + /* + * Couldn't find a non-full slab to place the allocation on; use an + * empty slab if we have one available, as a last resort. + */ + return hpdata_empty_list_first(&psset->empty); } hpdata_t * psset_pick_purge(psset_t *psset) { - ssize_t ind_ssz = fb_fls(psset->purge_bitmap, PSSET_NPURGE_LISTS, - PSSET_NPURGE_LISTS - 1); - if (ind_ssz < 0) { - return NULL; + /* + * We purge hugified empty slabs before non-hugified ones, on the + * basis that they are fully dirty, while non-hugified slabs might not + * be, so we free up more pages more easily. Another reason to prefer + * purging hugified slabs is to free contiguous physical memory ranges + * in case there are not enough of them due to fragmentation at the + * operating system level. + */ + for (ssize_t huge = PSSET_NHUGE - 1; huge >= 0; --huge) { + if (!fb_get(psset->purge_bitmap[huge], PSSET_NPSIZES, + PSSET_NPSIZES - 1)) { + continue; + } + hpdata_t *ps = hpdata_purge_list_first( + &psset->to_purge[huge][PSSET_NPSIZES - 1]); + assert(ps != NULL); + return ps; + } - pszind_t ind = (pszind_t)ind_ssz; - assert(ind < PSSET_NPURGE_LISTS); - hpdata_t *ps = hpdata_purge_list_first(&psset->to_purge[ind]); - assert(ps != NULL); - return ps; + + /* For non-empty pageslabs, prefer purging non-hugified ones. 
*/ + for (ssize_t huge = 0; huge < PSSET_NHUGE; ++huge) { + ssize_t ind_ssz = fb_fls(psset->purge_bitmap[huge], + PSSET_NPSIZES, PSSET_NPSIZES - 1); + if (ind_ssz < 0) { + continue; + } + pszind_t ind = (pszind_t)ind_ssz; + assert(ind < PSSET_NPSIZES); + hpdata_t *ps = hpdata_purge_list_first( + &psset->to_purge[huge][ind]); + assert(ps != NULL); + return ps; + } + + return NULL; } hpdata_t * diff --git a/test/unit/psset.c b/test/unit/psset.c index 6bfdbb5f..f25968d9 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -705,86 +705,149 @@ TEST_BEGIN(test_insert_remove) { } TEST_END -TEST_BEGIN(test_purge_prefers_nonhuge) { - /* - * All else being equal, we should prefer purging non-huge pages over - * huge ones for non-empty extents. - */ - - /* Nothing magic about this constant. */ - enum { - NHP = 23, - }; - hpdata_t *hpdata; - +TEST_BEGIN(test_alloc_prefers_huge) { psset_t psset; psset_init(&psset); - hpdata_t hpdata_huge[NHP]; - uintptr_t huge_begin = (uintptr_t)&hpdata_huge[0]; - uintptr_t huge_end = (uintptr_t)&hpdata_huge[NHP]; - hpdata_t hpdata_nonhuge[NHP]; - uintptr_t nonhuge_begin = (uintptr_t)&hpdata_nonhuge[0]; - uintptr_t nonhuge_end = (uintptr_t)&hpdata_nonhuge[NHP]; + hpdata_t nonhuge; + hpdata_init(&nonhuge, /* addr */ NULL, /* age */ 0); + psset_insert(&psset, &nonhuge); - for (size_t i = 0; i < NHP; i++) { - hpdata_init(&hpdata_huge[i], (void *)((10 + i) * HUGEPAGE), - 123 + i); - psset_insert(&psset, &hpdata_huge[i]); + hpdata_t huge; + hpdata_init(&huge, /* addr */ (void *) HUGEPAGE, /* age */ 1); + psset_insert(&psset, &huge); + psset_update_begin(&psset, &huge); + hpdata_hugify(&huge); + psset_update_end(&psset, &huge); - hpdata_init(&hpdata_nonhuge[i], - (void *)((10 + NHP + i) * HUGEPAGE), - 456 + i); - psset_insert(&psset, &hpdata_nonhuge[i]); + void *huge_allocs[HUGEPAGE_PAGES]; + /* All allocations should be placed on huge pageslab. 
*/ + for (size_t i = 0; i < HUGEPAGE_PAGES; i++) { + hpdata_t *next = psset_pick_alloc(&psset, PAGE); + + expect_ptr_eq(hpdata_addr_get(next), hpdata_addr_get(&huge), + "Picked wrong pageslab to place allocation"); + expect_u64_eq(hpdata_age_get(next), hpdata_age_get(&huge), ""); + + psset_update_begin(&psset, next); + huge_allocs[i] = hpdata_reserve_alloc(next, PAGE); + psset_update_end(&psset, next); } - for (int i = 0; i < 2 * NHP; i++) { - hpdata = psset_pick_alloc(&psset, HUGEPAGE * 3 / 4); - psset_update_begin(&psset, hpdata); - void *ptr; - ptr = hpdata_reserve_alloc(hpdata, HUGEPAGE * 3 / 4); - /* Ignore the first alloc, which will stick around. */ - (void)ptr; - /* - * The second alloc is to dirty the pages; free it immediately - * after allocating. - */ - ptr = hpdata_reserve_alloc(hpdata, HUGEPAGE / 4); - hpdata_unreserve(hpdata, ptr, HUGEPAGE / 4); - if (huge_begin <= (uintptr_t)hpdata - && (uintptr_t)hpdata < huge_end) { - hpdata_hugify(hpdata); - } + void *nonhuge_allocs[HUGEPAGE_PAGES]; - hpdata_purge_allowed_set(hpdata, true); - psset_update_end(&psset, hpdata); + /* + * Now, when huge pageslab is full, we should place allocations on + * non-huge one. + */ + for (size_t i = 0; i < HUGEPAGE_PAGES; i++) { + hpdata_t *next = psset_pick_alloc(&psset, PAGE); + + expect_ptr_eq(hpdata_addr_get(next), hpdata_addr_get(&nonhuge), + "Picked wrong pageslab to place allocation"); + expect_u64_eq(hpdata_age_get(next), hpdata_age_get(&nonhuge), ""); + + psset_update_begin(&psset, next); + nonhuge_allocs[i] = hpdata_reserve_alloc(next, PAGE); + psset_update_end(&psset, next); } /* - * We've got a bunch of 1/8th dirty hpdatas. It should give us all the - * non-huge ones to purge, then all the huge ones, then refuse to purge - * further. + * Deallocate everything except one page from huge pageslab, because + * empty pageslab is a completely different story. 
*/ - for (int i = 0; i < NHP; i++) { - hpdata = psset_pick_purge(&psset); - assert_true(nonhuge_begin <= (uintptr_t)hpdata - && (uintptr_t)hpdata < nonhuge_end, ""); - psset_update_begin(&psset, hpdata); - test_psset_fake_purge(hpdata); - hpdata_purge_allowed_set(hpdata, false); - psset_update_end(&psset, hpdata); + for (size_t i = 0; i < HUGEPAGE_PAGES - 1; i++) { + psset_update_begin(&psset, &huge); + hpdata_unreserve(&huge, huge_allocs[i], PAGE); + hpdata_purge_allowed_set(&huge, true); + psset_update_end(&psset, &huge); } - for (int i = 0; i < NHP; i++) { - hpdata = psset_pick_purge(&psset); - expect_true(huge_begin <= (uintptr_t)hpdata - && (uintptr_t)hpdata < huge_end, ""); - psset_update_begin(&psset, hpdata); - hpdata_dehugify(hpdata); - test_psset_fake_purge(hpdata); - hpdata_purge_allowed_set(hpdata, false); - psset_update_end(&psset, hpdata); + + /* And one page from nonhuge pageslab. */ + psset_update_begin(&psset, &nonhuge); + hpdata_unreserve(&nonhuge, nonhuge_allocs[0], PAGE); + hpdata_purge_allowed_set(&nonhuge, true); + psset_update_end(&psset, &nonhuge); + + /* + * Next allocation should be placed on huge pageslab, despite the fact + * that nonhuge pageslab is a better fit. 
 */ + hpdata_t *next = psset_pick_alloc(&psset, PAGE); + + expect_ptr_eq(hpdata_addr_get(next), hpdata_addr_get(&huge), + "Picked wrong pageslab to place allocation"); + expect_u64_eq(hpdata_age_get(next), hpdata_age_get(&huge), ""); +} +TEST_END + +static void +test_do_alloc_dalloc(psset_t *psset, hpdata_t *ps, int nallocs, int ndallocs) { + assert(nallocs >= ndallocs); + + VARIABLE_ARRAY(void *, ptrs, nallocs); + + psset_update_begin(psset, ps); + for (int i = 0; i < nallocs; i++) { + ptrs[i] = hpdata_reserve_alloc(ps, PAGE); } + for (int i = 0; i < ndallocs; i++) { + hpdata_unreserve(ps, ptrs[i], PAGE); + } + if (ndallocs > 0) { + hpdata_purge_allowed_set(ps, true); + } + psset_update_end(psset, ps); +} + +TEST_BEGIN(test_purge_prefers_nonhuge) { + psset_t psset; + psset_init(&psset); + + enum { + NALLOCS = 2, + NDALLOCS = NALLOCS - 1, + }; + + hpdata_t nonhuge; + hpdata_init(&nonhuge, /* addr */ NULL, /* age */ 0); + psset_insert(&psset, &nonhuge); + /* Leave one active page to make the slab non-empty. */ + test_do_alloc_dalloc(&psset, &nonhuge, NALLOCS, NDALLOCS); + + hpdata_t huge; + hpdata_init(&huge, /* addr */ (void *) HUGEPAGE, /* age */ 1); + psset_insert(&psset, &huge); + psset_update_begin(&psset, &huge); + hpdata_hugify(&huge); + psset_update_end(&psset, &huge); + test_do_alloc_dalloc(&psset, &huge, NALLOCS, NDALLOCS); + + /* + * Now both pageslabs have the same amount of dirty pages; we should + * purge from nonhuge first and then, when nothing is left there, purge from huge. 
+ */ + hpdata_t* purge = psset_pick_purge(&psset); + + expect_ptr_eq(hpdata_addr_get(purge), + hpdata_addr_get(&nonhuge), + "Picked wrong pageslab to purge from"); + expect_u64_eq(hpdata_age_get(purge), hpdata_age_get(&nonhuge), + ""); + + psset_update_begin(&psset, purge); + test_psset_fake_purge(purge); + hpdata_purge_allowed_set(purge, hpdata_ndirty_get(purge) > 0); + psset_update_end(&psset, purge); + + purge = psset_pick_purge(&psset); + + expect_ptr_eq(hpdata_addr_get(purge), + hpdata_addr_get(&huge), + "Picked wrong pageslab to purge from"); + expect_u64_eq(hpdata_age_get(purge), hpdata_age_get(&huge), + ""); } TEST_END @@ -907,6 +970,7 @@ main(void) { test_stats_fullness, test_oldest_fit, test_insert_remove, + test_alloc_prefers_huge, test_purge_prefers_nonhuge, test_purge_prefers_empty, test_purge_prefers_empty_huge);