From 70692cfb13332678af49f9d3c7bfe1fde65ec1aa Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Dec 2020 18:44:34 -0800 Subject: [PATCH] hpdata: Add state changing helpers. We're about to allow hugepage subextent purging; get as much of our metadata handling ready as possible. --- include/jemalloc/internal/hpdata.h | 100 ++++++++++++++++++++--- src/hpa.c | 5 +- src/hpdata.c | 112 ++++++++++++++++++++++++- test/unit/hpdata.c | 127 ++++++++++++++++++++++++++++- 4 files changed, 331 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 5952a18f..faa62434 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -34,6 +34,16 @@ struct hpdata_s { uint64_t h_age; /* Whether or not we think the hugepage is mapped that way by the OS. */ bool h_huge; + + /* + * Whether or not some thread is purging this hpdata (i.e. has called + * hpdata_purge_begin but not yet called hpdata_purge_end), or + * hugifying it. Only one thread at a time is allowed to change a + * hugepage's state. + */ + bool h_mid_purge; + bool h_mid_hugify; + union { /* When nonempty, used by the psset bins. 
*/ phn(hpdata_t) ph_link; @@ -90,6 +100,22 @@ hpdata_huge_get(const hpdata_t *hpdata) { return hpdata->h_huge; } +static inline bool +hpdata_changing_state_get(const hpdata_t *hpdata) { + return hpdata->h_mid_purge || hpdata->h_mid_hugify; +} + +static inline bool +hpdata_mid_purge_get(const hpdata_t *hpdata) { + return hpdata->h_mid_purge; +} + +static inline bool +hpdata_mid_hugify_get(const hpdata_t *hpdata) { + return hpdata->h_mid_hugify; +} + + static inline size_t hpdata_longest_free_range_get(const hpdata_t *hpdata) { return hpdata->h_longest_free_range; @@ -106,6 +132,11 @@ hpdata_nactive_get(hpdata_t *hpdata) { return hpdata->h_nactive; } +static inline size_t +hpdata_ndirty_get(hpdata_t *hpdata) { + return hpdata->h_ndirty; +} + static inline void hpdata_assert_empty(hpdata_t *hpdata) { assert(fb_empty(hpdata->active_pages, HUGEPAGE_PAGES)); @@ -164,20 +195,69 @@ void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age); void *hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz); void hpdata_unreserve(hpdata_t *hpdata, void *begin, size_t sz); -/* - * Tell the hpdata that it's now a hugepage (which, correspondingly, means that - * all its pages become dirty. - */ -void hpdata_hugify(hpdata_t *hpdata); -/* - * Tell the hpdata that it's no longer a hugepage (all its pages are still - * counted as dirty, though; an explicit purge call is required to change that). - */ -void hpdata_dehugify(hpdata_t *hpdata); /* * Tell the hpdata (which should be empty) that all dirty pages in it have been * purged. */ void hpdata_purge(hpdata_t *hpdata); +/* + * The hpdata_purge_state_t allows grabbing the metadata required to purge + * subranges of a hugepage while holding a lock, dropping the lock during the + * actual purging of them, and reacquiring it to update the metadata again. 
+ */ +typedef struct hpdata_purge_state_s hpdata_purge_state_t; +struct hpdata_purge_state_s { + size_t npurged; + fb_group_t to_purge[FB_NGROUPS(HUGEPAGE_PAGES)]; + size_t next_purge_search_begin; +}; + +/* + * Initializes purge state. The access to hpdata must be externally + * synchronized with other hpdata_* calls. + * + * You can tell whether or not a thread is purging or hugifying a given hpdata + * via hpdata_changing_state_get(hpdata). Racing hugification or purging + * operations aren't allowed. + * + * Once you begin purging, you have to follow through and call hpdata_purge_next + * until you're done, and then end. Allocating out of an hpdata undergoing + * purging is not allowed. + */ +void hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state); +/* + * If there are more extents to purge, sets *r_purge_addr and *r_purge_size to + * the address and size of the next extent to purge, and returns true. + * Otherwise, returns false to indicate that we're done. + * + * This requires exclusive access to the purge state, but *not* to the hpdata. + * In particular, unreserve calls are allowed while purging (i.e. you can dalloc + * into one part of the hpdata while purging a different part). + */ +bool hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, + void **r_purge_addr, size_t *r_purge_size); +/* + * Updates the hpdata metadata after all purging is done. Needs external + * synchronization. + */ +void hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state); + +/* + * Similarly, when hugifying, callers can do the metadata modifications while + * holding a lock (thereby setting the h_mid_hugify field), but actually do the + * operation without blocking other threads. + */ +void hpdata_hugify_begin(hpdata_t *hpdata); +void hpdata_hugify_end(hpdata_t *hpdata); + +/* + * Tell the hpdata that it's no longer a hugepage (all its pages are still + * counted as dirty, though; an explicit purge call is required to change that). 
+ * + * This should only be done after starting to purge, and before actually purging + * any contents. + */ +void hpdata_dehugify(hpdata_t *hpdata); + #endif /* JEMALLOC_INTERNAL_HPDATA_H */ diff --git a/src/hpa.c b/src/hpa.c index 75636047..a36eee4e 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -300,7 +300,7 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) bool hugify = hpa_should_hugify(shard, ps); if (hugify) { - hpdata_hugify(ps); + hpdata_hugify_begin(ps); } psset_insert(&shard->psset, ps); @@ -319,6 +319,9 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) * operations in this hpa shard. */ hpa_hugify(ps); + malloc_mutex_lock(tsdn, &shard->mtx); + hpdata_hugify_end(ps); + malloc_mutex_unlock(tsdn, &shard->mtx); } return edata; } diff --git a/src/hpdata.c b/src/hpdata.c index 8297158e..29aecff5 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -22,6 +22,8 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata_addr_set(hpdata, addr); hpdata_age_set(hpdata, age); hpdata->h_huge = false; + hpdata->h_mid_purge = false; + hpdata->h_mid_hugify = false; hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); hpdata->h_nactive = 0; fb_init(hpdata->active_pages, HUGEPAGE_PAGES); @@ -140,17 +142,125 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { } void -hpdata_hugify(hpdata_t *hpdata) { +hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata_assert_consistent(hpdata); + assert(!hpdata->h_mid_purge); + assert(!hpdata->h_mid_hugify); + hpdata->h_mid_purge = true; + + purge_state->npurged = 0; + purge_state->next_purge_search_begin = 0; + + /* + * Initialize to_purge with everything that's not active but that is + * dirty. 
+ * + * As an optimization, we could note that in practice we never allocate + * out of a hugepage while purging within it, and so could try to + * combine dirty extents separated by a non-dirty but non-active extent + * to avoid purge calls. This does nontrivially complicate metadata + * tracking though, so let's hold off for now. + */ + fb_bit_not(purge_state->to_purge, hpdata->active_pages, HUGEPAGE_PAGES); + fb_bit_and(purge_state->to_purge, purge_state->to_purge, + hpdata->dirty_pages, HUGEPAGE_PAGES); + + /* We purge everything we can. */ + assert(hpdata->h_ndirty - hpdata->h_nactive == fb_scount( + purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); + + hpdata_assert_consistent(hpdata); +} + +bool +hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, + void **r_purge_addr, size_t *r_purge_size) { + /* + * Note that we don't have a consistency check here; we're accessing + * hpdata without synchronization, and therefore have no right to expect + * a consistent state. + */ + assert(hpdata->h_mid_purge); + /* Should have dehugified already (if necessary). 
*/ + assert(!hpdata->h_huge); + assert(!hpdata->h_mid_hugify); + + if (purge_state->next_purge_search_begin == HUGEPAGE_PAGES) { + return false; + } + size_t purge_begin; + size_t purge_len; + bool found_range = fb_srange_iter(purge_state->to_purge, HUGEPAGE_PAGES, + purge_state->next_purge_search_begin, &purge_begin, &purge_len); + if (!found_range) { + return false; + } + + *r_purge_addr = (void *)( + (uintptr_t)hpdata_addr_get(hpdata) + purge_begin * PAGE); + *r_purge_size = purge_len * PAGE; + + purge_state->next_purge_search_begin = purge_begin + purge_len; + purge_state->npurged += purge_len; + assert(purge_state->npurged <= HUGEPAGE_PAGES); + + return true; +} + +void +hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { + hpdata_assert_consistent(hpdata); + assert(hpdata->h_mid_purge); + assert(!hpdata->h_mid_hugify); + hpdata->h_mid_purge = false; + + assert(purge_state->npurged == fb_scount(purge_state->to_purge, + HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); + + fb_bit_not(purge_state->to_purge, purge_state->to_purge, + HUGEPAGE_PAGES); + fb_bit_and(hpdata->dirty_pages, hpdata->dirty_pages, + purge_state->to_purge, HUGEPAGE_PAGES); + assert(hpdata->h_ndirty >= purge_state->npurged); + hpdata->h_ndirty -= purge_state->npurged; + + hpdata_assert_consistent(hpdata); +} + +void +hpdata_hugify_begin(hpdata_t *hpdata) { + hpdata_assert_consistent(hpdata); + assert(!hpdata->h_mid_purge); + assert(!hpdata->h_mid_hugify); + hpdata->h_mid_hugify = true; hpdata->h_huge = true; fb_set_range(hpdata->dirty_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES); hpdata->h_ndirty = HUGEPAGE_PAGES; hpdata_assert_consistent(hpdata); } +void +hpdata_hugify_end(hpdata_t *hpdata) { + hpdata_assert_consistent(hpdata); + assert(!hpdata->h_mid_purge); + assert(hpdata->h_mid_hugify); + hpdata->h_mid_hugify = false; + hpdata_assert_consistent(hpdata); +} + void hpdata_dehugify(hpdata_t *hpdata) { hpdata_assert_consistent(hpdata); + /* + * These asserts are morally right; for now, 
though, we have the "purge a + * hugepage only in its entirety, when it becomes empty", path sharing + * hpdata_dehugify with the new purge pathway coming in the next + * commit. + */ + /* + assert(hpdata->h_mid_purge); + assert(!hpdata->h_mid_hugify); + */ hpdata->h_huge = false; hpdata_assert_consistent(hpdata); } diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c index 1bf58bca..2fd9a367 100644 --- a/test/unit/hpdata.c +++ b/test/unit/hpdata.c @@ -55,7 +55,132 @@ TEST_BEGIN(test_reserve_alloc) { } TEST_END +TEST_BEGIN(test_purge_simple) { + hpdata_t hpdata; + hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + + void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE_PAGES / 2 * PAGE); + expect_ptr_eq(alloc, HPDATA_ADDR, ""); + + /* Create HUGEPAGE_PAGES / 4 dirty inactive pages at the beginning. */ + hpdata_unreserve(&hpdata, alloc, HUGEPAGE_PAGES / 4 * PAGE); + + expect_zu_eq(hpdata_ndirty_get(&hpdata), HUGEPAGE_PAGES / 2, ""); + + expect_false(hpdata_changing_state_get(&hpdata), ""); + + hpdata_purge_state_t purge_state; + hpdata_purge_begin(&hpdata, &purge_state); + + expect_true(hpdata_changing_state_get(&hpdata), ""); + + void *purge_addr; + size_t purge_size; + bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, + &purge_size); + expect_true(got_result, ""); + expect_ptr_eq(HPDATA_ADDR, purge_addr, ""); + expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); + + expect_true(hpdata_changing_state_get(&hpdata), ""); + + got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, + &purge_size); + expect_false(got_result, "Unexpected additional purge range: " + "extent at %p of size %zu", purge_addr, purge_size); + + expect_true(hpdata_changing_state_get(&hpdata), ""); + + hpdata_purge_end(&hpdata, &purge_state); + expect_false(hpdata_changing_state_get(&hpdata), ""); + expect_zu_eq(hpdata_ndirty_get(&hpdata), HUGEPAGE_PAGES / 4, ""); +} +TEST_END + +/* + * We only test intervening dalloc's not intervening allocs; we don't need + * 
intervening allocs, and foreseeable optimizations will make them not just + * unnecessary but incorrect. In particular, if there are two dirty extents + * separated only by a retained extent, we can just purge the entire range, + * saving a purge call. + */ +TEST_BEGIN(test_purge_intervening_dalloc) { + hpdata_t hpdata; + hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + + /* Allocate the first 3/4 of the pages. */ + void *alloc = hpdata_reserve_alloc(&hpdata, 3 * HUGEPAGE_PAGES / 4 * PAGE); + expect_ptr_eq(alloc, HPDATA_ADDR, ""); + + /* Free the first 1/4 and the third 1/4 of the pages. */ + hpdata_unreserve(&hpdata, alloc, HUGEPAGE_PAGES / 4 * PAGE); + hpdata_unreserve(&hpdata, + (void *)((uintptr_t)alloc + 2 * HUGEPAGE_PAGES / 4 * PAGE), + HUGEPAGE_PAGES / 4 * PAGE); + + expect_zu_eq(hpdata_ndirty_get(&hpdata), 3 * HUGEPAGE_PAGES / 4, ""); + + hpdata_purge_state_t purge_state; + hpdata_purge_begin(&hpdata, &purge_state); + + void *purge_addr; + size_t purge_size; + /* First purge. */ + bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, + &purge_size); + expect_true(got_result, ""); + expect_ptr_eq(HPDATA_ADDR, purge_addr, ""); + expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); + + /* Deallocate the second 1/4 before the second purge occurs. */ + hpdata_unreserve(&hpdata, + (void *)((uintptr_t)alloc + 1 * HUGEPAGE_PAGES / 4 * PAGE), + HUGEPAGE_PAGES / 4 * PAGE); + + /* Now continue purging. 
*/ + got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, + &purge_size); + expect_true(got_result, ""); + expect_ptr_eq( + (void *)((uintptr_t)alloc + 2 * HUGEPAGE_PAGES / 4 * PAGE), + purge_addr, ""); + expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); + + got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, + &purge_size); + expect_false(got_result, "Unexpected additional purge range: " + "extent at %p of size %zu", purge_addr, purge_size); + + hpdata_purge_end(&hpdata, &purge_state); + + expect_zu_eq(hpdata_ndirty_get(&hpdata), HUGEPAGE_PAGES / 4, ""); +} +TEST_END + +TEST_BEGIN(test_hugify) { + hpdata_t hpdata; + hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + + void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE / 2); + expect_ptr_eq(alloc, HPDATA_ADDR, ""); + + expect_zu_eq(HUGEPAGE_PAGES / 2, hpdata_ndirty_get(&hpdata), ""); + + expect_false(hpdata_changing_state_get(&hpdata), ""); + hpdata_hugify_begin(&hpdata); + expect_true(hpdata_changing_state_get(&hpdata), ""); + hpdata_hugify_end(&hpdata); + expect_false(hpdata_changing_state_get(&hpdata), ""); + + /* Hugeifying should have increased the dirty page count. */ + expect_zu_eq(HUGEPAGE_PAGES, hpdata_ndirty_get(&hpdata), ""); +} +TEST_END + int main(void) { return test_no_reentrancy( - test_reserve_alloc); + test_reserve_alloc, + test_purge_simple, + test_purge_intervening_dalloc, + test_hugify); }