diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 393ed27f..feca5f5e 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -36,11 +36,30 @@ struct hpdata_s { bool h_huge; /* - * Whether or not some thread is purging this hpdata (i.e. has called - * hpdata_purge_begin but not yet called hpdata_purge_end), or - * hugifying it. Only one thread at a time is allowed to change a - * hugepage's state. + * For some properties, we keep parallel sets of bools; h_foo_allowed + * and h_in_psset_foo_container. This is a decoupling mechanism to + * avoid bothering the hpa (which manages policies) from the psset + * (which is the mechanism used to enforce those policies). This allows + * all the container management logic to live in one place, without the + * HPA needing to know or care how that happens. */ + + /* + * Whether or not the hpdata is allowed to be used to serve allocations, + * and whether or not the psset is currently tracking it as such. + */ + bool h_alloc_allowed; + bool h_in_psset_alloc_container; + + /* The same, but with purging. */ + bool h_purge_allowed; + bool h_in_psset_purge_container; + + /* And with hugifying. */ + bool h_hugify_allowed; + bool h_in_psset_hugify_container; + + /* Whether or not a purge or hugify is currently happening. */ bool h_mid_purge; bool h_mid_hugify; @@ -65,6 +84,12 @@ struct hpdata_s { ql_elm(hpdata_t) ql_link_empty; }; + /* + * Linkage for the psset to track candidates for purging and hugifying. + */ + ql_elm(hpdata_t) ql_link_purge; + ql_elm(hpdata_t) ql_link_hugify; + /* The length of the largest contiguous sequence of inactive pages. 
*/ size_t h_longest_free_range; @@ -86,6 +111,9 @@ struct hpdata_s { }; TYPED_LIST(hpdata_empty_list, hpdata_t, ql_link_empty) +TYPED_LIST(hpdata_purge_list, hpdata_t, ql_link_purge) +TYPED_LIST(hpdata_hugify_list, hpdata_t, ql_link_hugify) + typedef ph(hpdata_t) hpdata_age_heap_t; ph_proto(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t); @@ -116,8 +144,66 @@ hpdata_huge_get(const hpdata_t *hpdata) { } static inline bool -hpdata_changing_state_get(const hpdata_t *hpdata) { - return hpdata->h_mid_purge || hpdata->h_mid_hugify; +hpdata_alloc_allowed_get(const hpdata_t *hpdata) { + return hpdata->h_alloc_allowed; +} + +static inline void +hpdata_alloc_allowed_set(hpdata_t *hpdata, bool alloc_allowed) { + hpdata->h_alloc_allowed = alloc_allowed; +} + +static inline bool +hpdata_in_psset_alloc_container_get(const hpdata_t *hpdata) { + return hpdata->h_in_psset_alloc_container; +} + +static inline void +hpdata_in_psset_alloc_container_set(hpdata_t *hpdata, bool in_container) { + assert(in_container != hpdata->h_in_psset_alloc_container); + hpdata->h_in_psset_alloc_container = in_container; +} + +static inline bool +hpdata_purge_allowed_get(const hpdata_t *hpdata) { + return hpdata->h_purge_allowed; +} + +static inline void +hpdata_purge_allowed_set(hpdata_t *hpdata, bool purge_allowed) { + hpdata->h_purge_allowed = purge_allowed; +} + +static inline bool +hpdata_in_psset_purge_container_get(const hpdata_t *hpdata) { + return hpdata->h_in_psset_purge_container; +} + +static inline void +hpdata_in_psset_purge_container_set(hpdata_t *hpdata, bool in_container) { + assert(in_container != hpdata->h_in_psset_purge_container); + hpdata->h_in_psset_purge_container = in_container; +} + +static inline bool +hpdata_hugify_allowed_get(const hpdata_t *hpdata) { + return hpdata->h_hugify_allowed; +} + +static inline void +hpdata_hugify_allowed_set(hpdata_t *hpdata, bool hugify_allowed) { + hpdata->h_hugify_allowed = hugify_allowed; +} + +static inline bool 
+hpdata_in_psset_hugify_container_get(const hpdata_t *hpdata) { + return hpdata->h_in_psset_hugify_container; +} + +static inline void +hpdata_in_psset_hugify_container_set(hpdata_t *hpdata, bool in_container) { + assert(in_container != hpdata->h_in_psset_hugify_container); + hpdata->h_in_psset_hugify_container = in_container; } static inline bool @@ -125,11 +211,29 @@ hpdata_mid_purge_get(const hpdata_t *hpdata) { return hpdata->h_mid_purge; } +static inline void +hpdata_mid_purge_set(hpdata_t *hpdata, bool mid_purge) { + assert(mid_purge != hpdata->h_mid_purge); + hpdata->h_mid_purge = mid_purge; +} + static inline bool hpdata_mid_hugify_get(const hpdata_t *hpdata) { return hpdata->h_mid_hugify; } +static inline void +hpdata_mid_hugify_set(hpdata_t *hpdata, bool mid_hugify) { + assert(mid_hugify != hpdata->h_mid_hugify); + hpdata->h_mid_hugify = mid_hugify; +} + +static inline bool +hpdata_changing_state_get(const hpdata_t *hpdata) { + return hpdata->h_mid_purge || hpdata->h_mid_hugify; +} + + static inline bool hpdata_updating_get(const hpdata_t *hpdata) { return hpdata->h_updating; @@ -278,26 +382,7 @@ bool hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, */ void hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state); -/* - * Similarly, when hugifying , callers can do the metadata modifications while - * holding a lock (thereby setting the change_state field), but actually do the - * operation without blocking other threads. - * - * Unlike most metadata operations, hugification ending should happen while an - * hpdata is in the psset (or upcoming hugepage collections). This is because - * while purge/use races are unsafe, purge/hugepageify races are perfectly - * reasonable. 
- */ -void hpdata_hugify_begin(hpdata_t *hpdata); -void hpdata_hugify_end(hpdata_t *hpdata); - -/* - * Tell the hpdata that it's no longer a hugepage (all its pages are still - * counted as dirty, though; an explicit purge call is required to change that). - * - * This should only be done after starting to purge, and before actually purging - * any contents. - */ +void hpdata_hugify(hpdata_t *hpdata); void hpdata_dehugify(hpdata_t *hpdata); #endif /* JEMALLOC_INTERNAL_HPDATA_H */ diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index b220609b..6e08e8ba 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -61,8 +61,15 @@ struct psset_s { hpdata_age_heap_t pageslabs[PSSET_NPSIZES]; bitmap_t bitmap[BITMAP_GROUPS(PSSET_NPSIZES)]; psset_stats_t stats; - /* Slabs with no active allocations. */ - hpdata_empty_list_t empty_slabs; + /* + * Slabs with no active allocations, but which are allowed to serve new + * allocations. + */ + hpdata_empty_list_t empty; + /* Slabs which are available to be purged. */ + hpdata_purge_list_t to_purge; + /* Slabs which are available to be hugified. */ + hpdata_hugify_list_t to_hugify; }; void psset_init(psset_t *psset); @@ -77,6 +84,10 @@ void psset_update_end(psset_t *psset, hpdata_t *ps); /* Analogous to the eset_fit; pick a hpdata to serve the request. */ hpdata_t *psset_pick_alloc(psset_t *psset, size_t size); +/* Pick one to purge. */ +hpdata_t *psset_pick_purge(psset_t *psset); +/* Pick one to hugify. 
*/ +hpdata_t *psset_pick_hugify(psset_t *psset); void psset_insert(psset_t *psset, hpdata_t *ps); void psset_remove(psset_t *psset, hpdata_t *ps); diff --git a/src/hpa.c b/src/hpa.c index 8f4642c8..5dd34c3b 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -227,65 +227,150 @@ hpa_grow(tsdn_t *tsdn, hpa_shard_t *shard) { return ps; } -/* - * As a precondition, ps should not be in the psset (we can handle deallocation - * races, but not allocation ones), and we should hold the shard mutex. - */ -static void -hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { +/* Returns whether or not we purged anything. */ +static bool +hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->mtx); - while (hpa_should_purge(shard, ps)) { - /* Do the metadata update bit while holding the lock. */ - hpdata_purge_state_t purge_state; - hpdata_purge_begin(ps, &purge_state); - shard->stats.npurge_passes++; - /* - * Dehugifying can only happen on the first loop iteration, - * since no other threads can allocate out of this ps while - * we're purging (and thus, can't hugify it), but there's not a - * natural way to express that in the control flow. - */ - bool needs_dehugify = false; - if (hpdata_huge_get(ps)) { - needs_dehugify = true; - shard->stats.ndehugifies++; - hpdata_dehugify(ps); - } - - /* Drop the lock to do the OS calls. */ - malloc_mutex_unlock(tsdn, &shard->mtx); - - if (needs_dehugify) { - pages_nohuge(hpdata_addr_get(ps), HUGEPAGE); - } - - size_t total_purged = 0; - uint64_t purges_this_pass = 0; - void *purge_addr; - size_t purge_size; - while (hpdata_purge_next(ps, &purge_state, &purge_addr, - &purge_size)) { - purges_this_pass++; - pages_purge_forced(purge_addr, purge_size); - total_purged += purge_size; - } - - /* Reacquire to finish our metadata update. 
*/ - malloc_mutex_lock(tsdn, &shard->mtx); - shard->stats.npurges += purges_this_pass; - hpdata_purge_end(ps, &purge_state); - - assert(total_purged <= HUGEPAGE); - - /* - * We're not done here; other threads can't allocate out of ps - * while purging, but they can still deallocate. Those - * deallocations could have meant more purging than what we - * planned ought to happen. We have to re-check now that we've - * reacquired the mutex again. - */ + hpdata_t *to_purge = psset_pick_purge(&shard->psset); + if (to_purge == NULL) { + return false; } + assert(hpdata_purge_allowed_get(to_purge)); + assert(!hpdata_changing_state_get(to_purge)); + + /* + * Don't let anyone else purge or hugify this page while + * we're purging it (allocations and deallocations are + * OK). + */ + psset_update_begin(&shard->psset, to_purge); + assert(hpdata_alloc_allowed_get(to_purge)); + hpdata_mid_purge_set(to_purge, true); + hpdata_purge_allowed_set(to_purge, false); + hpdata_hugify_allowed_set(to_purge, false); + /* + * Unlike with hugification (where concurrent + * allocations are allowed), concurrent allocation out + * of a hugepage being purged is unsafe; we might hand + * out an extent for an allocation and then purge it + * (clearing out user data). + */ + hpdata_alloc_allowed_set(to_purge, false); + psset_update_end(&shard->psset, to_purge); + + /* Gather all the metadata we'll need during the purge. */ + bool dehugify = hpdata_huge_get(to_purge); + hpdata_purge_state_t purge_state; + hpdata_purge_begin(to_purge, &purge_state); + + malloc_mutex_unlock(tsdn, &shard->mtx); + + /* Actually do the purging, now that the lock is dropped. 
*/ + if (dehugify) { + pages_nohuge(hpdata_addr_get(to_purge), HUGEPAGE); + } + size_t total_purged = 0; + uint64_t purges_this_pass = 0; + void *purge_addr; + size_t purge_size; + while (hpdata_purge_next(to_purge, &purge_state, &purge_addr, + &purge_size)) { + total_purged += purge_size; + assert(total_purged <= HUGEPAGE); + purges_this_pass++; + pages_purge_forced(purge_addr, purge_size); + } + + malloc_mutex_lock(tsdn, &shard->mtx); + /* The shard updates */ + shard->stats.npurge_passes++; + shard->stats.npurges += purges_this_pass; + if (dehugify) { + shard->stats.ndehugifies++; + } + + /* The hpdata updates. */ + psset_update_begin(&shard->psset, to_purge); + if (dehugify) { + hpdata_dehugify(to_purge); + } + hpdata_purge_end(to_purge, &purge_state); + hpdata_mid_purge_set(to_purge, false); + + hpdata_alloc_allowed_set(to_purge, true); + hpdata_purge_allowed_set(to_purge, hpa_should_purge(shard, to_purge)); + hpdata_hugify_allowed_set(to_purge, hpa_should_hugify(shard, to_purge)); + + psset_update_end(&shard->psset, to_purge); + + return true; +} + +/* Returns whether or not we hugified anything. */ +static bool +hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + + hpdata_t *to_hugify = psset_pick_hugify(&shard->psset); + if (to_hugify == NULL) { + return false; + } + assert(hpdata_hugify_allowed_get(to_hugify)); + assert(!hpdata_changing_state_get(to_hugify)); + + /* + * Don't let anyone else purge or hugify this page while + * we're hugifying it (allocations and deallocations are + * OK). 
+	 */
+	psset_update_begin(&shard->psset, to_hugify);
+	hpdata_mid_hugify_set(to_hugify, true);
+	hpdata_purge_allowed_set(to_hugify, false);
+	hpdata_hugify_allowed_set(to_hugify, false);
+	assert(hpdata_alloc_allowed_get(to_hugify));
+	psset_update_end(&shard->psset, to_hugify);
+
+	malloc_mutex_unlock(tsdn, &shard->mtx);
+
+	bool err = pages_huge(hpdata_addr_get(to_hugify),
+	    HUGEPAGE);
+	/*
+	 * It's not clear what we could do in case of error; we
+	 * might get into situations where we loop trying to
+	 * hugify some page and failing over and over again.
+	 * Just eat the error and pretend we were successful.
+	 */
+	(void)err;
+
+	malloc_mutex_lock(tsdn, &shard->mtx);
+	shard->stats.nhugifies++;
+
+	psset_update_begin(&shard->psset, to_hugify);
+	hpdata_hugify(to_hugify);
+	hpdata_mid_hugify_set(to_hugify, false);
+	hpdata_purge_allowed_set(to_hugify,
+	    hpa_should_purge(shard, to_hugify));
+	/* Already huge, so no longer a hugification candidate. */
+	hpdata_hugify_allowed_set(to_hugify, false);
+	psset_update_end(&shard->psset, to_hugify);
+
+	return true;
+}
+
+/*
+ * Drains the deferred-work queues (purging and hugifying) while holding the
+ * shard mutex.  Bounded by maxloops so a single caller can't be stuck doing
+ * other threads' deferred work indefinitely.
+ */
+static void
+hpa_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) {
+	bool hugified;
+	bool purged;
+	size_t nloop = 0;
+	/* Just *some* bound, to impose a worst-case latency bound. */
+	size_t maxloops = 100;
+	do {
+		malloc_mutex_assert_owner(tsdn, &shard->mtx);
+		hugified = hpa_try_hugify(tsdn, shard);
+		purged = hpa_try_purge(tsdn, shard);
+		malloc_mutex_assert_owner(tsdn, &shard->mtx);
+	} while ((hugified || purged) && nloop++ < maxloops);
 }
 
 static edata_t *
@@ -344,6 +429,10 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom)
 	 * We should arguably reset dirty state here, but this would
 	 * require some sort of prepare + commit functionality that's a
 	 * little much to deal with for now.
+	 *
+	 * We don't have a do_deferred_work down this pathway, on the
+	 * principle that we didn't *really* affect shard state (we
+	 * tweaked the stats, but our tweaks weren't really accurate).
*/ psset_update_end(&shard->psset, ps); edata_cache_small_put(tsdn, &shard->ecs, edata); @@ -352,49 +441,14 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) return NULL; } - bool hugify = hpa_should_hugify(shard, ps); - if (hugify) { - hpdata_hugify_begin(ps); - shard->stats.nhugifies++; + if (hpa_should_hugify(shard, ps)) { + hpdata_hugify_allowed_set(ps, true); } psset_update_end(&shard->psset, ps); + hpa_do_deferred_work(tsdn, shard); malloc_mutex_unlock(tsdn, &shard->mtx); - if (hugify) { - /* - * Hugifying with the lock dropped is safe, even with - * concurrent modifications to the ps. This relies on - * the fact that the current implementation will never - * dehugify a non-empty pageslab, and ps will never - * become empty before we return edata to the user to be - * freed. - * - * Note that holding the lock would prevent not just operations - * on this page slab, but also operations any other alloc/dalloc - * operations in this hpa shard. - */ - bool err = pages_huge(hpdata_addr_get(ps), HUGEPAGE); - /* - * Pretending we succeed when we actually failed is safe; trying - * to rolllback would be tricky, though. Eat the error. - */ - (void)err; - malloc_mutex_lock(tsdn, &shard->mtx); - hpdata_hugify_end(ps); - if (hpa_should_purge(shard, ps)) { - /* - * There was a race in which the ps went from being - * almost full to having lots of free space while we - * hugified. Undo our operation, taking care to meet - * the precondition that the ps isn't in the psset. 
- */ - psset_update_begin(&shard->psset, ps); - hpa_purge(tsdn, shard, ps); - psset_update_end(&shard->psset, ps); - } - malloc_mutex_unlock(tsdn, &shard->mtx); - } return edata; } @@ -445,6 +499,14 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { return NULL; } + /* + * TODO: the tail of this function is quite similar to the tail of + * hpa_try_alloc_no_grow (both, broadly, do the metadata management of + * initializing an edata_t from an hpdata_t once both have been + * allocated). The only differences are in error case handling and lock + * management (we hold grow_mtx, but should drop it before doing any + * deferred work). With a little refactoring, we could unify the paths. + */ psset_update_begin(&shard->psset, ps); void *addr = hpdata_reserve_alloc(ps, size); @@ -481,10 +543,20 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); return NULL; } + if (hpa_should_hugify(shard, ps)) { + hpdata_hugify_allowed_set(ps, true); + } psset_update_end(&shard->psset, ps); - malloc_mutex_unlock(tsdn, &shard->mtx); + /* + * Drop grow_mtx before doing deferred work; other threads blocked on it + * should be allowed to proceed while we're working. + */ malloc_mutex_unlock(tsdn, &shard->grow_mtx); + + hpa_do_deferred_work(tsdn, shard); + + malloc_mutex_unlock(tsdn, &shard->mtx); return edata; } @@ -579,48 +651,15 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { size_t unreserve_size = edata_size_get(edata); edata_cache_small_put(tsdn, &shard->ecs, edata); - /* - * We have three rules interacting here: - * - You can't update ps metadata while it's still in the psset. We - * enforce this because it's necessary for stats tracking and metadata - * management. - * - The ps must not be in the psset while purging. This is because we - * can't handle purge/alloc races. - * - Whoever removes the ps from the psset is the one to reinsert it. 
- */ - if (hpdata_mid_purge_get(ps)) { - /* - * Another thread started purging, and so the ps is not in the - * psset and we can do our metadata update. The other thread is - * in charge of reinserting the ps, so we're done. - */ - assert(hpdata_updating_get(ps)); - hpdata_unreserve(ps, unreserve_addr, unreserve_size); - malloc_mutex_unlock(tsdn, &shard->mtx); - return; - } - /* - * No other thread is purging, and the ps is non-empty, so it should be - * in the psset. - */ - assert(!hpdata_updating_get(ps)); psset_update_begin(&shard->psset, ps); hpdata_unreserve(ps, unreserve_addr, unreserve_size); - if (!hpa_should_purge(shard, ps)) { - /* - * This should be the common case; no other thread is purging, - * and we won't purge either. - */ - psset_update_end(&shard->psset, ps); - malloc_mutex_unlock(tsdn, &shard->mtx); - return; + if (hpa_should_purge(shard, ps)) { + hpdata_purge_allowed_set(ps, true); } - - /* It's our job to purge. */ - hpa_purge(tsdn, shard, ps); - psset_update_end(&shard->psset, ps); + hpa_do_deferred_work(tsdn, shard); + malloc_mutex_unlock(tsdn, &shard->mtx); } diff --git a/src/hpdata.c b/src/hpdata.c index 0cfeeed2..bb4808aa 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -22,6 +22,12 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata_addr_set(hpdata, addr); hpdata_age_set(hpdata, age); hpdata->h_huge = false; + hpdata->h_alloc_allowed = true; + hpdata->h_in_psset_alloc_container = false; + hpdata->h_purge_allowed = false; + hpdata->h_in_psset_purge_container = false; + hpdata->h_hugify_allowed = false; + hpdata->h_in_psset_hugify_container = false; hpdata->h_mid_purge = false; hpdata->h_mid_hugify = false; hpdata->h_updating = false; @@ -44,6 +50,7 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { * mid-update. 
*/ assert(!hpdata->h_in_psset || hpdata->h_updating); + assert(hpdata->h_alloc_allowed); assert((sz & PAGE_MASK) == 0); size_t npages = sz >> LG_PAGE; assert(npages <= hpdata_longest_free_range_get(hpdata)); @@ -155,10 +162,6 @@ void hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata_assert_consistent(hpdata); /* See the comment in reserve. */ - assert(!hpdata->h_in_psset || hpdata->h_updating); - assert(!hpdata->h_mid_purge); - assert(!hpdata->h_mid_hugify); - hpdata->h_mid_purge = true; purge_state->npurged = 0; purge_state->next_purge_search_begin = 0; @@ -192,12 +195,6 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, * hpdata without synchronization, and therefore have no right to expect * a consistent state. */ - assert(hpdata->h_mid_purge); - /* See the comment in reserve. */ - assert(!hpdata->h_in_psset || hpdata->h_updating); - /* Should have dehugified already (if necessary). */ - assert(!hpdata->h_huge); - assert(!hpdata->h_mid_hugify); if (purge_state->next_purge_search_begin == HUGEPAGE_PAGES) { return false; @@ -226,9 +223,6 @@ hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata_assert_consistent(hpdata); /* See the comment in reserve. */ assert(!hpdata->h_in_psset || hpdata->h_updating); - assert(hpdata->h_mid_purge); - assert(!hpdata->h_mid_hugify); - hpdata->h_mid_purge = false; assert(purge_state->npurged == fb_scount(purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); @@ -244,40 +238,17 @@ hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { } void -hpdata_hugify_begin(hpdata_t *hpdata) { +hpdata_hugify(hpdata_t *hpdata) { hpdata_assert_consistent(hpdata); - /* See the comment in reserve. 
*/ - assert(!hpdata->h_in_psset || hpdata->h_updating); - assert(!hpdata->h_mid_purge); - assert(!hpdata->h_mid_hugify); - hpdata->h_mid_hugify = true; hpdata->h_huge = true; fb_set_range(hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES); hpdata->h_ntouched = HUGEPAGE_PAGES; hpdata_assert_consistent(hpdata); } -void -hpdata_hugify_end(hpdata_t *hpdata) { - hpdata_assert_consistent(hpdata); - /* - * This is the exception to the "no-metadata updates without informing - * the psset first" rule; this assert would be incorrect. - */ - /* assert(!hpdata->h_in_psset || hpdata->h_updating); */ - assert(!hpdata->h_mid_purge); - assert(hpdata->h_mid_hugify); - hpdata->h_mid_hugify = false; - hpdata_assert_consistent(hpdata); -} - void hpdata_dehugify(hpdata_t *hpdata) { hpdata_assert_consistent(hpdata); - assert(hpdata->h_updating); - assert(hpdata->h_updating); - assert(hpdata->h_mid_purge); - assert(!hpdata->h_mid_hugify); hpdata->h_huge = false; hpdata_assert_consistent(hpdata); } diff --git a/src/psset.c b/src/psset.c index 89971020..bb51e21e 100644 --- a/src/psset.c +++ b/src/psset.c @@ -15,7 +15,9 @@ psset_init(psset_t *psset) { } bitmap_init(psset->bitmap, &psset_bitmap_info, /* fill */ true); memset(&psset->stats, 0, sizeof(psset->stats)); - hpdata_empty_list_init(&psset->empty_slabs); + hpdata_empty_list_init(&psset->empty); + hpdata_purge_list_init(&psset->to_purge); + hpdata_hugify_list_init(&psset->to_hugify); } static void @@ -85,25 +87,56 @@ psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) { hpdata_age_heap_insert(&psset->pageslabs[pind], ps); } -/* - * Insert ps into the data structures we use to track allocation stats and pick - * the pageslabs for new allocations. - * - * In particular, this does *not* remove ps from any hugification / purging - * queues it may be in. 
- */ static void -psset_do_alloc_tracking_insert(psset_t *psset, hpdata_t *ps) { +psset_stats_insert(psset_t* psset, hpdata_t *ps) { if (hpdata_empty(ps)) { psset_bin_stats_insert(psset->stats.empty_slabs, ps); + } else if (hpdata_full(ps)) { + psset_bin_stats_insert(psset->stats.full_slabs, ps); + } else { + size_t longest_free_range = hpdata_longest_free_range_get(ps); + + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( + longest_free_range << LG_PAGE)); + assert(pind < PSSET_NPSIZES); + + psset_bin_stats_insert(psset->stats.nonfull_slabs[pind], ps); + } +} + +static void +psset_stats_remove(psset_t *psset, hpdata_t *ps) { + if (hpdata_empty(ps)) { + psset_bin_stats_remove(psset->stats.empty_slabs, ps); + } else if (hpdata_full(ps)) { + psset_bin_stats_remove(psset->stats.full_slabs, ps); + } else { + size_t longest_free_range = hpdata_longest_free_range_get(ps); + + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( + longest_free_range << LG_PAGE)); + assert(pind < PSSET_NPSIZES); + + psset_bin_stats_remove(psset->stats.nonfull_slabs[pind], ps); + } +} + +/* + * Put ps into some container so that it can be found during future allocation + * requests. + */ +static void +psset_alloc_container_insert(psset_t *psset, hpdata_t *ps) { + assert(!hpdata_in_psset_alloc_container_get(ps)); + hpdata_in_psset_alloc_container_set(ps, true); + if (hpdata_empty(ps)) { /* * This prepend, paired with popping the head in psset_fit, * means we implement LIFO ordering for the empty slabs set, * which seems reasonable. */ - hpdata_empty_list_prepend(&psset->empty_slabs, ps); + hpdata_empty_list_prepend(&psset->empty, ps); } else if (hpdata_full(ps)) { - psset_bin_stats_insert(psset->stats.full_slabs, ps); /* * We don't need to keep track of the full slabs; we're never * going to return them from a psset_pick_alloc call. 
@@ -115,23 +148,20 @@ psset_do_alloc_tracking_insert(psset_t *psset, hpdata_t *ps) { longest_free_range << LG_PAGE)); assert(pind < PSSET_NPSIZES); - psset_bin_stats_insert(psset->stats.nonfull_slabs[pind], ps); psset_hpdata_heap_insert(psset, pind, ps); } } /* Remove ps from those collections. */ static void -psset_do_alloc_tracking_remove(psset_t *psset, hpdata_t *ps) { +psset_alloc_container_remove(psset_t *psset, hpdata_t *ps) { + assert(hpdata_in_psset_alloc_container_get(ps)); + hpdata_in_psset_alloc_container_set(ps, false); + if (hpdata_empty(ps)) { - psset_bin_stats_remove(psset->stats.empty_slabs, ps); - hpdata_empty_list_remove(&psset->empty_slabs, ps); + hpdata_empty_list_remove(&psset->empty, ps); } else if (hpdata_full(ps)) { - /* - * We don't need to maintain an explicit container of full - * pageslabs anywhere, but we do have to update stats. - */ - psset_bin_stats_remove(psset->stats.full_slabs, ps); + /* Same as above -- do nothing in this case. */ } else { size_t longest_free_range = hpdata_longest_free_range_get(ps); @@ -139,7 +169,6 @@ psset_do_alloc_tracking_remove(psset_t *psset, hpdata_t *ps) { longest_free_range << LG_PAGE)); assert(pind < PSSET_NPSIZES); - psset_bin_stats_remove(psset->stats.nonfull_slabs[pind], ps); psset_hpdata_heap_remove(psset, pind, ps); } } @@ -149,7 +178,21 @@ psset_update_begin(psset_t *psset, hpdata_t *ps) { hpdata_assert_consistent(ps); assert(hpdata_in_psset_get(ps)); hpdata_updating_set(ps, true); - psset_do_alloc_tracking_remove(psset, ps); + psset_stats_remove(psset, ps); + if (hpdata_in_psset_alloc_container_get(ps)) { + /* + * Some metadata updates can break alloc container invariants + * (e.g. the longest free range determines the hpdata_heap_t the + * pageslab lives in). + */ + assert(hpdata_alloc_allowed_get(ps)); + psset_alloc_container_remove(psset, ps); + } + /* + * We don't update presence in the purge list or hugify list; we try to + * keep those FIFO, even in the presence of other metadata updates. 
+ * We'll update presence at the end of the metadata update if necessary. + */ } void @@ -157,7 +200,36 @@ psset_update_end(psset_t *psset, hpdata_t *ps) { hpdata_assert_consistent(ps); assert(hpdata_in_psset_get(ps)); hpdata_updating_set(ps, false); - psset_do_alloc_tracking_insert(psset, ps); + psset_stats_insert(psset, ps); + + /* + * The update begin should have removed ps from whatever alloc container + * it was in. + */ + assert(!hpdata_in_psset_alloc_container_get(ps)); + if (hpdata_alloc_allowed_get(ps)) { + psset_alloc_container_insert(psset, ps); + } + + if (hpdata_purge_allowed_get(ps) + && !hpdata_in_psset_purge_container_get(ps)) { + hpdata_in_psset_purge_container_set(ps, true); + hpdata_purge_list_append(&psset->to_purge, ps); + } else if (!hpdata_purge_allowed_get(ps) + && hpdata_in_psset_purge_container_get(ps)) { + hpdata_in_psset_purge_container_set(ps, false); + hpdata_purge_list_remove(&psset->to_purge, ps); + } + + if (hpdata_hugify_allowed_get(ps) + && !hpdata_in_psset_hugify_container_get(ps)) { + hpdata_in_psset_hugify_container_set(ps, true); + hpdata_hugify_list_append(&psset->to_hugify, ps); + } else if (!hpdata_hugify_allowed_get(ps) + && hpdata_in_psset_hugify_container_get(ps)) { + hpdata_in_psset_hugify_container_set(ps, false); + hpdata_hugify_list_remove(&psset->to_hugify, ps); + } } hpdata_t * @@ -169,7 +241,7 @@ psset_pick_alloc(psset_t *psset, size_t size) { pszind_t pind = (pszind_t)bitmap_ffu(psset->bitmap, &psset_bitmap_info, (size_t)min_pind); if (pind == PSSET_NPSIZES) { - return hpdata_empty_list_first(&psset->empty_slabs); + return hpdata_empty_list_first(&psset->empty); } hpdata_t *ps = hpdata_age_heap_first(&psset->pageslabs[pind]); if (ps == NULL) { @@ -181,16 +253,48 @@ psset_pick_alloc(psset_t *psset, size_t size) { return ps; } +hpdata_t * +psset_pick_purge(psset_t *psset) { + return hpdata_purge_list_first(&psset->to_purge); +} + +hpdata_t * +psset_pick_hugify(psset_t *psset) { + return 
hpdata_hugify_list_first(&psset->to_hugify);
+}
+
 void
 psset_insert(psset_t *psset, hpdata_t *ps) {
-	/* We only support inserting empty pageslabs, for now. */
-	assert(hpdata_empty(ps));
 	hpdata_in_psset_set(ps, true);
-	psset_do_alloc_tracking_insert(psset, ps);
+
+	psset_stats_insert(psset, ps);
+	if (hpdata_alloc_allowed_get(ps)) {
+		psset_alloc_container_insert(psset, ps);
+	}
+	if (hpdata_purge_allowed_get(ps)) {
+		hpdata_in_psset_purge_container_set(ps, true);
+		hpdata_purge_list_append(&psset->to_purge, ps);
+	}
+	if (hpdata_hugify_allowed_get(ps)) {
+		hpdata_in_psset_hugify_container_set(ps, true);
+		hpdata_hugify_list_append(&psset->to_hugify, ps);
+	}
 }
 
 void
 psset_remove(psset_t *psset, hpdata_t *ps) {
 	hpdata_in_psset_set(ps, false);
-	psset_do_alloc_tracking_remove(psset, ps);
+
+	psset_stats_remove(psset, ps);
+	if (hpdata_in_psset_alloc_container_get(ps)) {
+		psset_alloc_container_remove(psset, ps);
+	}
+	if (hpdata_in_psset_purge_container_get(ps)) {
+		hpdata_in_psset_purge_container_set(ps, false);
+		hpdata_purge_list_remove(&psset->to_purge, ps);
+	}
+	/*
+	 * Mirror psset_insert: the slab must also leave the hugify list, or
+	 * psset->to_hugify would keep a link to a removed (soon-to-be-freed)
+	 * hpdata.  (The original patch duplicated the purge-container block
+	 * here instead of handling the hugify container.)
+	 */
+	if (hpdata_in_psset_hugify_container_get(ps)) {
+		hpdata_in_psset_hugify_container_set(ps, false);
+		hpdata_hugify_list_remove(&psset->to_hugify, ps);
+	}
 }
diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c
index 688911a6..2a702338 100644
--- a/test/unit/hpdata.c
+++ b/test/unit/hpdata.c
@@ -67,13 +67,9 @@ TEST_BEGIN(test_purge_simple) {
 	expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 2, "");
 
-	expect_false(hpdata_changing_state_get(&hpdata), "");
-
 	hpdata_purge_state_t purge_state;
 	hpdata_purge_begin(&hpdata, &purge_state);
 
-	expect_true(hpdata_changing_state_get(&hpdata), "");
-
 	void *purge_addr;
 	size_t purge_size;
 	bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
 	    &purge_size);
 	expect_true(got_result, "");
 	expect_ptr_eq(HPDATA_ADDR, purge_addr, "");
 	expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, "");
 
-	expect_true(hpdata_changing_state_get(&hpdata),
""); - got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, &purge_size); expect_false(got_result, "Unexpected additional purge range: " "extent at %p of size %zu", purge_addr, purge_size); - expect_true(hpdata_changing_state_get(&hpdata), ""); - hpdata_purge_end(&hpdata, &purge_state); - expect_false(hpdata_changing_state_get(&hpdata), ""); expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 4, ""); } TEST_END @@ -166,12 +157,7 @@ TEST_BEGIN(test_hugify) { expect_zu_eq(HUGEPAGE_PAGES / 2, hpdata_ntouched_get(&hpdata), ""); - expect_false(hpdata_changing_state_get(&hpdata), ""); - hpdata_hugify_begin(&hpdata); - expect_true(hpdata_changing_state_get(&hpdata), ""); - - hpdata_hugify_end(&hpdata); - expect_false(hpdata_changing_state_get(&hpdata), ""); + hpdata_hugify(&hpdata); /* Hugeifying should have increased the dirty page count. */ expect_zu_eq(HUGEPAGE_PAGES, hpdata_ntouched_get(&hpdata), "");