This change includes the following improvements:

- Remove the hpa_sec_batch_fill_extra parameter.
- Refactor the hpa_alloc() code and helper functions so that more than one extent can be allocated out of a single pageslab. This amortizes the per-pageslab costs (active bitmap iteration, pageslab metadata updates) across multiple extents.
- Decide on a min and max number of extents to allocate in hpa_alloc(). The code tries to allocate at least the min, and allocates up to the max as long as additional extents can come from the pageslab it already has, since those additional allocations are relatively cheap.
- Add extent allocation distribution stats.
- Amend the hpa_sec_integration.c unit test.

#include "jemalloc/internal/jemalloc_preamble.h"
|
|
#include "jemalloc/internal/jemalloc_internal_includes.h"
|
|
|
|
#include "jemalloc/internal/hpdata.h"
|
|
|
|

static int
hpdata_age_comp(const hpdata_t *a, const hpdata_t *b) {
	uint64_t a_age = hpdata_age_get(a);
	uint64_t b_age = hpdata_age_get(b);
	/*
	 * hpdata ages are operation counts in the psset; no two should be the
	 * same.
	 */
	assert(a_age != b_age);
	return (a_age > b_age) - (a_age < b_age);
}

ph_gen(, hpdata_age_heap, hpdata_t, age_link, hpdata_age_comp)

void
hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age, bool is_huge) {
	hpdata_addr_set(hpdata, addr);
	hpdata_age_set(hpdata, age);
	hpdata->h_huge = is_huge;
	hpdata->h_alloc_allowed = true;
	hpdata->h_in_psset_alloc_container = false;
	hpdata->h_purge_allowed = false;
	hpdata->h_hugify_allowed = false;
	hpdata->h_in_psset_hugify_container = false;
	hpdata->h_mid_purge = false;
	hpdata->h_mid_hugify = false;
	hpdata->h_updating = false;
	hpdata->h_in_psset = false;
	hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES);
	hpdata->h_nactive = 0;
	fb_init(hpdata->active_pages, HUGEPAGE_PAGES);
	if (is_huge) {
		fb_set_range(
		    hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES);
		hpdata->h_ntouched = HUGEPAGE_PAGES;
	} else {
		fb_init(hpdata->touched_pages, HUGEPAGE_PAGES);
		hpdata->h_ntouched = 0;
	}
	nstime_init_zero(&hpdata->h_time_purge_allowed);
	hpdata->h_purged_when_empty_and_huge = false;

	hpdata_assert_consistent(hpdata);
}

void *
hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) {
	hpdata_assert_consistent(hpdata);
	/*
	 * This is a metadata change; the hpdata should therefore either not be
	 * in the psset, or should have explicitly marked itself as being
	 * mid-update.
	 */
	assert(!hpdata->h_in_psset || hpdata->h_updating);
	assert(hpdata->h_alloc_allowed);
	assert((sz & PAGE_MASK) == 0);
	size_t npages = sz >> LG_PAGE;
	assert(npages <= hpdata_longest_free_range_get(hpdata));

	size_t result;

	size_t start = 0;
	/*
	 * These are dead stores, but the compiler will issue warnings on them
	 * since it can't tell statically that found is always true below.
	 */
	size_t begin = 0;
	size_t len = 0;

	size_t largest_unchosen_range = 0;
	while (true) {
		bool found = fb_urange_iter(
		    hpdata->active_pages, HUGEPAGE_PAGES, start, &begin, &len);
		/*
		 * A precondition to this function is that hpdata must be able
		 * to serve the allocation.
		 */
		assert(found);
		assert(len <= hpdata_longest_free_range_get(hpdata));
		if (len >= npages) {
			/*
			 * We use first-fit within the page slabs; this gives
			 * bounded worst-case fragmentation within a slab. It's
			 * not necessarily right; we could experiment with
			 * various other options.
			 */
			break;
		}
		if (len > largest_unchosen_range) {
			largest_unchosen_range = len;
		}
		start = begin + len;
	}
	/* We found a range; remember it. */
	result = begin;
	fb_set_range(hpdata->active_pages, HUGEPAGE_PAGES, begin, npages);
	hpdata->h_nactive += npages;

	/*
	 * We might be about to dirty some memory for the first time; update
	 * our count if so.
	 */
	size_t new_dirty = fb_ucount(
	    hpdata->touched_pages, HUGEPAGE_PAGES, result, npages);
	fb_set_range(hpdata->touched_pages, HUGEPAGE_PAGES, result, npages);
	hpdata->h_ntouched += new_dirty;

	/*
	 * If we allocated out of a range that was the longest in the hpdata,
	 * it might be the only one of that size and we'll have to adjust the
	 * metadata.
	 */
	if (len == hpdata_longest_free_range_get(hpdata)) {
		start = begin + npages;
		while (start < HUGEPAGE_PAGES) {
			bool found = fb_urange_iter(hpdata->active_pages,
			    HUGEPAGE_PAGES, start, &begin, &len);
			if (!found) {
				break;
			}
			assert(len <= hpdata_longest_free_range_get(hpdata));
			if (len == hpdata_longest_free_range_get(hpdata)) {
				largest_unchosen_range = len;
				break;
			}
			if (len > largest_unchosen_range) {
				largest_unchosen_range = len;
			}
			start = begin + len;
		}
		hpdata_longest_free_range_set(hpdata, largest_unchosen_range);
	}

	hpdata_assert_consistent(hpdata);
	return (void *)((byte_t *)hpdata_addr_get(hpdata)
	    + (result << LG_PAGE));
}

void
hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) {
	hpdata_assert_consistent(hpdata);
	/* See the comment in reserve. */
	assert(!hpdata->h_in_psset || hpdata->h_updating);
	assert(((uintptr_t)addr & PAGE_MASK) == 0);
	assert((sz & PAGE_MASK) == 0);
	size_t begin = ((uintptr_t)addr - (uintptr_t)hpdata_addr_get(hpdata))
	    >> LG_PAGE;
	assert(begin < HUGEPAGE_PAGES);
	size_t npages = sz >> LG_PAGE;
	size_t old_longest_range = hpdata_longest_free_range_get(hpdata);

	fb_unset_range(hpdata->active_pages, HUGEPAGE_PAGES, begin, npages);
	/* We might have just created a new, larger range. */
	size_t new_begin = (fb_fls(hpdata->active_pages, HUGEPAGE_PAGES, begin)
	    + 1);
	size_t new_end = fb_ffs(
	    hpdata->active_pages, HUGEPAGE_PAGES, begin + npages - 1);
	size_t new_range_len = new_end - new_begin;

	if (new_range_len > old_longest_range) {
		hpdata_longest_free_range_set(hpdata, new_range_len);
	}

	hpdata->h_nactive -= npages;

	hpdata_assert_consistent(hpdata);
}
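
/*
 * Illustrative sketch only, not part of the upstream file: a minimal,
 * unreferenced walk-through of the single-extent reserve/unreserve API above,
 * assuming slab_base is the HUGEPAGE-aligned, HUGEPAGE-sized region backing
 * the slab.  It shows the first-fit placement and the longest-free-range
 * bookkeeping described in hpdata_reserve_alloc().  The function and
 * parameter names here are hypothetical.
 */
static void
hpdata_reserve_unreserve_sketch(hpdata_t *hpdata, void *slab_base) {
	hpdata_init(hpdata, slab_base, /* age */ 0, /* is_huge */ false);

	/* First-fit: both extents come from the start of the slab. */
	void *a = hpdata_reserve_alloc(hpdata, 2 * PAGE);
	void *b = hpdata_reserve_alloc(hpdata, PAGE);
	assert((byte_t *)b == (byte_t *)a + 2 * PAGE);

	/* Three pages are now active; the longest free run shrank. */
	assert(hpdata_longest_free_range_get(hpdata) == HUGEPAGE_PAGES - 3);

	/* Freeing the trailing extent merges it back into the free run. */
	hpdata_unreserve(hpdata, b, PAGE);
	assert(hpdata_longest_free_range_get(hpdata) == HUGEPAGE_PAGES - 2);

	/* Freeing the first extent leaves the slab entirely free again. */
	hpdata_unreserve(hpdata, a, 2 * PAGE);
	assert(hpdata_longest_free_range_get(hpdata) == HUGEPAGE_PAGES);
}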

size_t
hpdata_find_alloc_offsets(hpdata_t *hpdata, size_t sz,
    hpdata_alloc_offset_t *offsets, size_t max_nallocs) {
	hpdata_assert_consistent(hpdata);
	assert((sz & PAGE_MASK) == 0);
	assert(1 <= max_nallocs);
	const size_t npages = sz >> LG_PAGE;
	/* We should be able to find at least one allocation. */
	assert(npages <= hpdata_longest_free_range_get(hpdata));

	size_t nallocs = 0;
	size_t start = 0;
	size_t longest_len = 0;
	while (true) {
		size_t begin = 0;
		size_t len = 0;

		const bool found = fb_urange_iter(
		    hpdata->active_pages, HUGEPAGE_PAGES, start, &begin, &len);
		if (!found) {
			/* We should have found at least one. */
			assert(0 < nallocs);
			break;
		}

		/* Carve up the free range, if it's large enough. */
		while (npages <= len) {
			offsets->len_before = len;
			offsets->index = begin;
			offsets->longest_len = longest_len;
			offsets += 1;

			nallocs += 1;
			if (nallocs == max_nallocs) {
				/*
				 * Force start == HUGEPAGE_PAGES below, so
				 * that the outer loop terminates too.
				 */
				begin = HUGEPAGE_PAGES;
				len = 0;
				break;
			}

			begin += npages;
			len -= npages;
		}

		start = begin + len;
		assert(start <= HUGEPAGE_PAGES);
		if (start == HUGEPAGE_PAGES) {
			break;
		}
		longest_len = max_zu(longest_len, len);
	}

	/* Post-conditions. */
	assert(1 <= nallocs);
	assert(nallocs <= max_nallocs);

	return nallocs;
}

void *
hpdata_reserve_alloc_offset(
    hpdata_t *hpdata, size_t sz, hpdata_alloc_offset_t *offset) {
	/*
	 * This is a metadata change; the hpdata should therefore either not be
	 * in the psset, or should have explicitly marked itself as being
	 * mid-update.
	 */
	assert(!hpdata->h_in_psset || hpdata->h_updating);
	assert(hpdata->h_alloc_allowed);
	assert((sz & PAGE_MASK) == 0);
	const size_t npages = sz >> LG_PAGE;
	const size_t index = offset->index;

	fb_set_range(hpdata->active_pages, HUGEPAGE_PAGES, index, npages);
	hpdata->h_nactive += npages;

	/*
	 * We might be about to dirty some memory for the first time; update
	 * our count if so.
	 */
	size_t new_dirty = fb_ucount(
	    hpdata->touched_pages, HUGEPAGE_PAGES, index, npages);
	fb_set_range(hpdata->touched_pages, HUGEPAGE_PAGES, index, npages);
	hpdata->h_ntouched += new_dirty;

	return (void *)((byte_t *)hpdata_addr_get(hpdata) + (index << LG_PAGE));
}

void
hpdata_post_reserve_alloc_offsets(hpdata_t *hpdata, size_t sz,
    hpdata_alloc_offset_t *offsets, size_t nallocs) {
	assert((sz & PAGE_MASK) == 0);
	const size_t npages = sz >> LG_PAGE;

	if (nallocs == 0) {
		return;
	}

	size_t max_len = offsets[0].len_before;
	for (size_t i = 1; i < nallocs; i += 1) {
		max_len = max_zu(max_len, offsets[i].len_before);
	}

	const size_t prev_longest = hpdata_longest_free_range_get(hpdata);
	assert(max_len <= prev_longest);
	if (max_len < prev_longest) {
		/* No need to update the hpdata's longest free range. */
		return;
	}

	/*
	 * If we allocated out of a range that was the longest in the hpdata,
	 * it might be the only one of that size and we'll have to adjust the
	 * metadata.
	 */
	const size_t len_before = offsets[nallocs - 1].len_before;
	size_t start = offsets[nallocs - 1].index + len_before;
	size_t longest_len = max_zu(
	    offsets[nallocs - 1].longest_len, len_before - npages);

	const size_t rest = HUGEPAGE_PAGES - start;
	/*
	 * Only look at the rest if we think we'll find a range longer than
	 * what we already have.  This also implicitly checks for rest == 0,
	 * so we don't have to check before the first call to fb_urange_iter().
	 */
	if (longest_len < rest) {
		while (true) {
			size_t begin = 0;
			size_t len = 0;

			const bool found = fb_urange_iter(hpdata->active_pages,
			    HUGEPAGE_PAGES, start, &begin, &len);
			if (!found) {
				break;
			}

			if (longest_len < len) {
				if (len == prev_longest) {
					/* It's already set to the right value. */
					assert(hpdata_longest_free_range_get(
					    hpdata) == len);
					assert(fb_urange_longest(
					    hpdata->active_pages,
					    HUGEPAGE_PAGES) == len);
					return;
				}
				longest_len = len;
			}

			start = begin + len;
			assert(start <= HUGEPAGE_PAGES);
			if (start == HUGEPAGE_PAGES) {
				break;
			}
		}
	}

	assert(fb_urange_longest(hpdata->active_pages, HUGEPAGE_PAGES)
	    == longest_len);
	hpdata_longest_free_range_set(hpdata, longest_len);
}
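
/*
 * Illustrative sketch only, not part of the upstream file: how a caller such
 * as the HPA batch-allocation path might combine the three offset functions
 * above to amortize per-pageslab work across several extents.  The function
 * name, the ptrs output array, and the fixed-size offsets buffer are
 * hypothetical; the real driver lives in hpa.c.  The pageslab is assumed to
 * be able to serve at least one allocation of size sz.
 */
static size_t
hpdata_batch_alloc_sketch(hpdata_t *hpdata, size_t sz, void **ptrs,
    size_t max_nallocs) {
	/* Find up to max_nallocs placements in one pass over the bitmap. */
	hpdata_alloc_offset_t offsets[8];
	if (max_nallocs > 8) {
		max_nallocs = 8;
	}
	size_t nallocs = hpdata_find_alloc_offsets(hpdata, sz, offsets,
	    max_nallocs);

	/* Mark each chosen range active (and touched, if newly dirtied). */
	for (size_t i = 0; i < nallocs; i++) {
		ptrs[i] = hpdata_reserve_alloc_offset(hpdata, sz, &offsets[i]);
	}

	/* Fix up the longest-free-range metadata once, for the whole batch. */
	hpdata_post_reserve_alloc_offsets(hpdata, sz, offsets, nallocs);
	return nallocs;
}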

size_t
hpdata_purge_begin(
    hpdata_t *hpdata, hpdata_purge_state_t *purge_state, size_t *nranges) {
	hpdata_assert_consistent(hpdata);
	/*
	 * See the comment below; we might purge any inactive extent, so it's
	 * unsafe for any other thread to turn any inactive extent active while
	 * we're operating on it.
	 */
	assert(!hpdata_alloc_allowed_get(hpdata));

	purge_state->npurged = 0;
	purge_state->next_purge_search_begin = 0;

	/*
	 * Initialize to_purge.
	 *
	 * It's possible to end up in situations where two dirty extents are
	 * separated by a retained extent:
	 * - 1 page allocated.
	 * - 1 page allocated.
	 * - 1 page allocated.
	 *
	 * If the middle page is freed and purged, and then the first and third
	 * pages are freed, and then another purge pass happens, the hpdata
	 * looks like this:
	 * - 1 page dirty.
	 * - 1 page retained.
	 * - 1 page dirty.
	 *
	 * But it's safe to do a single 3-page purge.
	 *
	 * We do this by first computing the dirty pages, and then filling in
	 * any gaps by extending each range in the dirty bitmap until the next
	 * active page. This purges more pages, but the expensive part of
	 * purging is the TLB shootdowns, rather than the kernel state
	 * tracking; doing a little bit more of the latter is fine if it saves
	 * us from doing some of the former.
	 */

	/*
	 * The dirty pages are those that are touched but not active. Note that
	 * in a normal-ish case, HUGEPAGE_PAGES is something like 512 and the
	 * fb_group_t is 64 bits, so this is 64 bytes, spread across 8
	 * fb_group_ts.
	 */
	fb_group_t dirty_pages[FB_NGROUPS(HUGEPAGE_PAGES)];
	fb_init(dirty_pages, HUGEPAGE_PAGES);
	fb_bit_not(dirty_pages, hpdata->active_pages, HUGEPAGE_PAGES);
	fb_bit_and(
	    dirty_pages, dirty_pages, hpdata->touched_pages, HUGEPAGE_PAGES);

	fb_init(purge_state->to_purge, HUGEPAGE_PAGES);
	size_t next_bit = 0;
	*nranges = 0;
	while (next_bit < HUGEPAGE_PAGES) {
		size_t next_dirty = fb_ffs(
		    dirty_pages, HUGEPAGE_PAGES, next_bit);
		/* Recall that fb_ffs returns nbits if no set bit is found. */
		if (next_dirty == HUGEPAGE_PAGES) {
			break;
		}
		size_t next_active = fb_ffs(
		    hpdata->active_pages, HUGEPAGE_PAGES, next_dirty);
		/*
		 * Don't purge past the end of the dirty extent, into retained
		 * pages. This helps the kernel a tiny bit, but honestly it's
		 * mostly helpful for testing (where we tend to write test
		 * cases that think in terms of the dirty ranges).
		 */
		ssize_t last_dirty = fb_fls(
		    dirty_pages, HUGEPAGE_PAGES, next_active - 1);
		assert(last_dirty >= 0);
		assert((size_t)last_dirty >= next_dirty);
		assert((size_t)last_dirty - next_dirty + 1 <= HUGEPAGE_PAGES);

		fb_set_range(purge_state->to_purge, HUGEPAGE_PAGES, next_dirty,
		    last_dirty - next_dirty + 1);
		(*nranges)++;
		next_bit = next_active + 1;
	}

	/* We should purge, at least, everything dirty. */
	size_t ndirty = hpdata->h_ntouched - hpdata->h_nactive;
	purge_state->ndirty_to_purge = ndirty;
	assert(ndirty <= fb_scount(
	    purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES));
	assert(ndirty
	    == fb_scount(dirty_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES));
	assert(*nranges <= ndirty);
	assert(ndirty == 0 || *nranges > 0);

	hpdata_assert_consistent(hpdata);

	return ndirty;
}

bool
hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state,
    void **r_purge_addr, size_t *r_purge_size) {
	/*
	 * Note that we don't have a consistency check here; we're accessing
	 * hpdata without synchronization, and therefore have no right to
	 * expect a consistent state.
	 */
	assert(!hpdata_alloc_allowed_get(hpdata));

	if (purge_state->next_purge_search_begin == HUGEPAGE_PAGES) {
		return false;
	}
	size_t purge_begin;
	size_t purge_len;
	bool found_range = fb_srange_iter(purge_state->to_purge, HUGEPAGE_PAGES,
	    purge_state->next_purge_search_begin, &purge_begin, &purge_len);
	if (!found_range) {
		return false;
	}

	*r_purge_addr = (void *)((byte_t *)hpdata_addr_get(hpdata)
	    + purge_begin * PAGE);
	*r_purge_size = purge_len * PAGE;

	purge_state->next_purge_search_begin = purge_begin + purge_len;
	purge_state->npurged += purge_len;
	assert(purge_state->npurged <= HUGEPAGE_PAGES);

	return true;
}

void
hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) {
	assert(!hpdata_alloc_allowed_get(hpdata));
	hpdata_assert_consistent(hpdata);
	/* See the comment in reserve. */
	assert(!hpdata->h_in_psset || hpdata->h_updating);

	assert(purge_state->npurged == fb_scount(
	    purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES));
	assert(purge_state->npurged >= purge_state->ndirty_to_purge);

	fb_bit_not(
	    purge_state->to_purge, purge_state->to_purge, HUGEPAGE_PAGES);
	fb_bit_and(hpdata->touched_pages, hpdata->touched_pages,
	    purge_state->to_purge, HUGEPAGE_PAGES);
	assert(hpdata->h_ntouched >= purge_state->ndirty_to_purge);
	hpdata->h_ntouched -= purge_state->ndirty_to_purge;

	hpdata_assert_consistent(hpdata);
}
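
/*
 * Illustrative sketch only, not part of the upstream file: the begin / next /
 * end protocol above, driven by a hypothetical caller.  purge_pages is a
 * stand-in for the real purge hook (e.g. madvise-based purging); the actual
 * driver lives in hpa.c and manages locking and psset membership around these
 * steps.  The traced values follow the dirty/retained/dirty scenario from the
 * comment in hpdata_purge_begin(), assuming slab_base backs a fresh slab.
 */
static void
hpdata_purge_sketch(hpdata_t *hpdata, void *slab_base,
    void (*purge_pages)(void *addr, size_t size)) {
	hpdata_init(hpdata, slab_base, /* age */ 0, /* is_huge */ false);

	/* Touch three pages, then free the middle one. */
	void *pages = hpdata_reserve_alloc(hpdata, 3 * PAGE);
	hpdata_unreserve(hpdata, (byte_t *)pages + PAGE, PAGE);

	/* Purge pass 1: only the middle page is dirty. */
	hpdata_alloc_allowed_set(hpdata, false);
	hpdata_purge_state_t purge_state;
	size_t nranges;
	size_t ndirty = hpdata_purge_begin(hpdata, &purge_state, &nranges);
	assert(ndirty == 1 && nranges == 1);
	void *addr;
	size_t size;
	while (hpdata_purge_next(hpdata, &purge_state, &addr, &size)) {
		purge_pages(addr, size);
	}
	hpdata_purge_end(hpdata, &purge_state);

	/* Free the outer pages, leaving dirty / retained / dirty. */
	hpdata_alloc_allowed_set(hpdata, true);
	hpdata_unreserve(hpdata, pages, PAGE);
	hpdata_unreserve(hpdata, (byte_t *)pages + 2 * PAGE, PAGE);

	/* Purge pass 2: the two dirty pages coalesce into one 3-page range. */
	hpdata_alloc_allowed_set(hpdata, false);
	ndirty = hpdata_purge_begin(hpdata, &purge_state, &nranges);
	assert(ndirty == 2 && nranges == 1);
	bool found = hpdata_purge_next(hpdata, &purge_state, &addr, &size);
	assert(found && size == 3 * PAGE);
	purge_pages(addr, size);
	hpdata_purge_end(hpdata, &purge_state);
}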

void
hpdata_hugify(hpdata_t *hpdata) {
	hpdata_assert_consistent(hpdata);
	hpdata->h_huge = true;
	fb_set_range(hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES);
	hpdata->h_ntouched = HUGEPAGE_PAGES;
	hpdata_assert_consistent(hpdata);
}

void
hpdata_dehugify(hpdata_t *hpdata) {
	hpdata_assert_consistent(hpdata);
	hpdata->h_huge = false;
	hpdata_assert_consistent(hpdata);
}