[HPA] Add ability to start page as huge and more flexibility for purging

This commit is contained in:
Slobodan Predolac 2025-08-25 13:23:07 -07:00 committed by Guangli Dai
parent ace437d26a
commit a199278f37
20 changed files with 1231 additions and 116 deletions

View file

@ -37,7 +37,13 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = {
/* min_purge_interval_ms */
5 * 1000,
/* experimental_max_purge_nhp */
-1};
-1,
/* purge_threshold */
1,
/* min_purge_delay_ms */
0,
/* hugify_style */
hpa_hugify_style_lazy};
static hpa_shard_opts_t test_hpa_shard_opts_purge = {
/* slab_max_alloc */
@ -55,7 +61,37 @@ static hpa_shard_opts_t test_hpa_shard_opts_purge = {
/* min_purge_interval_ms */
5 * 1000,
/* experimental_max_purge_nhp */
-1};
-1,
/* purge_threshold */
1,
/* min_purge_delay_ms */
0,
/* hugify_style */
hpa_hugify_style_lazy};
/*
 * Option preset that the tests below copy into a local hpa_shard_opts_t and
 * then override field by field.  Compared with the other presets it uses a
 * zero hugify delay, a short (5 ms) minimum purge interval, a 10 ms minimum
 * purge delay, a purge threshold of HUGEPAGE - 5 * PAGE and the eager hugify
 * style.  The initializer is positional; the comment before each value names
 * the field it is meant to fill.
 */
static hpa_shard_opts_t test_hpa_shard_opts_aggressive = {
/* slab_max_alloc */
HUGEPAGE,
/* hugification_threshold */
0.9 * HUGEPAGE,
/* dirty_mult */
FXP_INIT_PERCENT(11),
/* deferral_allowed */
true,
/* hugify_delay_ms */
0,
/* hugify_sync */
false,
/* min_purge_interval_ms */
5,
/* experimental_max_purge_nhp */
-1,
/* purge_threshold */
HUGEPAGE - 5 * PAGE,
/* min_purge_delay_ms */
10,
/* hugify_style */
hpa_hugify_style_eager};
static hpa_shard_t *
create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) {
@ -365,10 +401,11 @@ defer_test_unmap(void *ptr, size_t size) {
}
static size_t ndefer_purge_calls = 0;
static size_t npurge_size = 0;
/*
 * Purge hook used by the deferral tests: it performs no real purging, it only
 * remembers the size of the most recent request in npurge_size and bumps
 * ndefer_purge_calls so that tests can assert on both.
 */
static void
defer_test_purge(void *ptr, size_t size) {
(void)ptr;
(void)size;
/* Keep only the latest size; earlier calls are overwritten. */
npurge_size = size;
++ndefer_purge_calls;
}
@ -783,6 +820,625 @@ TEST_BEGIN(test_vectorized_opt_eq_zero) {
}
TEST_END
/*
 * Fills two hugepages with single-page allocations under the aggressive
 * preset (eager hugify style, 10 ms minimum purge delay, no minimum purge
 * interval), then frees and reallocates in stages while advancing the fake
 * clock.  At each stage it checks that purging only happens once the delay
 * has elapsed and that the psset slab / dirty / active counters match.
 */
TEST_BEGIN(test_starts_huge) {
test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0)
|| !config_stats);
hpa_hooks_t hooks;
hooks.map = &defer_test_map;
hooks.unmap = &defer_test_unmap;
hooks.purge = &defer_test_purge;
hooks.hugify = &defer_test_hugify;
hooks.dehugify = &defer_test_dehugify;
hooks.curtime = &defer_test_curtime;
hooks.ms_since = &defer_test_ms_since;
hooks.vectorized_purge = &defer_vectorized_purge;
hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive;
opts.deferral_allowed = true;
opts.min_purge_delay_ms = 10;
opts.min_purge_interval_ms = 0;
defer_vectorized_purge_called = false;
ndefer_purge_calls = 0;
hpa_shard_t *shard = create_test_data(&hooks, &opts);
bool deferred_work_generated = false;
/* Fake clock starts at 100 s. */
nstime_init2(&defer_curtime, 100, 0);
tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
/* One PAGE-sized allocation per page of two hugepages. */
enum { NALLOCS = 2 * HUGEPAGE_PAGES };
edata_t *edatas[NALLOCS];
for (int i = 0; i < NALLOCS; i++) {
edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
false, false, &deferred_work_generated);
expect_ptr_not_null(edatas[i], "Unexpected null edata");
}
/* Deallocate 75% */
int pages_to_deallocate = (int)(0.75 * NALLOCS);
for (int i = 0; i < pages_to_deallocate; i++) {
pai_dalloc(
tsdn, &shard->pai, edatas[i], &deferred_work_generated);
}
/*
 * While there is enough to purge as we have one empty page and that
 * one meets the threshold, we need to respect the delay, so no purging
 * should happen yet.
 */
hpa_shard_do_deferred_work(tsdn, shard);
expect_zu_eq(0, ndefer_purge_calls, "Purged too early, delay==10ms");
/* Advance the fake clock by min_purge_delay_ms (converted to ns). */
nstime_iadd(&defer_curtime, opts.min_purge_delay_ms * 1000 * 1000);
/* Now, enough time has passed, so we expect to purge */
hpa_shard_do_deferred_work(tsdn, shard);
expect_zu_eq(1, ndefer_purge_calls, "Expected purge");
/*
 * We purged one hugepage, so we expect to have one non-full page and it
 * should have half of the other dirty.
 */
psset_stats_t *stat = &shard->psset.stats;
expect_zu_eq(
stat->empty_slabs[1].npageslabs, 0, "Expected zero huge slabs");
expect_zu_eq(stat->empty_slabs[0].npageslabs, 1, "Expected 1 nh slab");
expect_zu_eq(stat->full_slabs[0].npageslabs, 0, "");
expect_zu_eq(stat->full_slabs[1].npageslabs, 0, "");
expect_zu_eq(
stat->merged.ndirty, HUGEPAGE_PAGES / 2, "One HP half dirty");
/*
 * We now allocate one more PAGE than a half the hugepage because we
 * want to make sure that one more hugepage is needed.
 */
deferred_work_generated = false;
const size_t HALF = HUGEPAGE_PAGES / 2;
edatas[1] = pai_alloc(tsdn, &shard->pai, PAGE * (HALF + 1), PAGE, false,
false, false, &deferred_work_generated);
expect_ptr_not_null(edatas[1], "Unexpected null edata");
expect_false(deferred_work_generated, "No page is purgable");
expect_zu_eq(stat->empty_slabs[1].npageslabs, 0, "");
expect_zu_eq(stat->empty_slabs[0].npageslabs, 0, "");
expect_zu_eq(stat->full_slabs[0].npageslabs, 0, "");
expect_zu_eq(stat->full_slabs[1].npageslabs, 0, "");
/*
 * We expect that all inactive bytes on the second page are counted as
 * dirty (this is because the page was huge and empty when we purged
 * it, thus, it is assumed to come back as huge, thus all the bytes are
 * counted as touched).
 */
expect_zu_eq(stat->merged.ndirty, 2 * HALF - 1,
"2nd page is huge because it was empty and huge when purged");
expect_zu_eq(stat->merged.nactive, HALF + (HALF + 1), "1st + 2nd");
nstime_iadd(&defer_curtime, opts.min_purge_delay_ms * 1000 * 1000);
/* Free the (HALF + 1)-page allocation made above. */
pai_dalloc(tsdn, &shard->pai, edatas[1], &deferred_work_generated);
expect_true(deferred_work_generated, "");
expect_zu_eq(stat->merged.ndirty, 3 * HALF, "1st + 2nd");
/*
 * The allocation above has been freed, so the second page is empty
 * again; once the minimum delay has passed once more, that page should
 * be purged.
 */
ndefer_purge_calls = 0;
nstime_iadd(&defer_curtime, opts.min_purge_delay_ms * 1000 * 1000);
hpa_shard_do_deferred_work(tsdn, shard);
expect_zu_eq(1, ndefer_purge_calls, "");
expect_zu_eq(stat->merged.ndirty, HALF, "2nd cleared as it was empty");
ndefer_purge_calls = 0;
/* Deallocate all the rest, but leave only two active */
for (int i = pages_to_deallocate; i < NALLOCS - 2; ++i) {
pai_dalloc(
tsdn, &shard->pai, edatas[i], &deferred_work_generated);
}
/*
 * With prior pai_dalloc our last page becomes purgable, however we
 * still want to respect the delay. Thus, it is not time to purge yet.
 */
hpa_shard_do_deferred_work(tsdn, shard);
/*
 * NOTE(review): deferred_work_generated is not passed to
 * hpa_shard_do_deferred_work(); the flag checked here was last written
 * by the pai_dalloc() calls in the loop above.
 */
expect_true(deferred_work_generated, "Above limit, but not time yet");
expect_zu_eq(0, ndefer_purge_calls, "");
/*
 * Finally, we move the time ahead, and we confirm that purge happens
 * and that we have exactly two active base pages and none dirty.
 */
nstime_iadd(&defer_curtime, opts.min_purge_delay_ms * 1000 * 1000);
hpa_shard_do_deferred_work(tsdn, shard);
/*
 * NOTE(review): same stale-flag check as above; the message text looks
 * copied from the previous expectation even though a purge is expected
 * at this point.
 */
expect_true(deferred_work_generated, "Above limit, but not time yet");
expect_zu_eq(1, ndefer_purge_calls, "");
expect_zu_eq(stat->merged.ndirty, 0, "Purged all");
expect_zu_eq(stat->merged.nactive, 2, "1st only");
ndefer_purge_calls = 0;
destroy_test_data(shard);
}
TEST_END
/*
 * With eager hugify style, purge_threshold == HUGEPAGE and no purge delay,
 * fill two hugepages, free the first one completely plus one page of the
 * second, and check that exactly one purge of HUGEPAGE bytes is issued while
 * the single dirty page on the second hugepage stays dirty.  Then allocate
 * and free 2 * PAGE and check that one more HUGEPAGE-sized purge follows.
 */
TEST_BEGIN(test_start_huge_purge_empty_only) {
	test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0)
	    || !config_stats);

	hpa_hooks_t hooks;
	hooks.map = &defer_test_map;
	hooks.unmap = &defer_test_unmap;
	hooks.purge = &defer_test_purge;
	hooks.hugify = &defer_test_hugify;
	hooks.dehugify = &defer_test_dehugify;
	hooks.curtime = &defer_test_curtime;
	hooks.ms_since = &defer_test_ms_since;
	hooks.vectorized_purge = &defer_vectorized_purge;

	hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive;
	opts.deferral_allowed = true;
	opts.purge_threshold = HUGEPAGE;
	opts.min_purge_delay_ms = 0;
	opts.hugify_style = hpa_hugify_style_eager;
	opts.min_purge_interval_ms = 0;

	ndefer_purge_calls = 0;
	npurge_size = 0;
	hpa_shard_t *shard = create_test_data(&hooks, &opts);
	bool deferred_work_generated = false;
	/* Fake clock starts at 10 ms. */
	nstime_init(&defer_curtime, 10 * 1000 * 1000);

	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
	enum { NALLOCS = 2 * HUGEPAGE_PAGES };
	edata_t *edatas[NALLOCS];
	for (int i = 0; i < NALLOCS; i++) {
		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
		    false, false, &deferred_work_generated);
		expect_ptr_not_null(edatas[i], "Unexpected null edata");
	}

	/* Deallocate all from the first and one PAGE from the second HP. */
	for (int i = 0; i < NALLOCS / 2 + 1; i++) {
		pai_dalloc(
		    tsdn, &shard->pai, edatas[i], &deferred_work_generated);
	}
	hpa_shard_do_deferred_work(tsdn, shard);
	expect_true(deferred_work_generated, "");
	expect_zu_eq(1, ndefer_purge_calls, "Should purge, delay==0ms");
	expect_zu_eq(HUGEPAGE, npurge_size, "Purge whole folio");
	expect_zu_eq(shard->psset.stats.merged.ndirty, 1, "");
	expect_zu_eq(shard->psset.stats.merged.nactive, HUGEPAGE_PAGES - 1, "");

	/* A second pass must find nothing left to purge. */
	ndefer_purge_calls = 0;
	npurge_size = 0;
	hpa_shard_do_deferred_work(tsdn, shard);
	expect_zu_eq(0, ndefer_purge_calls, "Should not purge anything");

	/* Allocate and free 2*PAGE so that it spills into second page again */
	edatas[0] = pai_alloc(tsdn, &shard->pai, 2 * PAGE, PAGE, false, false,
	    false, &deferred_work_generated);
	/*
	 * Check the result before handing it to pai_dalloc(), as every other
	 * allocation in this file does.
	 */
	expect_ptr_not_null(edatas[0], "Unexpected null edata");
	pai_dalloc(tsdn, &shard->pai, edatas[0], &deferred_work_generated);
	expect_true(deferred_work_generated, "");
	hpa_shard_do_deferred_work(tsdn, shard);
	expect_zu_eq(1, ndefer_purge_calls, "Should purge, delay==0ms");
	expect_zu_eq(HUGEPAGE, npurge_size, "Purge whole folio");

	ndefer_purge_calls = 0;
	destroy_test_data(shard);
}
TEST_END
/*
 * Uses eager hugify style with purge_threshold == PAGE,
 * hugification_threshold == HUGEPAGE, dirty_mult of 1% and no purge delay.
 * Three phases on a single hugepage:
 *   1. fill and free everything, expect one purge and an empty non-huge slab;
 *   2. allocate and free one page, expect a purge of HUGEPAGE bytes;
 *   3. fill, free 10%, expect a purge of exactly that 10%, then reallocate
 *      half of it and expect a non-huge, non-full slab with nothing dirty.
 */
TEST_BEGIN(test_assume_huge_purge_fully) {
test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0)
|| !config_stats);
hpa_hooks_t hooks;
hooks.map = &defer_test_map;
hooks.unmap = &defer_test_unmap;
hooks.purge = &defer_test_purge;
hooks.hugify = &defer_test_hugify;
hooks.dehugify = &defer_test_dehugify;
hooks.curtime = &defer_test_curtime;
hooks.ms_since = &defer_test_ms_since;
hooks.vectorized_purge = &defer_vectorized_purge;
hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive;
opts.deferral_allowed = true;
opts.purge_threshold = PAGE;
opts.hugification_threshold = HUGEPAGE;
opts.min_purge_delay_ms = 0;
opts.min_purge_interval_ms = 0;
opts.hugify_style = hpa_hugify_style_eager;
opts.dirty_mult = FXP_INIT_PERCENT(1);
ndefer_purge_calls = 0;
hpa_shard_t *shard = create_test_data(&hooks, &opts);
bool deferred_work_generated = false;
/* Fake clock starts at 10 ms. */
nstime_init(&defer_curtime, 10 * 1000 * 1000);
tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
enum { NALLOCS = HUGEPAGE_PAGES };
edata_t *edatas[NALLOCS];
/* Phase 1: fill one hugepage page by page, then free it all. */
for (int i = 0; i < NALLOCS; i++) {
edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
false, false, &deferred_work_generated);
expect_ptr_not_null(edatas[i], "Unexpected null edata");
}
/* Deallocate all */
for (int i = 0; i < NALLOCS; i++) {
pai_dalloc(
tsdn, &shard->pai, edatas[i], &deferred_work_generated);
}
hpa_shard_do_deferred_work(tsdn, shard);
expect_true(deferred_work_generated, "");
expect_zu_eq(1, ndefer_purge_calls, "Should purge, delay==0ms");
/* Stats should say no active */
expect_zu_eq(shard->psset.stats.merged.nactive, 0, "");
expect_zu_eq(
shard->psset.stats.empty_slabs[0].npageslabs, 1, "Non huge");
npurge_size = 0;
/* Phase 2: a single page allocated and freed on the purged slab. */
edatas[0] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, false,
false, &deferred_work_generated);
expect_ptr_not_null(edatas[0], "Unexpected null edata");
expect_zu_eq(shard->psset.stats.merged.nactive, 1, "");
expect_zu_eq(shard->psset.stats.slabs[1].npageslabs, 1, "Huge nonfull");
pai_dalloc(tsdn, &shard->pai, edatas[0], &deferred_work_generated);
expect_true(deferred_work_generated, "");
ndefer_purge_calls = 0;
npurge_size = 0;
hpa_shard_do_deferred_work(tsdn, shard);
expect_zu_eq(1, ndefer_purge_calls, "Should purge, delay==0ms");
expect_zu_eq(HUGEPAGE, npurge_size, "Should purge full folio");
/* Now allocate all, free 10%, alloc 5%, assert non-huge */
for (int i = 0; i < NALLOCS; i++) {
edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
false, false, &deferred_work_generated);
expect_ptr_not_null(edatas[i], "Unexpected null edata");
}
int ten_pct = NALLOCS / 10;
for (int i = 0; i < ten_pct; i++) {
pai_dalloc(
tsdn, &shard->pai, edatas[i], &deferred_work_generated);
}
ndefer_purge_calls = 0;
npurge_size = 0;
hpa_shard_do_deferred_work(tsdn, shard);
expect_zu_eq(1, ndefer_purge_calls, "Should purge, delay==0ms");
expect_zu_eq(
ten_pct * PAGE, npurge_size, "Should purge 10 percent of pages");
/* Reallocate half of the pages that were just freed and purged. */
for (int i = 0; i < ten_pct / 2; i++) {
edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
false, false, &deferred_work_generated);
expect_ptr_not_null(edatas[i], "Unexpected null edata");
}
expect_zu_eq(
shard->psset.stats.slabs[0].npageslabs, 1, "Nonhuge nonfull");
expect_zu_eq(shard->psset.stats.merged.ndirty, 0, "No dirty");
npurge_size = 0;
ndefer_purge_calls = 0;
destroy_test_data(shard);
}
TEST_END
/*
 * With eager hugify style, dirty_mult of 0% and purge_threshold set to
 * THRESHOLD pages, freeing THRESHOLD - 1 pages of a full hugepage must not
 * trigger a purge; freeing one more page must trigger exactly one purge and
 * leave no dirty pages.
 */
TEST_BEGIN(test_eager_with_purge_threshold) {
test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0));
hpa_hooks_t hooks;
hooks.map = &defer_test_map;
hooks.unmap = &defer_test_unmap;
hooks.purge = &defer_test_purge;
hooks.hugify = &defer_test_hugify;
hooks.dehugify = &defer_test_dehugify;
hooks.curtime = &defer_test_curtime;
hooks.ms_since = &defer_test_ms_since;
hooks.vectorized_purge = &defer_vectorized_purge;
/* Purge threshold expressed in pages; scaled by PAGE below. */
const size_t THRESHOLD = 10;
hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive;
opts.deferral_allowed = true;
opts.purge_threshold = THRESHOLD * PAGE;
opts.min_purge_delay_ms = 0;
opts.hugify_style = hpa_hugify_style_eager;
opts.dirty_mult = FXP_INIT_PERCENT(0);
ndefer_purge_calls = 0;
hpa_shard_t *shard = create_test_data(&hooks, &opts);
bool deferred_work_generated = false;
/* Fake clock starts at 10 ms. */
nstime_init(&defer_curtime, 10 * 1000 * 1000);
tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
enum { NALLOCS = HUGEPAGE_PAGES };
edata_t *edatas[NALLOCS];
for (int i = 0; i < NALLOCS; i++) {
edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
false, false, &deferred_work_generated);
expect_ptr_not_null(edatas[i], "Unexpected null edata");
}
/* Deallocate fewer than THRESHOLD pages. */
for (size_t i = 0; i < THRESHOLD - 1; i++) {
pai_dalloc(
tsdn, &shard->pai, edatas[i], &deferred_work_generated);
}
hpa_shard_do_deferred_work(tsdn, shard);
expect_false(deferred_work_generated, "No page is purgable");
expect_zu_eq(0, ndefer_purge_calls, "Should not purge yet");
/* Deallocate one more page to meet the threshold */
pai_dalloc(
tsdn, &shard->pai, edatas[THRESHOLD - 1], &deferred_work_generated);
hpa_shard_do_deferred_work(tsdn, shard);
expect_zu_eq(1, ndefer_purge_calls, "Should purge");
expect_zu_eq(shard->psset.stats.merged.ndirty, 0, "");
ndefer_purge_calls = 0;
destroy_test_data(shard);
}
TEST_END
/*
 * Runs with deferral_allowed == false, lazy hugify style and a 100 ms
 * minimum purge delay.  Checks that no purge is issued before the delay has
 * elapsed, and that after further allocation activity and another DELAY_NS
 * on the fake clock a plain pai_dalloc() results in at least one purge
 * (no explicit hpa_shard_do_deferred_work() call precedes the final check).
 */
TEST_BEGIN(test_delay_when_not_allowed_deferral) {
test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0));
hpa_hooks_t hooks;
hooks.map = &defer_test_map;
hooks.unmap = &defer_test_unmap;
hooks.purge = &defer_test_purge;
hooks.hugify = &defer_test_hugify;
hooks.dehugify = &defer_test_dehugify;
hooks.curtime = &defer_test_curtime;
hooks.ms_since = &defer_test_ms_since;
hooks.vectorized_purge = &defer_vectorized_purge;
/* 100 ms expressed in nanoseconds. */
const uint64_t DELAY_NS = 100 * 1000 * 1000;
hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive;
opts.deferral_allowed = false;
opts.purge_threshold = HUGEPAGE - 2 * PAGE;
opts.min_purge_delay_ms = DELAY_NS / (1000 * 1000);
opts.hugify_style = hpa_hugify_style_lazy;
opts.min_purge_interval_ms = 0;
hpa_shard_t *shard = create_test_data(&hooks, &opts);
bool deferred_work_generated = false;
/* Fake clock starts at 100 s. */
nstime_init2(&defer_curtime, 100, 0);
tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
enum { NALLOCS = HUGEPAGE_PAGES };
edata_t *edatas[NALLOCS];
ndefer_purge_calls = 0;
for (int i = 0; i < NALLOCS; i++) {
edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
false, false, &deferred_work_generated);
expect_ptr_not_null(edatas[i], "Unexpected null edata");
}
/* Deallocate all */
for (int i = 0; i < NALLOCS; i++) {
pai_dalloc(
tsdn, &shard->pai, edatas[i], &deferred_work_generated);
}
/* curtime = 100.0s */
hpa_shard_do_deferred_work(tsdn, shard);
expect_true(deferred_work_generated, "");
expect_zu_eq(0, ndefer_purge_calls, "Too early");
/* Stop one nanosecond short of the purge delay. */
nstime_iadd(&defer_curtime, DELAY_NS - 1);
/* This activity will take the curtime=100.1 and reset purgability */
for (int i = 0; i < NALLOCS; i++) {
edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
false, false, &deferred_work_generated);
expect_ptr_not_null(edatas[i], "Unexpected null edata");
}
/* Dealloc all but 2 pages; purgeable DELAY_NS later. */
for (int i = 0; i < NALLOCS - 2; i++) {
pai_dalloc(
tsdn, &shard->pai, edatas[i], &deferred_work_generated);
}
nstime_iadd(&defer_curtime, DELAY_NS);
/* Free the last page; edatas[NALLOCS - 2] stays allocated. */
pai_dalloc(
tsdn, &shard->pai, edatas[NALLOCS - 1], &deferred_work_generated);
expect_true(ndefer_purge_calls > 0, "Should have purged");
ndefer_purge_calls = 0;
destroy_test_data(shard);
}
TEST_END
/*
 * Checks the value returned by pai_time_until_deferred_work() with
 * hugify_style none, a 1000 ms minimum purge delay and a 500 ms minimum
 * purge interval.  The fake clock and the shard's last_purge / npurge_passes
 * fields are set directly to simulate three situations: no purge pass yet,
 * a last pass older than the interval, and a last pass 100 ms ago.
 */
TEST_BEGIN(test_deferred_until_time) {
test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0));
hpa_hooks_t hooks;
hooks.map = &defer_test_map;
hooks.unmap = &defer_test_unmap;
hooks.purge = &defer_test_purge;
hooks.hugify = &defer_test_hugify;
hooks.dehugify = &defer_test_dehugify;
hooks.curtime = &defer_test_curtime;
hooks.ms_since = &defer_test_ms_since;
hooks.vectorized_purge = &defer_vectorized_purge;
hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive;
opts.deferral_allowed = true;
opts.purge_threshold = PAGE;
opts.min_purge_delay_ms = 1000;
opts.hugification_threshold = HUGEPAGE / 2;
opts.dirty_mult = FXP_INIT_PERCENT(10);
opts.hugify_style = hpa_hugify_style_none;
opts.min_purge_interval_ms = 500;
opts.hugify_delay_ms = 3000;
hpa_shard_t *shard = create_test_data(&hooks, &opts);
bool deferred_work_generated = false;
/* Current time = 10ms */
nstime_init(&defer_curtime, 10 * 1000 * 1000);
/* Allocate one huge page */
tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
enum { NALLOCS = HUGEPAGE_PAGES };
edata_t *edatas[NALLOCS];
ndefer_purge_calls = 0;
for (int i = 0; i < NALLOCS; i++) {
edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
false, false, &deferred_work_generated);
expect_ptr_not_null(edatas[i], "Unexpected null edata");
}
/* Deallocate 25% */
for (int i = 0; i < NALLOCS / 4; i++) {
pai_dalloc(
tsdn, &shard->pai, edatas[i], &deferred_work_generated);
}
expect_true(deferred_work_generated, "We should hugify and purge");
/* Current time = 300ms, purge_eligible at 300ms + 1000ms */
nstime_init(&defer_curtime, 300UL * 1000 * 1000);
/* Free the remaining 75% at the new time. */
for (int i = NALLOCS / 4; i < NALLOCS; i++) {
pai_dalloc(
tsdn, &shard->pai, edatas[i], &deferred_work_generated);
}
expect_true(deferred_work_generated, "Purge work generated");
hpa_shard_do_deferred_work(tsdn, shard);
expect_zu_eq(0, ndefer_purge_calls, "not time for purging yet");
/* Current time = 900ms, purge_eligible at 1300ms */
nstime_init(&defer_curtime, 900UL * 1000 * 1000);
uint64_t until_ns = pai_time_until_deferred_work(tsdn, &shard->pai);
expect_u64_eq(until_ns, BACKGROUND_THREAD_DEFERRED_MIN,
"First pass did not happen");
/* Fake that first pass happened more than min_purge_interval_ago */
nstime_init(&shard->last_purge, 350UL * 1000 * 1000);
shard->stats.npurge_passes = 1;
until_ns = pai_time_until_deferred_work(tsdn, &shard->pai);
/*
 * NOTE(review): the message below contains a typo ("heck" for
 * "check"); it is a runtime string and is left unchanged here.
 */
expect_u64_eq(until_ns, BACKGROUND_THREAD_DEFERRED_MIN,
"No need to heck anything it is more than interval");
/* Last purge at 900ms, current time 1000ms: 100ms into the interval. */
nstime_init(&shard->last_purge, 900UL * 1000 * 1000);
nstime_init(&defer_curtime, 1000UL * 1000 * 1000);
/* Next purge expected at 900ms + min_purge_interval = 1400ms */
uint64_t expected_ms = 1400 - 1000;
until_ns = pai_time_until_deferred_work(tsdn, &shard->pai);
/* Compare at millisecond granularity. */
expect_u64_eq(expected_ms, until_ns / (1000 * 1000), "Next in 400ms");
destroy_test_data(shard);
}
TEST_END
/*
 * With eager hugify style and a 90% hugification threshold, checks that the
 * hugify hook is never called: not after filling a hugepage, not after
 * freeing 25% of it and purging, and not after allocating most of that 25%
 * back.  Also checks that the purge happens once and is not repeated.
 */
TEST_BEGIN(test_eager_no_hugify_on_threshold) {
test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0)
|| !config_stats);
hpa_hooks_t hooks;
hooks.map = &defer_test_map;
hooks.unmap = &defer_test_unmap;
hooks.purge = &defer_test_purge;
hooks.hugify = &defer_test_hugify;
hooks.dehugify = &defer_test_dehugify;
hooks.curtime = &defer_test_curtime;
hooks.ms_since = &defer_test_ms_since;
hooks.vectorized_purge = &defer_vectorized_purge;
hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive;
opts.deferral_allowed = true;
opts.purge_threshold = PAGE;
opts.min_purge_delay_ms = 0;
opts.hugification_threshold = HUGEPAGE * 0.9;
opts.dirty_mult = FXP_INIT_PERCENT(10);
opts.hugify_style = hpa_hugify_style_eager;
opts.min_purge_interval_ms = 0;
opts.hugify_delay_ms = 0;
hpa_shard_t *shard = create_test_data(&hooks, &opts);
bool deferred_work_generated = false;
/* Current time = 10ms */
nstime_init(&defer_curtime, 10 * 1000 * 1000);
tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
/* First allocation makes the page huge */
enum { NALLOCS = HUGEPAGE_PAGES };
edata_t *edatas[NALLOCS];
ndefer_purge_calls = 0;
for (int i = 0; i < NALLOCS; i++) {
edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
false, false, &deferred_work_generated);
expect_ptr_not_null(edatas[i], "Unexpected null edata");
}
/* Counter is reset once here and never again in this test. */
ndefer_hugify_calls = 0;
hpa_shard_do_deferred_work(tsdn, shard);
expect_zu_eq(ndefer_hugify_calls, 0, "No hugify needed - eager");
expect_zu_eq(shard->psset.stats.full_slabs[1].npageslabs, 1,
"Page should be full-huge");
/* Deallocate 25% */
for (int i = 0; i < NALLOCS / 4; i++) {
pai_dalloc(
tsdn, &shard->pai, edatas[i], &deferred_work_generated);
}
expect_true(deferred_work_generated, "purge is needed");
ndefer_purge_calls = 0;
hpa_shard_do_deferred_work(tsdn, shard);
expect_zu_eq(ndefer_hugify_calls, 0, "No hugify needed - eager");
expect_zu_eq(ndefer_purge_calls, 1, "Purge should have happened");
/*
 * Allocate back all but one of the freed pages (NALLOCS / 4 - 1), so
 * that we are above the hugification threshold again.
 */
ndefer_purge_calls = 0;
nstime_iadd(&defer_curtime, 800UL * 1000 * 1000);
for (int i = 0; i < NALLOCS / 4 - 1; i++) {
edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
false, false, &deferred_work_generated);
expect_ptr_not_null(edatas[i], "Unexpected null edata");
}
hpa_shard_do_deferred_work(tsdn, shard);
expect_zu_eq(0, ndefer_purge_calls, "no purging needed");
expect_zu_eq(ndefer_hugify_calls, 0, "no hugify - eager");
destroy_test_data(shard);
}
TEST_END
/*
 * With hugify_style none and a 25% hugification threshold, fill half of a
 * hugepage.  The page must start out non-huge; after deferred work runs it is
 * expected to be marked huge, and the hugify hook must not have been called
 * to get there.
 */
TEST_BEGIN(test_hpa_hugify_style_none_huge_no_syscall) {
	test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0));

	hpa_hooks_t hooks;
	hooks.map = &defer_test_map;
	hooks.unmap = &defer_test_unmap;
	hooks.purge = &defer_test_purge;
	hooks.hugify = &defer_test_hugify;
	hooks.dehugify = &defer_test_dehugify;
	hooks.curtime = &defer_test_curtime;
	hooks.ms_since = &defer_test_ms_since;
	hooks.vectorized_purge = &defer_vectorized_purge;

	hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive;
	opts.deferral_allowed = true;
	opts.purge_threshold = PAGE;
	opts.min_purge_delay_ms = 0;
	opts.hugification_threshold = HUGEPAGE * 0.25;
	opts.dirty_mult = FXP_INIT_PERCENT(10);
	opts.hugify_style = hpa_hugify_style_none;
	opts.min_purge_interval_ms = 0;
	opts.hugify_delay_ms = 0;

	hpa_shard_t *shard = create_test_data(&hooks, &opts);
	bool deferred_work_generated = false;
	/* Current time = 10ms */
	nstime_init(&defer_curtime, 10 * 1000 * 1000);
	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());

	/* Fill half of one hugepage; the page is not huge at this point. */
	enum { NALLOCS = HUGEPAGE_PAGES };
	edata_t *edatas[NALLOCS];
	ndefer_purge_calls = 0;
	for (int i = 0; i < NALLOCS / 2; i++) {
		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
		    false, false, &deferred_work_generated);
		expect_ptr_not_null(edatas[i], "Unexpected null edata");
	}
	hpdata_t *ps = psset_pick_alloc(&shard->psset, PAGE);
	expect_false(hpdata_huge_get(ps), "Page should be non-huge");

	ndefer_hugify_calls = 0;
	ndefer_purge_calls = 0;
	hpa_shard_do_deferred_work(tsdn, shard);
	expect_zu_eq(ndefer_hugify_calls, 0, "Hugify none, no syscall");

	ps = psset_pick_alloc(&shard->psset, PAGE);
	expect_ptr_not_null(ps, "Expected a page to allocate from");
	/*
	 * Check the huge flag itself; passing the bare pointer to
	 * expect_true() would only verify that a page was found.
	 */
	expect_true(hpdata_huge_get(ps), "Page should be huge");

	destroy_test_data(shard);
}
TEST_END
int
main(void) {
/*
@ -801,5 +1457,10 @@ main(void) {
test_alloc_dalloc_batch, test_defer_time,
test_purge_no_infinite_loop, test_no_min_purge_interval,
test_min_purge_interval, test_purge,
test_experimental_max_purge_nhp, test_vectorized_opt_eq_zero);
test_experimental_max_purge_nhp, test_vectorized_opt_eq_zero,
test_starts_huge, test_start_huge_purge_empty_only,
test_assume_huge_purge_fully, test_eager_with_purge_threshold,
test_delay_when_not_allowed_deferral, test_deferred_until_time,
test_eager_no_hugify_on_threshold,
test_hpa_hugify_style_none_huge_no_syscall);
}

View file

@ -37,7 +37,13 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = {
/* min_purge_interval_ms */
5 * 1000,
/* experimental_max_purge_nhp */
-1};
-1,
/* purge_threshold */
1,
/* min_purge_delay_ms */
0,
/* hugify_style */
hpa_hugify_style_lazy};
static hpa_shard_t *
create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) {

View file

@ -37,7 +37,13 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = {
/* min_purge_interval_ms */
5 * 1000,
/* experimental_max_purge_nhp */
-1};
-1,
/* purge_threshold */
1,
/* min_purge_delay_ms */
0,
/* hugify_style */
hpa_hugify_style_lazy};
static hpa_shard_t *
create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) {

View file

@ -5,7 +5,7 @@
TEST_BEGIN(test_reserve_alloc) {
hpdata_t hpdata;
hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE);
hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE, /* is_huge */ false);
/* Allocating a page at a time, we should do first fit. */
for (size_t i = 0; i < HUGEPAGE_PAGES; i++) {
@ -57,7 +57,7 @@ TEST_END
TEST_BEGIN(test_purge_simple) {
hpdata_t hpdata;
hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE);
hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE, /* is_huge */ false);
void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE_PAGES / 2 * PAGE);
expect_ptr_eq(alloc, HPDATA_ADDR, "");
@ -101,7 +101,7 @@ TEST_END
*/
TEST_BEGIN(test_purge_intervening_dalloc) {
hpdata_t hpdata;
hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE);
hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE, /* is_huge */ false);
/* Allocate the first 3/4 of the pages. */
void *alloc = hpdata_reserve_alloc(
@ -164,7 +164,7 @@ TEST_BEGIN(test_purge_over_retained) {
size_t purge_size;
hpdata_t hpdata;
hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE);
hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE, /* is_huge */ false);
/* Allocate the first 3/4 of the pages. */
void *alloc = hpdata_reserve_alloc(
@ -238,7 +238,7 @@ TEST_END
TEST_BEGIN(test_hugify) {
hpdata_t hpdata;
hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE);
hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE, /* is_huge */ false);
void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE / 2);
expect_ptr_eq(alloc, HPDATA_ADDR, "");

View file

@ -313,6 +313,9 @@ TEST_BEGIN(test_mallctl_opt) {
TEST_MALLCTL_OPT(size_t, hpa_sec_bytes_after_flush, always);
TEST_MALLCTL_OPT(size_t, hpa_sec_batch_fill_extra, always);
TEST_MALLCTL_OPT(ssize_t, experimental_hpa_max_purge_nhp, always);
TEST_MALLCTL_OPT(size_t, hpa_purge_threshold, always);
TEST_MALLCTL_OPT(uint64_t, hpa_min_purge_delay_ms, always);
TEST_MALLCTL_OPT(const char *, hpa_hugify_style, always);
TEST_MALLCTL_OPT(unsigned, narenas, always);
TEST_MALLCTL_OPT(const char *, percpu_arena, always);
TEST_MALLCTL_OPT(size_t, oversize_threshold, always);

View file

@ -124,7 +124,8 @@ TEST_BEGIN(test_empty) {
test_skip_if(hpa_hugepage_size_exceeds_limit());
bool err;
hpdata_t pageslab;
hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE);
bool is_huge = false;
hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge);
edata_t alloc;
edata_init_test(&alloc);
@ -141,9 +142,10 @@ TEST_END
TEST_BEGIN(test_fill) {
test_skip_if(hpa_hugepage_size_exceeds_limit());
bool err;
bool is_huge = false;
hpdata_t pageslab;
hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE);
hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge);
edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES);
@ -179,7 +181,8 @@ TEST_BEGIN(test_reuse) {
hpdata_t *ps;
hpdata_t pageslab;
hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE);
bool is_huge = false;
hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge);
edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES);
@ -274,7 +277,8 @@ TEST_BEGIN(test_evict) {
hpdata_t *ps;
hpdata_t pageslab;
hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE);
bool is_huge = false;
hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge);
edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES);
@ -311,9 +315,10 @@ TEST_BEGIN(test_multi_pageslab) {
hpdata_t *ps;
hpdata_t pageslab[2];
hpdata_init(&pageslab[0], PAGESLAB_ADDR, PAGESLAB_AGE);
bool is_huge = false;
hpdata_init(&pageslab[0], PAGESLAB_ADDR, PAGESLAB_AGE, is_huge);
hpdata_init(&pageslab[1], (void *)((uintptr_t)PAGESLAB_ADDR + HUGEPAGE),
PAGESLAB_AGE + 1);
PAGESLAB_AGE + 1, is_huge);
edata_t *alloc[2];
alloc[0] = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES);
@ -376,7 +381,8 @@ TEST_END
TEST_BEGIN(test_stats_merged) {
hpdata_t pageslab;
hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE);
bool is_huge = false;
hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge);
edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES);
@ -442,7 +448,8 @@ TEST_BEGIN(test_stats_huge) {
test_skip_if(hpa_hugepage_size_exceeds_limit());
hpdata_t pageslab;
hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE);
bool is_huge = false;
hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge);
edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES);
@ -570,7 +577,8 @@ TEST_BEGIN(test_stats_fullness) {
bool err;
hpdata_t pageslab;
hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE);
bool is_huge = false;
hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge);
edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES);
@ -620,13 +628,15 @@ static void
init_test_pageslabs(psset_t *psset, hpdata_t *pageslab,
hpdata_t *worse_pageslab, edata_t *alloc, edata_t *worse_alloc) {
bool err;
bool is_huge = false;
hpdata_init(pageslab, (void *)(10 * HUGEPAGE), PAGESLAB_AGE);
hpdata_init(pageslab, (void *)(10 * HUGEPAGE), PAGESLAB_AGE, is_huge);
/*
* This pageslab would be better from an address-first-fit POV, but
* worse from an age POV.
*/
hpdata_init(worse_pageslab, (void *)(9 * HUGEPAGE), PAGESLAB_AGE + 1);
hpdata_init(
worse_pageslab, (void *)(9 * HUGEPAGE), PAGESLAB_AGE + 1, is_huge);
psset_init(psset);
@ -763,14 +773,15 @@ TEST_BEGIN(test_purge_prefers_nonhuge) {
hpdata_t hpdata_nonhuge[NHP];
uintptr_t nonhuge_begin = (uintptr_t)&hpdata_nonhuge[0];
uintptr_t nonhuge_end = (uintptr_t)&hpdata_nonhuge[NHP];
bool is_huge = false;
for (size_t i = 0; i < NHP; i++) {
hpdata_init(
&hpdata_huge[i], (void *)((10 + i) * HUGEPAGE), 123 + i);
hpdata_init(&hpdata_huge[i], (void *)((10 + i) * HUGEPAGE),
123 + i, is_huge);
psset_insert(&psset, &hpdata_huge[i]);
hpdata_init(&hpdata_nonhuge[i],
(void *)((10 + NHP + i) * HUGEPAGE), 456 + i);
(void *)((10 + NHP + i) * HUGEPAGE), 456 + i, is_huge);
psset_insert(&psset, &hpdata_nonhuge[i]);
}
for (int i = 0; i < 2 * NHP; i++) {
@ -802,7 +813,7 @@ TEST_BEGIN(test_purge_prefers_nonhuge) {
* further.
*/
for (int i = 0; i < NHP; i++) {
hpdata = psset_pick_purge(&psset);
hpdata = psset_pick_purge(&psset, NULL);
assert_true(nonhuge_begin <= (uintptr_t)hpdata
&& (uintptr_t)hpdata < nonhuge_end,
"");
@ -812,7 +823,7 @@ TEST_BEGIN(test_purge_prefers_nonhuge) {
psset_update_end(&psset, hpdata);
}
for (int i = 0; i < NHP; i++) {
hpdata = psset_pick_purge(&psset);
hpdata = psset_pick_purge(&psset, NULL);
expect_true(huge_begin <= (uintptr_t)hpdata
&& (uintptr_t)hpdata < huge_end,
"");
@ -825,6 +836,72 @@ TEST_BEGIN(test_purge_prefers_nonhuge) {
}
TEST_END
/*
 * Exercises the optional time argument of psset_pick_purge().
 *
 * Three slabs are inserted and each is given its own "time purge allowed"
 * stamp, expressed relative to BASE_SEC:
 *   - hpdata_nonempty   : BASE_SEC + 80
 *   - hpdata_empty_nh   : BASE_SEC + 100
 *   - hpdata_empty_huge : BASE_SEC + 110
 * The test then checks which slab is picked when no time is supplied (NULL),
 * when only the earliest stamp has been reached, and when all stamps have
 * been reached.
 */
TEST_BEGIN(test_purge_timing) {
test_skip_if(hpa_hugepage_size_exceeds_limit());
void *ptr;
psset_t psset;
psset_init(&psset);
hpdata_t hpdata_empty_nh;
hpdata_t hpdata_empty_huge;
hpdata_t hpdata_nonempty;
nstime_t basetime, now, empty_nh_tm, empty_huge_tm, nonempty_tm;
const uint64_t BASE_SEC = 100;
/*
 * NOTE(review): basetime is initialized here but is not read anywhere else
 * in this test; confirm whether it is still needed.
 */
nstime_init2(&basetime, BASE_SEC, 0);
/*
 * Create the three slabs and add them to the psset.  Only
 * hpdata_empty_huge is initialized with the last hpdata_init() argument
 * set to true (the is_huge flag, as named at the other call sites in this
 * file); the other two pass false.
 */
hpdata_init(&hpdata_empty_nh, (void *)(9 * HUGEPAGE), 102, false);
psset_insert(&psset, &hpdata_empty_nh);
hpdata_init(&hpdata_empty_huge, (void *)(10 * HUGEPAGE), 123, true);
psset_insert(&psset, &hpdata_empty_huge);
hpdata_init(&hpdata_nonempty, (void *)(11 * HUGEPAGE), 456, false);
psset_insert(&psset, &hpdata_nonempty);
/*
 * Non-huge slab: reserve one page and give it straight back, so nothing
 * stays reserved (presumably leaving that page dirty -- TODO confirm
 * against hpdata_unreserve()).  Purging is allowed from BASE_SEC + 100.
 */
psset_update_begin(&psset, &hpdata_empty_nh);
ptr = hpdata_reserve_alloc(&hpdata_empty_nh, PAGE);
expect_ptr_eq(hpdata_addr_get(&hpdata_empty_nh), ptr, "");
hpdata_unreserve(&hpdata_empty_nh, ptr, PAGE);
hpdata_purge_allowed_set(&hpdata_empty_nh, true);
nstime_init2(&empty_nh_tm, BASE_SEC + 100, 0);
hpdata_time_purge_allowed_set(&hpdata_empty_nh, &empty_nh_tm);
psset_update_end(&psset, &hpdata_empty_nh);
/*
 * Huge slab: same reserve/unreserve pattern as above, but purging is
 * allowed only from BASE_SEC + 110, the latest of the three stamps.
 */
psset_update_begin(&psset, &hpdata_empty_huge);
ptr = hpdata_reserve_alloc(&hpdata_empty_huge, PAGE);
expect_ptr_eq(hpdata_addr_get(&hpdata_empty_huge), ptr, "");
hpdata_unreserve(&hpdata_empty_huge, ptr, PAGE);
nstime_init2(&empty_huge_tm, BASE_SEC + 110, 0);
hpdata_time_purge_allowed_set(&hpdata_empty_huge, &empty_huge_tm);
hpdata_purge_allowed_set(&hpdata_empty_huge, true);
psset_update_end(&psset, &hpdata_empty_huge);
/*
 * Non-empty slab: reserve 10 pages but return only 9 of them, so one page
 * remains reserved.  Purging is allowed from BASE_SEC + 80, the earliest
 * of the three stamps.
 */
psset_update_begin(&psset, &hpdata_nonempty);
ptr = hpdata_reserve_alloc(&hpdata_nonempty, 10 * PAGE);
expect_ptr_eq(hpdata_addr_get(&hpdata_nonempty), ptr, "");
hpdata_unreserve(&hpdata_nonempty, ptr, 9 * PAGE);
hpdata_purge_allowed_set(&hpdata_nonempty, true);
nstime_init2(&nonempty_tm, BASE_SEC + 80, 0);
hpdata_time_purge_allowed_set(&hpdata_nonempty, &nonempty_tm);
psset_update_end(&psset, &hpdata_nonempty);
/*
 * With a NULL time argument the per-slab stamps are expected not to
 * restrict the choice, and the empty huge slab is the expected pick.
 */
hpdata_t *ps = psset_pick_purge(&psset, NULL);
expect_ptr_eq(&hpdata_empty_huge, ps, "Without tick, pick huge");
/*
 * At BASE_SEC + 90 only the non-empty slab's stamp (BASE_SEC + 80) has
 * been reached, so it is the only slab expected to be eligible.
 */
nstime_init2(&now, BASE_SEC + 90, 0);
ps = psset_pick_purge(&psset, &now);
expect_ptr_eq(&hpdata_nonempty, ps, "Only non empty purgable");
/*
 * At BASE_SEC + 110 every stamp has been reached (the huge slab's stamp is
 * exactly equal to now), and the empty huge slab is again the expected
 * pick.
 */
nstime_init2(&now, BASE_SEC + 110, 0);
ps = psset_pick_purge(&psset, &now);
expect_ptr_eq(&hpdata_empty_huge, ps, "Huge empty is the best");
}
TEST_END
TEST_BEGIN(test_purge_prefers_empty) {
test_skip_if(hpa_hugepage_size_exceeds_limit());
void *ptr;
@ -834,9 +911,10 @@ TEST_BEGIN(test_purge_prefers_empty) {
hpdata_t hpdata_empty;
hpdata_t hpdata_nonempty;
hpdata_init(&hpdata_empty, (void *)(10 * HUGEPAGE), 123);
bool is_huge = false;
hpdata_init(&hpdata_empty, (void *)(10 * HUGEPAGE), 123, is_huge);
psset_insert(&psset, &hpdata_empty);
hpdata_init(&hpdata_nonempty, (void *)(11 * HUGEPAGE), 456);
hpdata_init(&hpdata_nonempty, (void *)(11 * HUGEPAGE), 456, is_huge);
psset_insert(&psset, &hpdata_nonempty);
psset_update_begin(&psset, &hpdata_empty);
@ -857,7 +935,7 @@ TEST_BEGIN(test_purge_prefers_empty) {
* The nonempty slab has 9 dirty pages, while the empty one has only 1.
* We should still pick the empty one for purging.
*/
hpdata_t *to_purge = psset_pick_purge(&psset);
hpdata_t *to_purge = psset_pick_purge(&psset, NULL);
expect_ptr_eq(&hpdata_empty, to_purge, "");
}
TEST_END
@ -876,13 +954,16 @@ TEST_BEGIN(test_purge_prefers_empty_huge) {
uintptr_t cur_addr = 100 * HUGEPAGE;
uint64_t cur_age = 123;
bool is_huge = false;
for (int i = 0; i < NHP; i++) {
hpdata_init(&hpdata_huge[i], (void *)cur_addr, cur_age);
hpdata_init(
&hpdata_huge[i], (void *)cur_addr, cur_age, is_huge);
cur_addr += HUGEPAGE;
cur_age++;
psset_insert(&psset, &hpdata_huge[i]);
hpdata_init(&hpdata_nonhuge[i], (void *)cur_addr, cur_age);
hpdata_init(
&hpdata_nonhuge[i], (void *)cur_addr, cur_age, is_huge);
cur_addr += HUGEPAGE;
cur_age++;
psset_insert(&psset, &hpdata_nonhuge[i]);
@ -917,14 +998,14 @@ TEST_BEGIN(test_purge_prefers_empty_huge) {
* any of the non-huge ones for purging.
*/
for (int i = 0; i < NHP; i++) {
hpdata_t *to_purge = psset_pick_purge(&psset);
hpdata_t *to_purge = psset_pick_purge(&psset, NULL);
expect_ptr_eq(&hpdata_huge[i], to_purge, "");
psset_update_begin(&psset, to_purge);
hpdata_purge_allowed_set(to_purge, false);
psset_update_end(&psset, to_purge);
}
for (int i = 0; i < NHP; i++) {
hpdata_t *to_purge = psset_pick_purge(&psset);
hpdata_t *to_purge = psset_pick_purge(&psset, NULL);
expect_ptr_eq(&hpdata_nonhuge[i], to_purge, "");
psset_update_begin(&psset, to_purge);
hpdata_purge_allowed_set(to_purge, false);
@ -938,6 +1019,6 @@ main(void) {
return test_no_reentrancy(test_empty, test_fill, test_reuse, test_evict,
test_multi_pageslab, test_stats_merged, test_stats_huge,
test_stats_fullness, test_oldest_fit, test_insert_remove,
test_purge_prefers_nonhuge, test_purge_prefers_empty,
test_purge_prefers_empty_huge);
test_purge_prefers_nonhuge, test_purge_timing,
test_purge_prefers_empty, test_purge_prefers_empty_huge);
}