if process_madvise is supported, call it when purging hpa

This commit is contained in:
Slobodan Predolac 2025-03-12 12:26:52 -07:00
parent 80e9001af3
commit 04adb149fe
7 changed files with 387 additions and 3 deletions

View file

@ -231,6 +231,7 @@ TESTS_UNIT := \
$(srcroot)test/unit/hash.c \
$(srcroot)test/unit/hook.c \
$(srcroot)test/unit/hpa.c \
$(srcroot)test/unit/hpa_vectorized_madvise.c \
$(srcroot)test/unit/hpa_background_thread.c \
$(srcroot)test/unit/hpdata.c \
$(srcroot)test/unit/huge.c \

View file

@ -13,6 +13,7 @@ struct hpa_hooks_s {
void (*dehugify)(void *ptr, size_t size);
void (*curtime)(nstime_t *r_time, bool first_reading);
uint64_t (*ms_since)(nstime_t *r_time);
bool (*vectorized_purge)(void* vec, size_t vlen, size_t nbytes);
};
extern const hpa_hooks_t hpa_hooks_default;

View file

@ -8,6 +8,16 @@
#define HPA_EDEN_SIZE (128 * HUGEPAGE)
#define HPA_MIN_VAR_VEC_SIZE 8
/*
 * Element type of the batch handed to the vectorized purge hook: one
 * (iov_base, iov_len) pair per region to purge.  When
 * JEMALLOC_HAVE_PROCESS_MADVISE is defined this is struct iovec itself;
 * otherwise a local struct with the same two member names is declared so the
 * batching code compiles unchanged.
 */
#ifdef JEMALLOC_HAVE_PROCESS_MADVISE
typedef struct iovec hpa_io_vector_t;
#else
typedef struct {
void *iov_base;
size_t iov_len;
} hpa_io_vector_t;
#endif
static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size,
size_t alignment, bool zero, bool guarded, bool frequent_reuse,
bool *deferred_work_generated);
@ -422,6 +432,24 @@ hpa_shard_has_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) {
return to_hugify != NULL || hpa_should_purge(tsdn, shard);
}
/*
 * Purge the vlen regions described by vec (nbytes bytes in total).
 *
 * When opt_process_madvise_max_batch is non-zero the vectorized_purge hook is
 * tried first; the hook returns false on success.  If batching is disabled or
 * the hook reports failure, every region is purged individually through the
 * purge hook instead.
 */
static void
hpa_try_vectorized_purge(hpa_shard_t *shard, hpa_io_vector_t *vec,
    size_t vlen, size_t nbytes) {
	if (opt_process_madvise_max_batch > 0) {
		bool failed = shard->central->hooks.vectorized_purge(vec,
		    vlen, nbytes);
		if (!failed) {
			return;
		}
	}
	/*
	 * Purging a region a second time is safe, it only costs performance.
	 * If the kernel could tell us exactly which regions failed, that
	 * penalty could be avoided.
	 */
	for (size_t i = 0; i < vlen; i++) {
		hpa_io_vector_t *region = &vec[i];
		shard->central->hooks.purge(region->iov_base,
		    region->iov_len);
	}
}
/* Returns whether or not we purged anything. */
static bool
hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
@ -470,14 +498,37 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
}
size_t total_purged = 0;
uint64_t purges_this_pass = 0;
assert(opt_process_madvise_max_batch <=
PROCESS_MADVISE_MAX_BATCH_LIMIT);
size_t len = opt_process_madvise_max_batch == 0 ?
HPA_MIN_VAR_VEC_SIZE : opt_process_madvise_max_batch;
VARIABLE_ARRAY(hpa_io_vector_t, vec, len);
void *purge_addr;
size_t purge_size;
size_t cur = 0;
size_t total_batch_bytes = 0;
while (hpdata_purge_next(to_purge, &purge_state, &purge_addr,
&purge_size)) {
vec[cur].iov_base = purge_addr;
vec[cur].iov_len = purge_size;
total_purged += purge_size;
assert(total_purged <= HUGEPAGE);
purges_this_pass++;
shard->central->hooks.purge(purge_addr, purge_size);
total_batch_bytes += purge_size;
cur++;
if (cur == len) {
hpa_try_vectorized_purge(shard, vec, len, total_batch_bytes);
assert(total_batch_bytes > 0);
cur = 0;
total_batch_bytes = 0;
}
}
/* Batch was not full */
if (cur > 0) {
hpa_try_vectorized_purge(shard, vec, cur, total_batch_bytes);
}
malloc_mutex_lock(tsdn, &shard->mtx);

View file

@ -10,6 +10,8 @@ static bool hpa_hooks_hugify(void *ptr, size_t size, bool sync);
static void hpa_hooks_dehugify(void *ptr, size_t size);
static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading);
static uint64_t hpa_hooks_ms_since(nstime_t *past_nstime);
static bool hpa_hooks_vectorized_purge(
void *vec, size_t vlen, size_t nbytes);
const hpa_hooks_t hpa_hooks_default = {
&hpa_hooks_map,
@ -18,7 +20,8 @@ const hpa_hooks_t hpa_hooks_default = {
&hpa_hooks_hugify,
&hpa_hooks_dehugify,
&hpa_hooks_curtime,
&hpa_hooks_ms_since
&hpa_hooks_ms_since,
&hpa_hooks_vectorized_purge
};
static void *
@ -78,3 +81,14 @@ static uint64_t
hpa_hooks_ms_since(nstime_t *past_nstime) {
return nstime_ms_since(past_nstime);
}
/*
 * Default vectorized_purge hook.
 *
 * Returns true if not all nbytes were purged or an error occurred, false on
 * success.  Without JEMALLOC_HAVE_PROCESS_MADVISE no vectorized purge is
 * attempted and failure is always reported.
 */
static bool
hpa_hooks_vectorized_purge(void *vec, size_t vlen, size_t nbytes) {
#ifdef JEMALLOC_HAVE_PROCESS_MADVISE
	bool failed = pages_purge_process_madvise(vec, vlen, nbytes);
#else
	bool failed = true;
	(void)vec;
	(void)vlen;
	(void)nbytes;
#endif
	return failed;
}

View file

@ -357,6 +357,16 @@ defer_test_purge(void *ptr, size_t size) {
++ndefer_purge_calls;
}
/* Set whenever the vectorized purge stub below has been invoked. */
static bool defer_vectorized_purge_called = false;
/*
 * Test stand-in for the vectorized_purge hook.  It counts as one purge call,
 * records that the vectorized path was taken, and always reports success
 * (false).  All arguments are ignored.
 */
static bool
defer_vectorized_purge(void *vec, size_t vlen, size_t nbytes) {
	(void)vec;
	(void)vlen;
	(void)nbytes;
	++ndefer_purge_calls;
	defer_vectorized_purge_called = true;
	return false;
}
static size_t ndefer_hugify_calls = 0;
static bool
defer_test_hugify(void *ptr, size_t size, bool sync) {
@ -392,6 +402,7 @@ TEST_BEGIN(test_defer_time) {
hooks.dehugify = &defer_test_dehugify;
hooks.curtime = &defer_test_curtime;
hooks.ms_since = &defer_test_ms_since;
hooks.vectorized_purge = &defer_vectorized_purge;
hpa_shard_opts_t opts = test_hpa_shard_opts_default;
opts.deferral_allowed = true;
@ -506,6 +517,7 @@ TEST_BEGIN(test_no_min_purge_interval) {
hooks.dehugify = &defer_test_dehugify;
hooks.curtime = &defer_test_curtime;
hooks.ms_since = &defer_test_ms_since;
hooks.vectorized_purge = &defer_vectorized_purge;
hpa_shard_opts_t opts = test_hpa_shard_opts_default;
opts.deferral_allowed = true;
@ -548,6 +560,7 @@ TEST_BEGIN(test_min_purge_interval) {
hooks.dehugify = &defer_test_dehugify;
hooks.curtime = &defer_test_curtime;
hooks.ms_since = &defer_test_ms_since;
hooks.vectorized_purge = &defer_vectorized_purge;
hpa_shard_opts_t opts = test_hpa_shard_opts_default;
opts.deferral_allowed = true;
@ -598,6 +611,7 @@ TEST_BEGIN(test_purge) {
hooks.dehugify = &defer_test_dehugify;
hooks.curtime = &defer_test_curtime;
hooks.ms_since = &defer_test_ms_since;
hooks.vectorized_purge = &defer_vectorized_purge;
hpa_shard_opts_t opts = test_hpa_shard_opts_default;
opts.deferral_allowed = true;
@ -664,6 +678,7 @@ TEST_BEGIN(test_experimental_max_purge_nhp) {
hooks.dehugify = &defer_test_dehugify;
hooks.curtime = &defer_test_curtime;
hooks.ms_since = &defer_test_ms_since;
hooks.vectorized_purge = &defer_vectorized_purge;
hpa_shard_opts_t opts = test_hpa_shard_opts_default;
opts.deferral_allowed = true;
@ -732,6 +747,7 @@ TEST_BEGIN(test_demand_purge_slack) {
hooks.dehugify = &defer_test_dehugify;
hooks.curtime = &defer_test_curtime;
hooks.ms_since = &defer_test_ms_since;
hooks.vectorized_purge = &defer_vectorized_purge;
hpa_shard_opts_t opts = test_hpa_shard_opts_default;
opts.deferral_allowed = true;
@ -799,6 +815,7 @@ TEST_BEGIN(test_demand_purge_tight) {
hooks.dehugify = &defer_test_dehugify;
hooks.curtime = &defer_test_curtime;
hooks.ms_since = &defer_test_ms_since;
hooks.vectorized_purge = &defer_vectorized_purge;
hpa_shard_opts_t opts = test_hpa_shard_opts_default;
opts.deferral_allowed = true;
@ -855,6 +872,44 @@ TEST_BEGIN(test_demand_purge_tight) {
}
TEST_END
/*
 * With process_madvise_max_batch == 0 the vectorized purge hook must never be
 * invoked: purging a single freed page results in exactly one call to the
 * regular purge hook.
 */
TEST_BEGIN(test_vectorized_opt_eq_zero) {
	test_skip_if(!hpa_supported() ||
	    (opt_process_madvise_max_batch != 0));

	/* Install the test stubs for every hook. */
	hpa_hooks_t hooks;
	hooks.vectorized_purge = &defer_vectorized_purge;
	hooks.ms_since = &defer_test_ms_since;
	hooks.curtime = &defer_test_curtime;
	hooks.dehugify = &defer_test_dehugify;
	hooks.hugify = &defer_test_hugify;
	hooks.purge = &defer_test_purge;
	hooks.unmap = &defer_test_unmap;
	hooks.map = &defer_test_map;

	/* Allow deferred work and remove the minimum purge interval. */
	hpa_shard_opts_t shard_opts = test_hpa_shard_opts_default;
	shard_opts.min_purge_interval_ms = 0;
	shard_opts.deferral_allowed = true;

	/* Start from clean bookkeeping before the shard is created. */
	ndefer_purge_calls = 0;
	defer_vectorized_purge_called = false;

	hpa_shard_t *shard = create_test_data(&hooks, &shard_opts);

	bool deferred = false;
	nstime_init(&defer_curtime, 0);
	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());

	/* Allocate and free one page, then run the deferred work. */
	edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
	    false, false, &deferred);
	expect_ptr_not_null(edata, "Unexpected null edata");
	pai_dalloc(tsdn, &shard->pai, edata, &deferred);
	hpa_shard_do_deferred_work(tsdn, shard);

	expect_false(defer_vectorized_purge_called, "No vec purge");
	expect_zu_eq(1, ndefer_purge_calls, "Expect purge");

	destroy_test_data(shard);
}
TEST_END
int
main(void) {
/*
@ -880,5 +935,6 @@ main(void) {
test_purge,
test_experimental_max_purge_nhp,
test_demand_purge_slack,
test_demand_purge_tight);
test_demand_purge_tight,
test_vectorized_opt_eq_zero);
}

View file

@ -0,0 +1,258 @@
#include "test/jemalloc_test.h"
#include "jemalloc/internal/hpa.h"
#include "jemalloc/internal/nstime.h"
#define SHARD_IND 111
#define ALLOC_MAX (HUGEPAGE)
/*
 * Everything a test shard needs, bundled in one allocation so that a shard
 * pointer can be converted back to its owning test_data_t.
 */
typedef struct test_data_s {
	/*
	 * Must be the first member -- we convert back and forth between the
	 * test_data_t and the hpa_shard_t.
	 */
	hpa_shard_t shard;
	hpa_central_t central;
	base_t *base;
	edata_cache_t shard_edata_cache;

	emap_t emap;
} test_data_t;
/*
 * Baseline shard options for the tests in this file.  Each test copies this
 * struct and overrides the fields it cares about (e.g. deferral_allowed and
 * min_purge_interval_ms).  The initializer is positional; the comment above
 * each value names the field it is meant to initialize.
 */
static hpa_shard_opts_t test_hpa_shard_opts_default = {
/* slab_max_alloc */
ALLOC_MAX,
/* hugification_threshold */
HUGEPAGE,
/* dirty_mult */
FXP_INIT_PERCENT(25),
/* deferral_allowed */
false,
/* hugify_delay_ms */
10 * 1000,
/* hugify_sync */
false,
/* min_purge_interval_ms */
5 * 1000,
/* experimental_max_purge_nhp */
-1,
/* peak_demand_window_ms */
0
};
/*
 * Build a fully initialized HPA shard for a test.
 *
 * Creates a base, mallocs a test_data_t, and initializes its edata cache,
 * emap, central (with the supplied hooks) and shard (with the supplied opts),
 * asserting that every step succeeds.  The returned pointer is the
 * test_data_t cast to hpa_shard_t *, which is valid because the shard is the
 * struct's first member; release it with destroy_test_data().
 */
static hpa_shard_t *
create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) {
bool err;
base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND,
&ehooks_default_extent_hooks, /* metadata_use_hooks */ true);
assert_ptr_not_null(base, "");
test_data_t *test_data = malloc(sizeof(test_data_t));
assert_ptr_not_null(test_data, "");
test_data->base = base;
err = edata_cache_init(&test_data->shard_edata_cache, base);
assert_false(err, "");
err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false);
assert_false(err, "");
/* The central carries the hooks; the shard reaches them through it. */
err = hpa_central_init(&test_data->central, test_data->base, hooks);
assert_false(err, "");
err = hpa_shard_init(&test_data->shard, &test_data->central,
&test_data->emap, test_data->base, &test_data->shard_edata_cache,
SHARD_IND, opts);
assert_false(err, "");
return (hpa_shard_t *)test_data;
}
/*
 * Release a shard obtained from create_test_data(): delete its base, then
 * free the enclosing test_data_t allocation.
 */
static void
destroy_test_data(hpa_shard_t *shard) {
	/* The shard is the first member of test_data_t, so the cast is valid. */
	base_t *base = ((test_data_t *)shard)->base;
	base_delete(TSDN_NULL, base);
	free(shard);
}
/* Next fake address to hand out; starts at HUGEPAGE * 123. */
static uintptr_t defer_bump_ptr = HUGEPAGE * 123;
/*
 * Map hook stub: a bump allocator over fake addresses.  Returns the current
 * bump pointer and advances it by size; no memory is actually mapped here.
 */
static void *
defer_test_map(size_t size) {
	uintptr_t mapped = defer_bump_ptr;
	defer_bump_ptr = mapped + size;
	return (void *)mapped;
}
/* Unmap hook stub: intentionally does nothing and ignores both arguments. */
static void
defer_test_unmap(void *ptr, size_t size) {
(void)ptr;
(void)size;
}
/* Number of times the non-vectorized purge stub has been called. */
static size_t ndefer_purge_calls = 0;
/* Purge hook stub: only counts the call; the region itself is ignored. */
static void
defer_test_purge(void *ptr, size_t size) {
	ndefer_purge_calls += 1;
	(void)size;
	(void)ptr;
}
/* Number of times the succeeding vectorized purge stub has been called. */
static size_t ndefer_vec_purge_calls = 0;
/*
 * Vectorized purge hook stub that always succeeds: counts the call and
 * returns false (no failure).  All arguments are ignored.
 */
static bool
defer_vectorized_purge(void *vec, size_t vlen, size_t nbytes) {
	(void)vec;
	(void)vlen;
	(void)nbytes;
	++ndefer_vec_purge_calls;
	return false;
}
/* Set once the failing vectorized purge stub has been invoked. */
static bool defer_vec_purge_didfail = false;
/*
 * Vectorized purge hook stub that always fails: records that it ran and
 * returns true (failure).  All arguments are ignored.
 */
static bool
defer_vectorized_purge_fail(void *vec, size_t vlen, size_t nbytes) {
	defer_vec_purge_didfail = true;
	(void)nbytes;
	(void)vlen;
	(void)vec;
	return true;
}
/* Number of times the hugify stub has been called. */
static size_t ndefer_hugify_calls = 0;
/* Hugify hook stub: counts the call, ignores its arguments, returns false. */
static bool
defer_test_hugify(void *ptr, size_t size, bool sync) {
	(void)ptr;
	(void)size;
	(void)sync;
	ndefer_hugify_calls += 1;
	return false;
}
/* Number of times the dehugify stub has been called. */
static size_t ndefer_dehugify_calls = 0;
/* Dehugify hook stub: counts the call and ignores its arguments. */
static void
defer_test_dehugify(void *ptr, size_t size) {
	(void)ptr;
	(void)size;
	ndefer_dehugify_calls += 1;
}
/* Fake clock shared by the time hooks; tests set it explicitly. */
static nstime_t defer_curtime;
/*
 * Curtime hook stub: reports the fake clock value instead of the real time.
 * The first_reading flag is ignored.
 */
static void
defer_test_curtime(nstime_t *r_time, bool first_reading) {
	(void)first_reading;
	*r_time = defer_curtime;
}
/*
 * ms_since hook stub: milliseconds between past_time and the fake clock
 * (defer_curtime), computed from their nanosecond values with integer
 * division.
 */
static uint64_t
defer_test_ms_since(nstime_t *past_time) {
	uint64_t elapsed_ns = nstime_ns(&defer_curtime) - nstime_ns(past_time);
	return elapsed_ns / 1000 / 1000;
}
/*
 * When the vectorized purge hook reports failure, the shard must fall back to
 * the regular purge hook: freeing a single page and running deferred work has
 * to produce one failed vectorized attempt followed by exactly one
 * non-vectorized purge call.
 */
TEST_BEGIN(test_vectorized_failure_fallback) {
	test_skip_if(!hpa_supported() ||
	    (opt_process_madvise_max_batch == 0));

	hpa_hooks_t hooks;
	hooks.map = &defer_test_map;
	hooks.unmap = &defer_test_unmap;
	hooks.purge = &defer_test_purge;
	hooks.hugify = &defer_test_hugify;
	hooks.dehugify = &defer_test_dehugify;
	hooks.curtime = &defer_test_curtime;
	hooks.ms_since = &defer_test_ms_since;
	/* Use the stub that always fails to force the fallback path. */
	hooks.vectorized_purge = &defer_vectorized_purge_fail;

	/*
	 * Reset all bookkeeping this test asserts on before doing any work, so
	 * the result does not depend on what ran earlier.
	 */
	defer_vec_purge_didfail = false;
	ndefer_purge_calls = 0;

	hpa_shard_opts_t opts = test_hpa_shard_opts_default;
	opts.deferral_allowed = true;
	opts.min_purge_interval_ms = 0;

	hpa_shard_t *shard = create_test_data(&hooks, &opts);

	bool deferred_work_generated = false;

	nstime_init(&defer_curtime, 0);
	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());

	/* Dirty a single page, then let the shard purge it. */
	edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
	    false, false, &deferred_work_generated);
	expect_ptr_not_null(edata, "Unexpected null edata");
	pai_dalloc(tsdn, &shard->pai, edata, &deferred_work_generated);
	hpa_shard_do_deferred_work(tsdn, shard);

	expect_true(defer_vec_purge_didfail, "Expect vec purge fail");
	expect_zu_eq(1, ndefer_purge_calls, "Expect non-vec purge");
	ndefer_purge_calls = 0;

	destroy_test_data(shard);
}
TEST_END
/*
 * Checks that several dirty ranges are purged through the vectorized hook
 * and never through the plain purge hook when the vectorized hook succeeds.
 * The expected number of vectorized calls depends on
 * opt_process_madvise_max_batch.
 */
TEST_BEGIN(test_more_regions_purged_from_one_page) {
test_skip_if(!hpa_supported() ||
(opt_process_madvise_max_batch == 0) ||
HUGEPAGE_PAGES <= 4);
hpa_hooks_t hooks;
hooks.map = &defer_test_map;
hooks.unmap = &defer_test_unmap;
hooks.purge = &defer_test_purge;
hooks.hugify = &defer_test_hugify;
hooks.dehugify = &defer_test_dehugify;
hooks.curtime = &defer_test_curtime;
hooks.ms_since = &defer_test_ms_since;
hooks.vectorized_purge = &defer_vectorized_purge;
hpa_shard_opts_t opts = test_hpa_shard_opts_default;
opts.deferral_allowed = true;
opts.min_purge_interval_ms = 0;
/* Clear both purge counters before the shard is created. */
ndefer_vec_purge_calls = 0;
ndefer_purge_calls = 0;
hpa_shard_t *shard = create_test_data(&hooks, &opts);
bool deferred_work_generated = false;
nstime_init(&defer_curtime, 0);
tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
/* Allocate 8 hugepages' worth of single-PAGE extents. */
enum {NALLOCS = 8 * HUGEPAGE_PAGES};
edata_t *edatas[NALLOCS];
for (int i = 0; i < NALLOCS; i++) {
edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
false, false, &deferred_work_generated);
expect_ptr_not_null(edatas[i], "Unexpected null edata");
}
/*
 * Deallocate almost 3 hugepages' worth of pages out of the 8, but keep
 * the 2nd and 4th PAGE (j == 1 and j == 3) of each group of
 * HUGEPAGE_PAGES allocations.  This splits the freed pages of each group
 * into three separate ranges, which forces batching.
 * NOTE(review): presumably each group of HUGEPAGE_PAGES consecutive
 * allocations lands in one hugepage -- confirm.
 */
for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) {
int j = i % HUGEPAGE_PAGES;
if (j != 1 && j != 3) {
pai_dalloc(tsdn, &shard->pai, edatas[i],
&deferred_work_generated);
}
}
hpa_shard_do_deferred_work(tsdn, shard);
/*
 * Strict minimum purge interval is not set, we should purge as long as
 * we have dirty pages.
 */
expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early");
expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early");
/*
 * We purge from 2 hugepages, each one in 3 segments.  That would be 6
 * non-vectorized calls, or 2 <= vc <= 6 vectorized calls depending on
 * the batch size: each hugepage needs ceil(3 / batch) calls, written
 * below as 1 + (3 - 1) / batch.
 */
size_t nexpected = 2 * (1 + (3 - 1) / opt_process_madvise_max_batch);
expect_zu_eq(nexpected, ndefer_vec_purge_calls, "Expect purge");
expect_zu_eq(0, ndefer_purge_calls, "Expect no non-vec purge");
ndefer_vec_purge_calls = 0;
destroy_test_data(shard);
}
TEST_END
/*
 * Test entry point: hands both vectorized-purge tests to test_no_reentrancy
 * and returns its result as the process exit status.
 */
int
main(void) {
return test_no_reentrancy(
test_vectorized_failure_fallback,
test_more_regions_purged_from_one_page);
}

View file

@ -0,0 +1,3 @@
#!/bin/sh
# Export jemalloc's runtime configuration with process_madvise_max_batch set
# to 2, i.e. a non-zero batch size.
# NOTE(review): presumably the companion script for the vectorized madvise
# unit test, whose tests skip when the batch size is 0 -- confirm.
export MALLOC_CONF="process_madvise_max_batch:2"