diff --git a/Makefile.in b/Makefile.in index b4102d0b..ee3399ec 100644 --- a/Makefile.in +++ b/Makefile.in @@ -231,6 +231,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ + $(srcroot)test/unit/hpa_vectorized_madvise.c \ $(srcroot)test/unit/hpa_background_thread.c \ $(srcroot)test/unit/hpdata.c \ $(srcroot)test/unit/huge.c \ diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h index b04b04f6..d0618f89 100644 --- a/include/jemalloc/internal/hpa_hooks.h +++ b/include/jemalloc/internal/hpa_hooks.h @@ -13,6 +13,7 @@ struct hpa_hooks_s { void (*dehugify)(void *ptr, size_t size); void (*curtime)(nstime_t *r_time, bool first_reading); uint64_t (*ms_since)(nstime_t *r_time); + bool (*vectorized_purge)(void* vec, size_t vlen, size_t nbytes); }; extern const hpa_hooks_t hpa_hooks_default; diff --git a/src/hpa.c b/src/hpa.c index c01dde13..adb106cc 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -8,6 +8,16 @@ #define HPA_EDEN_SIZE (128 * HUGEPAGE) +#define HPA_MIN_VAR_VEC_SIZE 8 +#ifdef JEMALLOC_HAVE_PROCESS_MADVISE +typedef struct iovec hpa_io_vector_t; +#else +typedef struct { + void *iov_base; + size_t iov_len; +} hpa_io_vector_t; +#endif + static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated); @@ -422,6 +432,24 @@ hpa_shard_has_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { return to_hugify != NULL || hpa_should_purge(tsdn, shard); } +/* If vectorized purge fails, fall back to purging each region singly. */ +static void +hpa_try_vectorized_purge(hpa_shard_t *shard, hpa_io_vector_t *vec, + size_t vlen, size_t nbytes) { + bool success = opt_process_madvise_max_batch > 0 + && !shard->central->hooks.vectorized_purge(vec, vlen, nbytes); + if (!success) { + /* On failure, it is safe to purge again (potential perf * penalty). If the kernel could tell us exactly which regions * failed, we could avoid that
penalty. + */ + for (size_t i = 0; i < vlen; ++i) { + shard->central->hooks.purge(vec[i].iov_base, + vec[i].iov_len); + } + } +} + /* Returns whether or not we purged anything. */ static bool hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { @@ -470,14 +498,37 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { } size_t total_purged = 0; uint64_t purges_this_pass = 0; + + assert(opt_process_madvise_max_batch <= + PROCESS_MADVISE_MAX_BATCH_LIMIT); + size_t len = opt_process_madvise_max_batch == 0 ? + HPA_MIN_VAR_VEC_SIZE : opt_process_madvise_max_batch; + VARIABLE_ARRAY(hpa_io_vector_t, vec, len); + void *purge_addr; size_t purge_size; + size_t cur = 0; + size_t total_batch_bytes = 0; while (hpdata_purge_next(to_purge, &purge_state, &purge_addr, &purge_size)) { + vec[cur].iov_base = purge_addr; + vec[cur].iov_len = purge_size; total_purged += purge_size; assert(total_purged <= HUGEPAGE); purges_this_pass++; - shard->central->hooks.purge(purge_addr, purge_size); + total_batch_bytes += purge_size; + cur++; + if (cur == len) { + hpa_try_vectorized_purge(shard, vec, len, total_batch_bytes); + assert(total_batch_bytes > 0); + cur = 0; + total_batch_bytes = 0; + } + } + + /* Batch was not full */ + if (cur > 0) { + hpa_try_vectorized_purge(shard, vec, cur, total_batch_bytes); } malloc_mutex_lock(tsdn, &shard->mtx); diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index 4628c14f..072d490e 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -10,6 +10,8 @@ static bool hpa_hooks_hugify(void *ptr, size_t size, bool sync); static void hpa_hooks_dehugify(void *ptr, size_t size); static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading); static uint64_t hpa_hooks_ms_since(nstime_t *past_nstime); +static bool hpa_hooks_vectorized_purge( + void *vec, size_t vlen, size_t nbytes); const hpa_hooks_t hpa_hooks_default = { &hpa_hooks_map, @@ -18,7 +20,8 @@ const hpa_hooks_t hpa_hooks_default = { &hpa_hooks_hugify, &hpa_hooks_dehugify, &hpa_hooks_curtime, - 
&hpa_hooks_ms_since + &hpa_hooks_ms_since, + &hpa_hooks_vectorized_purge }; static void * @@ -78,3 +81,14 @@ static uint64_t hpa_hooks_ms_since(nstime_t *past_nstime) { return nstime_ms_since(past_nstime); } + + +/* Return true if we did not purge all nbytes, or on some error */ +static bool +hpa_hooks_vectorized_purge(void *vec, size_t vlen, size_t nbytes) { +#ifdef JEMALLOC_HAVE_PROCESS_MADVISE + return pages_purge_process_madvise(vec, vlen, nbytes); +#else + return true; +#endif +} diff --git a/test/unit/hpa.c b/test/unit/hpa.c index ceed9bd8..e53ee2ec 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -357,6 +357,16 @@ defer_test_purge(void *ptr, size_t size) { ++ndefer_purge_calls; } +static bool defer_vectorized_purge_called = false; +static bool +defer_vectorized_purge(void *vec, size_t vlen, size_t nbytes) { + (void)vec; + (void)nbytes; + ++ndefer_purge_calls; + defer_vectorized_purge_called = true; + return false; +} + static size_t ndefer_hugify_calls = 0; static bool defer_test_hugify(void *ptr, size_t size, bool sync) { @@ -392,6 +402,7 @@ TEST_BEGIN(test_defer_time) { hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; @@ -506,6 +517,7 @@ TEST_BEGIN(test_no_min_purge_interval) { hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; @@ -548,6 +560,7 @@ TEST_BEGIN(test_min_purge_interval) { hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; @@ -598,6 +611,7 @@ 
TEST_BEGIN(test_purge) { hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; @@ -664,6 +678,7 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; @@ -732,6 +747,7 @@ TEST_BEGIN(test_demand_purge_slack) { hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; @@ -799,6 +815,7 @@ TEST_BEGIN(test_demand_purge_tight) { hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; @@ -855,6 +872,44 @@ TEST_BEGIN(test_demand_purge_tight) { } TEST_END +TEST_BEGIN(test_vectorized_opt_eq_zero) { + test_skip_if(!hpa_supported() || + (opt_process_madvise_max_batch != 0)); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + opts.min_purge_interval_ms = 0; + + defer_vectorized_purge_called = false; + ndefer_purge_calls = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool 
deferred_work_generated = false; + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edata, "Unexpected null edata"); + pai_dalloc(tsdn, &shard->pai, edata, &deferred_work_generated); + hpa_shard_do_deferred_work(tsdn, shard); + + expect_false(defer_vectorized_purge_called, "No vec purge"); + expect_zu_eq(1, ndefer_purge_calls, "Expect purge"); + + destroy_test_data(shard); +} +TEST_END + int main(void) { /* @@ -880,5 +935,6 @@ main(void) { test_purge, test_experimental_max_purge_nhp, test_demand_purge_slack, - test_demand_purge_tight); + test_demand_purge_tight, + test_vectorized_opt_eq_zero); } diff --git a/test/unit/hpa_vectorized_madvise.c b/test/unit/hpa_vectorized_madvise.c new file mode 100644 index 00000000..130dc699 --- /dev/null +++ b/test/unit/hpa_vectorized_madvise.c @@ -0,0 +1,258 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/hpa.h" +#include "jemalloc/internal/nstime.h" + +#define SHARD_IND 111 + +#define ALLOC_MAX (HUGEPAGE) + +typedef struct test_data_s test_data_t; +struct test_data_s { + /* + * Must be the first member -- we convert back and forth between the + * test_data_t and the hpa_shard_t; + */ + hpa_shard_t shard; + hpa_central_t central; + base_t *base; + edata_cache_t shard_edata_cache; + + emap_t emap; +}; + +static hpa_shard_opts_t test_hpa_shard_opts_default = { + /* slab_max_alloc */ + ALLOC_MAX, + /* hugification_threshold */ + HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(25), + /* deferral_allowed */ + false, + /* hugify_delay_ms */ + 10 * 1000, + /* hugify_sync */ + false, + /* min_purge_interval_ms */ + 5 * 1000, + /* experimental_max_purge_nhp */ + -1, + /* peak_demand_window_ms */ + 0 +}; + +static hpa_shard_t * +create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { + bool err; + base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND, + 
&ehooks_default_extent_hooks, /* metadata_use_hooks */ true); + assert_ptr_not_null(base, ""); + + test_data_t *test_data = malloc(sizeof(test_data_t)); + assert_ptr_not_null(test_data, ""); + + test_data->base = base; + + err = edata_cache_init(&test_data->shard_edata_cache, base); + assert_false(err, ""); + + err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false); + assert_false(err, ""); + + err = hpa_central_init(&test_data->central, test_data->base, hooks); + assert_false(err, ""); + + err = hpa_shard_init(&test_data->shard, &test_data->central, + &test_data->emap, test_data->base, &test_data->shard_edata_cache, + SHARD_IND, opts); + assert_false(err, ""); + + return (hpa_shard_t *)test_data; +} + +static void +destroy_test_data(hpa_shard_t *shard) { + test_data_t *test_data = (test_data_t *)shard; + base_delete(TSDN_NULL, test_data->base); + free(test_data); +} + +static uintptr_t defer_bump_ptr = HUGEPAGE * 123; +static void * +defer_test_map(size_t size) { + void *result = (void *)defer_bump_ptr; + defer_bump_ptr += size; + return result; +} + +static void +defer_test_unmap(void *ptr, size_t size) { + (void)ptr; + (void)size; +} + +static size_t ndefer_purge_calls = 0; +static void +defer_test_purge(void *ptr, size_t size) { + (void)ptr; + (void)size; + ++ndefer_purge_calls; +} + +static size_t ndefer_vec_purge_calls = 0; +static bool +defer_vectorized_purge(void *vec, size_t vlen, size_t nbytes) { + (void)vec; + (void)nbytes; + ++ndefer_vec_purge_calls; + return false; +} + +static bool defer_vec_purge_didfail = false; +static bool defer_vectorized_purge_fail(void *vec, size_t vlen, size_t nbytes) { + (void)vec; + (void)vlen; + (void)nbytes; + defer_vec_purge_didfail = true; + return true; +} + +static size_t ndefer_hugify_calls = 0; +static bool +defer_test_hugify(void *ptr, size_t size, bool sync) { + ++ndefer_hugify_calls; + return false; +} + +static size_t ndefer_dehugify_calls = 0; +static void +defer_test_dehugify(void *ptr, size_t 
size) { + ++ndefer_dehugify_calls; +} + +static nstime_t defer_curtime; +static void +defer_test_curtime(nstime_t *r_time, bool first_reading) { + *r_time = defer_curtime; +} + +static uint64_t +defer_test_ms_since(nstime_t *past_time) { + return (nstime_ns(&defer_curtime) - nstime_ns(past_time)) / 1000 / 1000; +} + +TEST_BEGIN(test_vectorized_failure_fallback) { + test_skip_if(!hpa_supported() || + (opt_process_madvise_max_batch == 0)); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge_fail; + defer_vec_purge_didfail = false; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + opts.min_purge_interval_ms = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edata, "Unexpected null edata"); + pai_dalloc(tsdn, &shard->pai, edata, &deferred_work_generated); + hpa_shard_do_deferred_work(tsdn, shard); + + expect_true(defer_vec_purge_didfail, "Expect vec purge fail"); + expect_zu_eq(1, ndefer_purge_calls, "Expect non-vec purge"); + ndefer_purge_calls = 0; + + destroy_test_data(shard); +} +TEST_END + +TEST_BEGIN(test_more_regions_purged_from_one_page) { + test_skip_if(!hpa_supported() || + (opt_process_madvise_max_batch == 0) || + HUGEPAGE_PAGES <= 4); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + 
hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + opts.min_purge_interval_ms = 0; + ndefer_vec_purge_calls = 0; + ndefer_purge_calls = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + enum {NALLOCS = 8 * HUGEPAGE_PAGES}; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Deallocate almost 3 hugepages out of 8, and to force batching + * keep the 2nd and 4th PAGE of each of the first 3 hugepages. + */ + for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) { + int j = i % HUGEPAGE_PAGES; + if (j != 1 && j != 3) { + pai_dalloc(tsdn, &shard->pai, edatas[i], + &deferred_work_generated); + } + } + + hpa_shard_do_deferred_work(tsdn, shard); + + /* + * Strict minimum purge interval is not set, we should purge as long as + * we have dirty pages. + */ + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + + /* We purge from 2 huge pages, each with 3 segments. That's 6 + * non-vectorized calls, or 2 <= vc <= 6 vectorized calls + * (depending on batch size).
+ */ + size_t nexpected = 2 * (1 + (3 - 1) / opt_process_madvise_max_batch); + expect_zu_eq(nexpected, ndefer_vec_purge_calls, "Expect purge"); + expect_zu_eq(0, ndefer_purge_calls, "Expect no non-vec purge"); + ndefer_vec_purge_calls = 0; + + destroy_test_data(shard); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_vectorized_failure_fallback, + test_more_regions_purged_from_one_page); +} diff --git a/test/unit/hpa_vectorized_madvise.sh b/test/unit/hpa_vectorized_madvise.sh new file mode 100644 index 00000000..c5d66afa --- /dev/null +++ b/test/unit/hpa_vectorized_madvise.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="process_madvise_max_batch:2"