[EASY] Encapsulate better: do not pass hpa_shard where hooks suffice; move shard-independent actions to hpa_utils

This commit is contained in:
Slobodan Predolac 2025-09-30 08:36:19 -07:00 committed by guangli-dai
parent 7befc6f636
commit f6fc71f79c
14 changed files with 198 additions and 175 deletions

View file

@ -124,6 +124,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \
$(srcroot)src/hook.c \
$(srcroot)src/hpa.c \
$(srcroot)src/hpa_hooks.c \
$(srcroot)src/hpa_utils.c \
$(srcroot)src/hpdata.c \
$(srcroot)src/inspect.c \
$(srcroot)src/large.c \

View file

@ -2,8 +2,20 @@
#define JEMALLOC_INTERNAL_HPA_UTILS_H
#include "jemalloc/internal/hpa.h"
#include "jemalloc/internal/extent.h"
#define HPA_MIN_VAR_VEC_SIZE 8
/*
* This is used for jemalloc internal tuning and may change in the future based
* on production traffic.
*
* This value protects two things:
* 1. Stack size
* 2. Number of huge pages that are being purged in a batch as we do not
* allow allocations while making madvise syscall.
*/
#define HPA_PURGE_BATCH_MAX 16
#ifdef JEMALLOC_HAVE_PROCESS_MADVISE
typedef struct iovec hpa_io_vector_t;
#else
@ -13,27 +25,35 @@ typedef struct {
} hpa_io_vector_t;
#endif
/*
 * Returns the iovec array length to use for vectorized (process_madvise)
 * purging.  When opt_process_madvise_max_batch is 0 (batched purging
 * disabled), a small minimum size is still returned so callers can size a
 * stack-allocated vector unconditionally.
 */
static inline size_t
hpa_process_madvise_max_iovec_len(void) {
assert(
opt_process_madvise_max_batch <= PROCESS_MADVISE_MAX_BATCH_LIMIT);
return opt_process_madvise_max_batch == 0
? HPA_MIN_VAR_VEC_SIZE
: opt_process_madvise_max_batch;
}
/* Actually invoke the hooks. If the vectorized purge fails, fall back to single purges. */
static void
hpa_try_vectorized_purge(
hpa_shard_t *shard, hpa_io_vector_t *vec, size_t vlen, size_t nbytes) {
hpa_hooks_t *hooks, hpa_io_vector_t *vec, size_t vlen, size_t nbytes) {
bool success = opt_process_madvise_max_batch > 0
&& !shard->central->hooks.vectorized_purge(vec, vlen, nbytes);
&& !hooks->vectorized_purge(vec, vlen, nbytes);
if (!success) {
/* On failure, it is safe to purge again (potential perf
* penalty) If kernel can tell exactly which regions
* failed, we could avoid that penalty.
*/
* penalty) If kernel can tell exactly which regions
* failed, we could avoid that penalty.
*/
for (size_t i = 0; i < vlen; ++i) {
shard->central->hooks.purge(
vec[i].iov_base, vec[i].iov_len);
hooks->purge(vec[i].iov_base, vec[i].iov_len);
}
}
}
/*
* This struct accumulates the regions for process_madvise.
* It invokes the hook when batch limit is reached
* This structure accumulates the regions for process_madvise. It invokes the
* hook when batch limit is reached.
*/
typedef struct {
hpa_io_vector_t *vp;
@ -51,16 +71,16 @@ hpa_range_accum_init(hpa_range_accum_t *ra, hpa_io_vector_t *v, size_t sz) {
}
static inline void
hpa_range_accum_flush(hpa_range_accum_t *ra, hpa_shard_t *shard) {
hpa_range_accum_flush(hpa_range_accum_t *ra, hpa_hooks_t *hooks) {
assert(ra->total_bytes > 0 && ra->cur > 0);
hpa_try_vectorized_purge(shard, ra->vp, ra->cur, ra->total_bytes);
hpa_try_vectorized_purge(hooks, ra->vp, ra->cur, ra->total_bytes);
ra->cur = 0;
ra->total_bytes = 0;
}
static inline void
hpa_range_accum_add(
hpa_range_accum_t *ra, void *addr, size_t sz, hpa_shard_t *shard) {
hpa_range_accum_t *ra, void *addr, size_t sz, hpa_hooks_t *hooks) {
assert(ra->cur < ra->capacity);
ra->vp[ra->cur].iov_base = addr;
@ -69,14 +89,14 @@ hpa_range_accum_add(
ra->cur++;
if (ra->cur == ra->capacity) {
hpa_range_accum_flush(ra, shard);
hpa_range_accum_flush(ra, hooks);
}
}
static inline void
hpa_range_accum_finish(hpa_range_accum_t *ra, hpa_shard_t *shard) {
hpa_range_accum_finish(hpa_range_accum_t *ra, hpa_hooks_t *hooks) {
if (ra->cur > 0) {
hpa_range_accum_flush(ra, shard);
hpa_range_accum_flush(ra, hooks);
}
}
@ -114,4 +134,28 @@ struct hpa_purge_batch_s {
size_t npurged_hp_total;
};
/*
 * True when the purge batch cannot accept another huge page: the huge-page
 * budget is exhausted, the item array is full, or the accumulated range
 * count has reached the watermark.  nranges uses >= because the last added
 * page may contribute several ranges, pushing it past the watermark.
 */
static inline bool
hpa_batch_full(hpa_purge_batch_t *b) {
/* It's okay for ranges to go above */
return b->npurged_hp_total == b->max_hp
|| b->item_cnt == b->items_capacity
|| b->nranges >= b->range_watermark;
}
/*
 * Resets the per-pass counters before collecting a new batch.  The
 * cumulative npurged_hp_total is intentionally left untouched so the
 * overall max_hp budget spans passes.
 */
static inline void
hpa_batch_pass_start(hpa_purge_batch_t *b) {
b->item_cnt = 0;
b->nranges = 0;
b->ndirty_in_batch = 0;
}
/* True when no huge pages were collected in the current pass. */
static inline bool
hpa_batch_empty(hpa_purge_batch_t *b) {
return b->item_cnt == 0;
}
/* Purge pages in a batch using given hooks */
void hpa_purge_batch(
hpa_hooks_t *hooks, hpa_purge_item_t *batch, size_t batch_sz);
#endif /* JEMALLOC_INTERNAL_HPA_UTILS_H */

View file

@ -62,6 +62,7 @@
<ClCompile Include="..\..\..\..\src\hook.c" />
<ClCompile Include="..\..\..\..\src\hpa.c" />
<ClCompile Include="..\..\..\..\src\hpa_hooks.c" />
<ClCompile Include="..\..\..\..\src\hpa_utils.c" />
<ClCompile Include="..\..\..\..\src\hpdata.c" />
<ClCompile Include="..\..\..\..\src\inspect.c" />
<ClCompile Include="..\..\..\..\src\jemalloc.c" />
@ -380,4 +381,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

View file

@ -70,6 +70,9 @@
<ClCompile Include="..\..\..\..\src\hpa_hooks.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\..\src\hpa_utils.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\..\src\hpdata.c">
<Filter>Source Files</Filter>
</ClCompile>
@ -163,6 +166,9 @@
<ClCompile Include="..\..\..\..\src\thread_event.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\..\src\thread_event_registry.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\..\src\ticker.c">
<Filter>Source Files</Filter>
</ClCompile>

View file

@ -62,6 +62,7 @@
<ClCompile Include="..\..\..\..\src\hook.c" />
<ClCompile Include="..\..\..\..\src\hpa.c" />
<ClCompile Include="..\..\..\..\src\hpa_hooks.c" />
<ClCompile Include="..\..\..\..\src\hpa_utils.c" />
<ClCompile Include="..\..\..\..\src\hpdata.c" />
<ClCompile Include="..\..\..\..\src\inspect.c" />
<ClCompile Include="..\..\..\..\src\jemalloc.c" />
@ -379,4 +380,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

View file

@ -70,6 +70,9 @@
<ClCompile Include="..\..\..\..\src\hpa_hooks.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\..\src\hpa_utils.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\..\src\hpdata.c">
<Filter>Source Files</Filter>
</ClCompile>
@ -163,6 +166,9 @@
<ClCompile Include="..\..\..\..\src\thread_event.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\..\src\thread_event_registry.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\..\src\ticker.c">
<Filter>Source Files</Filter>
</ClCompile>

View file

@ -62,6 +62,7 @@
<ClCompile Include="..\..\..\..\src\hook.c" />
<ClCompile Include="..\..\..\..\src\hpa.c" />
<ClCompile Include="..\..\..\..\src\hpa_hooks.c" />
<ClCompile Include="..\..\..\..\src\hpa_utils.c" />
<ClCompile Include="..\..\..\..\src\hpdata.c" />
<ClCompile Include="..\..\..\..\src\inspect.c" />
<ClCompile Include="..\..\..\..\src\jemalloc.c" />
@ -379,4 +380,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

View file

@ -70,6 +70,9 @@
<ClCompile Include="..\..\..\..\src\hpa_hooks.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\..\src\hpa_utils.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\..\src\hpdata.c">
<Filter>Source Files</Filter>
</ClCompile>
@ -163,6 +166,9 @@
<ClCompile Include="..\..\..\..\src\thread_event.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\..\src\thread_event_registry.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\..\src\ticker.c">
<Filter>Source Files</Filter>
</ClCompile>

View file

@ -62,6 +62,7 @@
<ClCompile Include="..\..\..\..\src\hook.c" />
<ClCompile Include="..\..\..\..\src\hpa.c" />
<ClCompile Include="..\..\..\..\src\hpa_hooks.c" />
<ClCompile Include="..\..\..\..\src\hpa_utils.c" />
<ClCompile Include="..\..\..\..\src\hpdata.c" />
<ClCompile Include="..\..\..\..\src\inspect.c" />
<ClCompile Include="..\..\..\..\src\jemalloc.c" />
@ -379,4 +380,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

View file

@ -70,6 +70,9 @@
<ClCompile Include="..\..\..\..\src\hpa_hooks.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\..\src\hpa_utils.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\..\src\hpdata.c">
<Filter>Source Files</Filter>
</ClCompile>
@ -163,6 +166,9 @@
<ClCompile Include="..\..\..\..\src\thread_event.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\..\src\thread_event_registry.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\..\src\ticker.c">
<Filter>Source Files</Filter>
</ClCompile>

View file

@ -473,70 +473,6 @@ hpa_shard_has_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) {
return to_hugify != NULL || hpa_should_purge(tsdn, shard);
}
/*
* This is used for jemalloc internal tuning and may change in the
* future based on production traffic.
*
* This value protects two things:
* 1. Stack size
* 2. Number of huge pages that are being purged in a batch as
* we do not allow allocations while making madvise syscall.
*/
#define HPA_PURGE_BATCH_MAX_DEFAULT 16
#ifndef JEMALLOC_JET
# define HPA_PURGE_BATCH_MAX HPA_PURGE_BATCH_MAX_DEFAULT
#else
size_t hpa_purge_max_batch_size_for_test = HPA_PURGE_BATCH_MAX_DEFAULT;
size_t
hpa_purge_max_batch_size_for_test_set(size_t new_size) {
size_t old_size = hpa_purge_max_batch_size_for_test;
hpa_purge_max_batch_size_for_test = new_size;
return old_size;
}
# define HPA_PURGE_BATCH_MAX hpa_purge_max_batch_size_for_test
#endif
static inline size_t
hpa_process_madvise_max_iovec_len(void) {
assert(
opt_process_madvise_max_batch <= PROCESS_MADVISE_MAX_BATCH_LIMIT);
return opt_process_madvise_max_batch == 0
? HPA_MIN_VAR_VEC_SIZE
: opt_process_madvise_max_batch;
}
static inline void
hpa_purge_actual_unlocked(
hpa_shard_t *shard, hpa_purge_item_t *batch, size_t batch_sz) {
assert(batch_sz > 0);
size_t len = hpa_process_madvise_max_iovec_len();
VARIABLE_ARRAY(hpa_io_vector_t, vec, len);
hpa_range_accum_t accum;
hpa_range_accum_init(&accum, vec, len);
for (size_t i = 0; i < batch_sz; ++i) {
/* Actually do the purging, now that the lock is dropped. */
if (batch[i].dehugify) {
shard->central->hooks.dehugify(
hpdata_addr_get(batch[i].hp), HUGEPAGE);
}
void *purge_addr;
size_t purge_size;
size_t total_purged_on_one_hp = 0;
while (hpdata_purge_next(
batch[i].hp, &batch[i].state, &purge_addr, &purge_size)) {
total_purged_on_one_hp += purge_size;
assert(total_purged_on_one_hp <= HUGEPAGE);
hpa_range_accum_add(
&accum, purge_addr, purge_size, shard);
}
}
hpa_range_accum_finish(&accum, shard);
}
static inline bool
hpa_needs_dehugify(hpa_shard_t *shard, const hpdata_t *ps) {
return (hpa_is_hugify_lazy(shard)
@ -624,26 +560,6 @@ hpa_purge_finish_hp(
psset_update_end(&shard->psset, hp_item->hp);
}
static inline bool
hpa_batch_full(hpa_purge_batch_t *b) {
/* It's okay for ranges to go above */
return b->npurged_hp_total == b->max_hp
|| b->item_cnt == b->items_capacity
|| b->nranges >= b->range_watermark;
}
static inline void
hpa_batch_pass_start(hpa_purge_batch_t *b) {
b->item_cnt = 0;
b->nranges = 0;
b->ndirty_in_batch = 0;
}
static inline bool
hpa_batch_empty(hpa_purge_batch_t *b) {
return b->item_cnt == 0;
}
/* Returns number of huge pages purged. */
static inline size_t
hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, size_t max_hp) {
@ -679,8 +595,9 @@ hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, size_t max_hp) {
if (hpa_batch_empty(&batch)) {
break;
}
hpa_hooks_t *hooks = &shard->central->hooks;
malloc_mutex_unlock(tsdn, &shard->mtx);
hpa_purge_actual_unlocked(shard, batch.items, batch.item_cnt);
hpa_purge_batch(hooks, batch.items, batch.item_cnt);
malloc_mutex_lock(tsdn, &shard->mtx);
/* The shard updates */

33
src/hpa_utils.c Normal file
View file

@ -0,0 +1,33 @@
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"
#include "jemalloc/internal/hpa_utils.h"
/*
 * Purges the dirty ranges of every huge page in `batch` using the supplied
 * hooks; intended to run with the shard lock dropped.  Pages flagged
 * `dehugify` are dehugified first.  Ranges are accumulated into a
 * stack-allocated iovec array and flushed through the vectorized purge hook
 * when it fills (with a per-range fallback on failure — see
 * hpa_try_vectorized_purge in hpa_utils.h).
 */
void
hpa_purge_batch(hpa_hooks_t *hooks, hpa_purge_item_t *batch, size_t batch_sz) {
assert(batch_sz > 0);
size_t len = hpa_process_madvise_max_iovec_len();
VARIABLE_ARRAY(hpa_io_vector_t, vec, len);
hpa_range_accum_t accum;
hpa_range_accum_init(&accum, vec, len);
for (size_t i = 0; i < batch_sz; ++i) {
/* Actually do the purging, now that the lock is dropped. */
if (batch[i].dehugify) {
hooks->dehugify(hpdata_addr_get(batch[i].hp), HUGEPAGE);
}
void *purge_addr;
size_t purge_size;
size_t total_purged_on_one_hp = 0;
while (hpdata_purge_next(
batch[i].hp, &batch[i].state, &purge_addr, &purge_size)) {
total_purged_on_one_hp += purge_size;
/* One huge page cannot yield more than HUGEPAGE dirty bytes. */
assert(total_purged_on_one_hp <= HUGEPAGE);
hpa_range_accum_add(
&accum, purge_addr, purge_size, hooks);
}
}
/* Flush any ranges still pending in the accumulator. */
hpa_range_accum_finish(&accum, hooks);
}

View file

@ -253,77 +253,8 @@ TEST_BEGIN(test_more_regions_purged_from_one_page) {
}
TEST_END
size_t hpa_purge_max_batch_size_for_test_set(size_t new_size);
TEST_BEGIN(test_more_pages_than_batch_page_size) {
test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch == 0)
|| HUGEPAGE_PAGES <= 4);
size_t old_page_batch = hpa_purge_max_batch_size_for_test_set(1);
hpa_hooks_t hooks;
hooks.map = &defer_test_map;
hooks.unmap = &defer_test_unmap;
hooks.purge = &defer_test_purge;
hooks.hugify = &defer_test_hugify;
hooks.dehugify = &defer_test_dehugify;
hooks.curtime = &defer_test_curtime;
hooks.ms_since = &defer_test_ms_since;
hooks.vectorized_purge = &defer_vectorized_purge;
hpa_shard_opts_t opts = test_hpa_shard_opts_default;
opts.deferral_allowed = true;
opts.min_purge_interval_ms = 0;
ndefer_vec_purge_calls = 0;
ndefer_purge_calls = 0;
hpa_shard_t *shard = create_test_data(&hooks, &opts);
bool deferred_work_generated = false;
nstime_init(&defer_curtime, 0);
tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
enum { NALLOCS = 8 * HUGEPAGE_PAGES };
edata_t *edatas[NALLOCS];
for (int i = 0; i < NALLOCS; i++) {
edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
false, false, &deferred_work_generated);
expect_ptr_not_null(edatas[i], "Unexpected null edata");
}
for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) {
pai_dalloc(
tsdn, &shard->pai, edatas[i], &deferred_work_generated);
}
hpa_shard_do_deferred_work(tsdn, shard);
/*
* Strict minimum purge interval is not set, we should purge as long as
* we have dirty pages.
*/
expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early");
expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early");
/* We have page batch size = 1.
* we have 5 * HP active pages, 3 * HP dirty pages
* To achieve the balance of 25% max dirty we need to
* purge 2 pages. Since batch is 1 that must be 2 calls
* no matter what opt_process_madvise_max_batch is
*/
size_t nexpected = 2;
expect_zu_eq(nexpected, ndefer_vec_purge_calls, "Expect purge");
expect_zu_eq(0, ndefer_purge_calls, "Expect no non-vec purge");
ndefer_vec_purge_calls = 0;
hpa_purge_max_batch_size_for_test_set(old_page_batch);
destroy_test_data(shard);
}
TEST_END
int
main(void) {
return test_no_reentrancy(test_vectorized_failure_fallback,
test_more_regions_purged_from_one_page,
test_more_pages_than_batch_page_size);
test_more_regions_purged_from_one_page);
}

View file

@ -1,6 +1,7 @@
#include "test/jemalloc_test.h"
#include "jemalloc/internal/hpa.h"
#include "jemalloc/internal/hpa_utils.h"
#include "jemalloc/internal/nstime.h"
#define SHARD_IND 111
@ -195,7 +196,75 @@ TEST_BEGIN(test_vectorized_purge) {
}
TEST_END
/*
 * Verifies that purging spans multiple purge batches: with
 * 2 * HPA_PURGE_BATCH_MAX huge pages' worth of dirty pages and a 1% dirty
 * target, deferred work must invoke the vectorized purge hook twice and
 * never fall back to the single-region purge hook.
 */
TEST_BEGIN(test_purge_more_than_one_batch_pages) {
test_skip_if(!hpa_supported()
|| (opt_process_madvise_max_batch < HPA_PURGE_BATCH_MAX)
|| HUGEPAGE_PAGES <= 4);
/* Instrumented hooks count calls instead of touching the kernel. */
hpa_hooks_t hooks;
hooks.map = &defer_test_map;
hooks.unmap = &defer_test_unmap;
hooks.purge = &defer_test_purge;
hooks.hugify = &defer_test_hugify;
hooks.dehugify = &defer_test_dehugify;
hooks.curtime = &defer_test_curtime;
hooks.ms_since = &defer_test_ms_since;
hooks.vectorized_purge = &defer_vectorized_purge;
hpa_shard_opts_t opts = test_hpa_shard_opts_default;
opts.deferral_allowed = true;
opts.min_purge_interval_ms = 0;
/* 1% dirty target forces aggressive purging below. */
opts.dirty_mult = FXP_INIT_PERCENT(1);
ndefer_vec_purge_calls = 0;
ndefer_purge_calls = 0;
ndefer_hugify_calls = 0;
ndefer_dehugify_calls = 0;
hpa_shard_t *shard = create_test_data(&hooks, &opts);
bool deferred_work_generated = false;
nstime_init(&defer_curtime, 0);
tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
enum { NALLOCS = HPA_PURGE_BATCH_MAX * 3 * HUGEPAGE_PAGES };
edata_t *edatas[NALLOCS];
for (int i = 0; i < NALLOCS; i++) {
edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
false, false, &deferred_work_generated);
expect_ptr_not_null(edatas[i], "Unexpected null edata");
}
/* Free two thirds of the allocations to create dirty pages. */
for (int i = 0; i < HPA_PURGE_BATCH_MAX * 2 * (int)HUGEPAGE_PAGES;
i++) {
pai_dalloc(
tsdn, &shard->pai, edatas[i], &deferred_work_generated);
}
hpa_shard_do_deferred_work(tsdn, shard);
/*
 * Strict minimum purge interval is not set, we should purge as long as
 * we have dirty pages.
 */
expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early");
expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early");
/* We have page batch size = HPA_PURGE_BATCH_MAX. We have
 * HPA_PURGE_BATCH_MAX active pages, 2 * HPA_PURGE_BATCH_MAX dirty.
 * To achieve the balance of 1% max dirty we need to purge more than one
 * batch.
 */
size_t nexpected = 2;
expect_zu_eq(nexpected, ndefer_vec_purge_calls, "Expect purge");
expect_zu_eq(0, ndefer_purge_calls, "Expect no non-vec purge");
ndefer_vec_purge_calls = 0;
destroy_test_data(shard);
}
TEST_END
int
main(void) {
return test_no_reentrancy(test_vectorized_purge);
return test_no_reentrancy(
test_vectorized_purge, test_purge_more_than_one_batch_pages);
}