This commit is contained in:
Slobodan Predolac 2025-06-05 14:06:22 -07:00 committed by GitHub
commit 3866241cfa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 195 additions and 1 deletions

View file

@ -1434,6 +1434,22 @@ if test "x$enable_experimental_smallocx" = "x1" ; then
fi
AC_SUBST([enable_experimental_smallocx])
dnl Do not enable fastpath prefetch by default.
dnl
dnl --enable-experimental-fp-prefetch normalizes
dnl enable_experimental_fp_prefetch to "1" when the option is given with any
dnl value other than "no", and to "0" when it is "no" or not given at all.
AC_ARG_ENABLE([experimental_fp_prefetch],
[AS_HELP_STRING([--enable-experimental-fp-prefetch], [Enable experimental fastpath prefetch])],
[if test "x$enable_experimental_fp_prefetch" = "xno" ; then
enable_experimental_fp_prefetch="0"
else
enable_experimental_fp_prefetch="1"
fi
],
[enable_experimental_fp_prefetch="0"]
)
dnl When enabled, define JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH for the C
dnl sources.  The normalized value is substituted into output files either way.
if test "x$enable_experimental_fp_prefetch" = "x1" ; then
AC_DEFINE([JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH], [ ], [ ])
fi
AC_SUBST([enable_experimental_fp_prefetch])
dnl Do not enable profiling by default.
AC_ARG_ENABLE([prof],
[AS_HELP_STRING([--enable-prof], [Enable allocation profiling])],

View file

@ -376,6 +376,38 @@ cache_bin_low_water_adjust(cache_bin_t *bin) {
}
}
#ifdef JEMALLOC_JET
/*
 * Test-only (JEMALLOC_JET) interception point for fast-path prefetches.
 *
 * A hook receives the address that would have been prefetched and a flag
 * telling whether the prefetch is for writing.
 */
typedef void (*test_prefetch_hook_t)(void *ptr, bool is_write);
/* Installs a new hook and returns a test_prefetch_hook_t to the caller. */
test_prefetch_hook_t
cache_bin_prefetch_hook_set(test_prefetch_hook_t);
/* Currently installed hook; consulted by prefetch_one_w() before prefetching. */
extern test_prefetch_hook_t cache_bin_prefetch_test_hook;
#endif
/*
 * Fast-path write prefetch helper.
 *
 * With JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH, each non-disabled bin is
 * padded with one slot so that the pointer following the one returned on the
 * fast path can be prefetched safely.  Without it, this function does nothing.
 */
static inline void
prefetch_one_w(void *ptr) {
#ifdef JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH
#  ifdef JEMALLOC_JET
	/* Let an installed test hook observe the prefetch instead. */
	if (cache_bin_prefetch_test_hook) {
		cache_bin_prefetch_test_hook(ptr, /* write */ true);
		return;
	}
	/* No hook: still want to exercise the real prefetch in tests. */
#  endif /* JEMALLOC_JET */
	util_prefetch_write(ptr);
#else
	(void)ptr;
#endif /* JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH */
}
JEMALLOC_ALWAYS_INLINE void *
cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) {
/*
@ -400,6 +432,7 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) {
*/
if (likely(low_bits != bin->low_bits_low_water)) {
bin->stack_head = new_head;
prefetch_one_w(*new_head);
*success = true;
return ret;
}
@ -414,6 +447,7 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) {
*/
if (likely(low_bits != bin->low_bits_empty)) {
bin->stack_head = new_head;
prefetch_one_w(*new_head);
bin->low_bits_low_water = (cache_bin_sz_t)(uintptr_t)new_head;
*success = true;
return ret;

View file

@ -160,6 +160,11 @@
/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */
#undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API
/*
 * JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH enables experimental prefetching
 * on the malloc fast path.
 */
#undef JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH
/* JEMALLOC_PROF enables allocation profiling. */
#undef JEMALLOC_PROF

View file

@ -36,10 +36,18 @@ cache_bin_info_compute_alloc(const cache_bin_info_t *infos, szind_t ninfos,
* checking "is_empty"; and
* 2) the cur_ptr can go beyond the empty position by 1 step safely on
* the fast path (i.e. no overflow).
*
* When JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH is defined, reserve one
* extra slot for each non-disabled cache_bin so that the fast path can
* prefetch without checking the boundary.
*/
*size = sizeof(void *) * 2;
for (szind_t i = 0; i < ninfos; i++) {
*size += infos[i].ncached_max * sizeof(void *);
#ifdef JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH
if (infos[i].ncached_max > 0) {
*size += sizeof(void *);
}
#endif
}
/*
@ -100,6 +108,12 @@ cache_bin_init(cache_bin_t *bin, const cache_bin_info_t *info, void *alloc,
bin->low_bits_full, (cache_bin_sz_t)(uintptr_t)bin->stack_head);
assert(free_spots == bin_stack_size);
if (!cache_bin_disabled(bin)) {
#ifdef JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH
/* Address will be mapped to physical page already */
void **addr = (void **)((byte_t *)alloc + *cur_offset);
*addr = addr;
*cur_offset += sizeof(void *);
#endif
assert(cache_bin_ncached_get_local(bin) == 0);
}
assert(cache_bin_empty_position_get(bin) == empty_position);
@ -117,3 +131,13 @@ cache_bin_init_disabled(cache_bin_t *bin, cache_bin_sz_t ncached_max) {
cache_bin_info_init(&bin->bin_info, ncached_max);
assert(fake_offset == 0);
}
#ifdef JEMALLOC_JET
/* Test-only prefetch hook; starts out unset. */
test_prefetch_hook_t cache_bin_prefetch_test_hook = NULL;

/*
 * Replace the test prefetch hook with f and hand back the hook that was
 * installed before the call.
 */
test_prefetch_hook_t
cache_bin_prefetch_hook_set(test_prefetch_hook_t f) {
	test_prefetch_hook_t previous = cache_bin_prefetch_test_hook;

	cache_bin_prefetch_test_hook = f;
	return previous;
}
#endif

View file

@ -1,5 +1,11 @@
#include "test/jemalloc_test.h"
/* True iff the build defines JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH. */
static bool experimental_fast_prefetch_enabled =
#ifdef JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH
    true
#else
    false
#endif
    ;
static void
do_fill_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t ncached_max,
cache_bin_sz_t nfill_attempt, cache_bin_sz_t nfill_succeed) {
@ -379,8 +385,117 @@ TEST_BEGIN(test_cache_bin_stash) {
}
TEST_END
/* One recorded invocation of the test prefetch hook. */
typedef struct {
	void *ptr;	/* Address passed to the hook. */
	bool is_write;	/* Write flag passed to the hook. */
} prefetch_arg_t;

/* Capacity of the prefetch_calls log. */
#define PREFETCH_SZ 256
static prefetch_arg_t prefetch_calls[PREFETCH_SZ];
/* Number of hook invocations seen so far; may exceed PREFETCH_SZ. */
static unsigned nprefetch_calls;

/*
 * Test prefetch hook: record the arguments and count the call.
 *
 * Only the first PREFETCH_SZ invocations are stored, so an unexpectedly large
 * number of prefetches cannot write past the end of prefetch_calls.  Every
 * invocation is still counted, which keeps count-based expectations honest.
 */
static void
prefetch_hook(void *p, bool is_write) {
	if (nprefetch_calls < PREFETCH_SZ) {
		prefetch_calls[nprefetch_calls].ptr = p;
		prefetch_calls[nprefetch_calls].is_write = is_write;
	}
	++nprefetch_calls;
}
/*
 * Start a fresh recording: route cache bin prefetches to prefetch_hook and
 * zero the call counter.
 */
static void
reset_prefetch_calls(void) {
	(void)cache_bin_prefetch_hook_set(prefetch_hook);
	nprefetch_calls = 0;
}
/*
 * Fill bin with ncached_max pointers, then drain it again while the prefetch
 * hook is recording.
 *
 * The cached pointers are the addresses of consecutive elements of a freshly
 * allocated array, so they come back out in reverse element order.  Returns
 * that array; the caller is expected to free it.
 */
static void **
do_dallocs_allocs(cache_bin_t *bin, int ncached_max) {
	bool success = false;
	/* Every drain step may log one prefetch; the log must be able to hold them. */
	assert(ncached_max < PREFETCH_SZ);
	/*
	 * We allocate fully, so we can test
	 * prefetch at the end of the cache bin.
	 */
	void **ptrs = mallocx(sizeof(void *) * (ncached_max + 1), 0);
	assert_ptr_not_null(ptrs, "Unexpected mallocx failure");
	for (cache_bin_sz_t i = 0; i < ncached_max; i++) {
		expect_true(cache_bin_ncached_get_local(bin) == i, "");
		success = cache_bin_dalloc_easy(bin, &ptrs[i]);
		/* A failed fill would otherwise go unnoticed until the drain. */
		expect_true(success,
		    "Should be able to dalloc into a non-full cache bin.");
	}
	expect_true(cache_bin_ncached_get_local(bin) == ncached_max,
	    "");
	/* Record only the prefetches issued by the allocations below. */
	reset_prefetch_calls();
	for (cache_bin_sz_t i = 0; i < ncached_max; i++) {
		void *ptr = cache_bin_alloc_easy(bin, &success);
		expect_true(success, "");
		expect_ptr_eq(ptr, &ptrs[ncached_max - i - 1], "");
	}
	return ptrs;
}
/*
 * When the experimental fast-path prefetch is not compiled in, filling and
 * draining a bin must not produce a single call to the prefetch hook.
 */
TEST_BEGIN(test_cache_bin_alloc_easy_prefetch_disabled) {
/* Only applies to builds without the experimental prefetch. */
test_skip_if(experimental_fast_prefetch_enabled);
const int ncached_max = 10;
cache_bin_info_t info;
cache_bin_info_init(&info, ncached_max);
cache_bin_t bin;
test_bin_init(&bin, &info);
/* Initialize to empty; should then have 0 elements. */
expect_d_eq(ncached_max, cache_bin_ncached_max_get(&bin), "");
expect_true(cache_bin_ncached_get_local(&bin) == 0, "");
/* Fill the bin, then drain it with the recording hook installed. */
void **ptrs = do_dallocs_allocs(&bin, ncached_max);
/* Check prefetch calls */
expect_zu_eq(nprefetch_calls, 0, "No calls when prefetch disabled");
free(ptrs);
/* Uninstall the recording hook so it does not outlive this test. */
cache_bin_prefetch_hook_set(NULL);
}
TEST_END
/*
 * When the experimental fast-path prefetch is compiled in, every allocation
 * that drains the bin must issue exactly one prefetch, aimed at the pointer
 * stored in the slot the bin's head moves to.
 */
TEST_BEGIN(test_cache_bin_alloc_easy_prefetch_enabled) {
/* Only applies to builds with the experimental prefetch. */
test_skip_if(!experimental_fast_prefetch_enabled);
const int ncached_max = 10;
cache_bin_info_t info;
cache_bin_info_init(&info, ncached_max);
cache_bin_t bin;
test_bin_init(&bin, &info);
/* Initialize to empty; should then have 0 elements. */
expect_d_eq(ncached_max, cache_bin_ncached_max_get(&bin), "");
expect_true(cache_bin_ncached_get_local(&bin) == 0, "");
/* Fill the bin, then drain it with the recording hook installed. */
void **ptrs = do_dallocs_allocs(&bin, ncached_max);
/* Check prefetch calls */
expect_zu_eq(nprefetch_calls, ncached_max, "Not enough prefetch calls");
/*
 * Each prefetched pointer should match one ahead in original array
 * in the opposite order as bin's head moves backwards on allocations.
 */
for (cache_bin_sz_t i = 1; i < ncached_max; i++) {
expect_ptr_eq(prefetch_calls[i-1].ptr,
&ptrs[ncached_max - 1 - i], "prefetch address wrong");
}
/* Bin is empty now. stack_head points one past the "real" slots */
expect_true(cache_bin_ncached_get_local(&bin) == 0, "");
/*
 * The final prefetch targets the value stored in that extra slot, and
 * the slot is expected to hold its own address.
 */
void **expected_ptr = bin.stack_head;
expect_ptr_eq(prefetch_calls[ncached_max - 1].ptr, expected_ptr,
"prefetch address wrong for out of boundary");
expect_ptr_eq(expected_ptr, *expected_ptr, "Content is the address");
free(ptrs);
/* Uninstall the recording hook so it does not outlive this test. */
cache_bin_prefetch_hook_set(NULL);
}
TEST_END
/*
 * Test driver: hand the cache bin tests, including both prefetch variants, to
 * the test runner and return its result.
 */
int
main(void) {
	return test(test_cache_bin,
	    test_cache_bin_stash,
	    test_cache_bin_alloc_easy_prefetch_disabled,
	    test_cache_bin_alloc_easy_prefetch_enabled);
}