mirror of
https://github.com/jemalloc/jemalloc.git
synced 2026-04-14 14:41:42 +03:00
Experimental configuration option for fast path prefetch from cache_bin
This commit is contained in:
parent
edaab8b3ad
commit
c0e9a8d326
5 changed files with 195 additions and 1 deletions
16
configure.ac
16
configure.ac
|
|
@ -1434,6 +1434,22 @@ if test "x$enable_experimental_smallocx" = "x1" ; then
|
|||
fi
|
||||
AC_SUBST([enable_experimental_smallocx])
|
||||
|
||||
dnl Fast path prefetch from cache_bin is experimental; it stays off unless
dnl --enable-experimental-fp-prefetch is passed.
AC_ARG_ENABLE([experimental_fp_prefetch],
  [AS_HELP_STRING([--enable-experimental-fp-prefetch],
    [Enable experimental fastpath prefetch])],
[if test "x$enable_experimental_fp_prefetch" != "xno" ; then
  enable_experimental_fp_prefetch="1"
else
  enable_experimental_fp_prefetch="0"
fi
],
[enable_experimental_fp_prefetch="0"]
)
dnl Expose the choice to the C sources.
if test "x$enable_experimental_fp_prefetch" = "x1" ; then
  AC_DEFINE([JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH], [ ], [ ])
fi
AC_SUBST([enable_experimental_fp_prefetch])
|
||||
|
||||
dnl Do not enable profiling by default.
|
||||
AC_ARG_ENABLE([prof],
|
||||
[AS_HELP_STRING([--enable-prof], [Enable allocation profiling])],
|
||||
|
|
|
|||
|
|
@ -376,6 +376,38 @@ cache_bin_low_water_adjust(cache_bin_t *bin) {
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef JEMALLOC_JET
/*
 * Test-only interception point for fast-path prefetches.  A hook is called
 * with the address that would be prefetched and a flag telling whether the
 * prefetch is for writing.
 */
typedef void (*test_prefetch_hook_t)(void *ptr, bool is_write);

/* The currently installed hook. */
extern test_prefetch_hook_t cache_bin_prefetch_test_hook;

/* Install f as the prefetch hook. */
test_prefetch_hook_t cache_bin_prefetch_hook_set(test_prefetch_hook_t f);
#endif /* JEMALLOC_JET */
|
||||
|
||||
/*
 * Issue a write prefetch for ptr on the allocation fast path.
 *
 * With JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH, each non-disabled bin is
 * padded with one extra slot so that the pointer following the one returned
 * on the fast path can be safely prefetched.  Without that option the body
 * is empty.
 */
static inline void
prefetch_one_w(void *ptr) {
#ifdef JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH
#  ifdef JEMALLOC_JET
	/* In test builds an installed hook observes the address instead. */
	if (cache_bin_prefetch_test_hook) {
		cache_bin_prefetch_test_hook(ptr, /* write */ true);
		return;
	}
	/* Still want to exercise the code in tests without the hook. */
#  endif /* JEMALLOC_JET */
	util_prefetch_write(ptr);
#else
	/* Prefetch is compiled out; keep the parameter formally used. */
	(void)ptr;
#endif /* JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH */
}
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE void *
|
||||
cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) {
|
||||
/*
|
||||
|
|
@ -400,6 +432,7 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) {
|
|||
*/
|
||||
if (likely(low_bits != bin->low_bits_low_water)) {
|
||||
bin->stack_head = new_head;
|
||||
prefetch_one_w(*new_head);
|
||||
*success = true;
|
||||
return ret;
|
||||
}
|
||||
|
|
@ -414,6 +447,7 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) {
|
|||
*/
|
||||
if (likely(low_bits != bin->low_bits_empty)) {
|
||||
bin->stack_head = new_head;
|
||||
prefetch_one_w(*new_head);
|
||||
bin->low_bits_low_water = (cache_bin_sz_t)(uintptr_t)new_head;
|
||||
*success = true;
|
||||
return ret;
|
||||
|
|
|
|||
|
|
@ -160,6 +160,11 @@
|
|||
/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */
|
||||
#undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API
|
||||
|
||||
/* JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH enables prefetch
|
||||
* on malloc fast path.
|
||||
*/
|
||||
#undef JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH
|
||||
|
||||
/* JEMALLOC_PROF enables allocation profiling. */
|
||||
#undef JEMALLOC_PROF
|
||||
|
||||
|
|
|
|||
|
|
@ -36,10 +36,18 @@ cache_bin_info_compute_alloc(const cache_bin_info_t *infos, szind_t ninfos,
|
|||
* checking "is_empty"; and
|
||||
* 2) the cur_ptr can go beyond the empty position by 1 step safely on
|
||||
* the fast path (i.e. no overflow).
|
||||
*
|
||||
* For each non-disabled cache_bin, reserve an extra slot to allow prefetch
|
||||
* without checking the boundary on the fast path.
|
||||
*/
|
||||
*size = sizeof(void *) * 2;
|
||||
for (szind_t i = 0; i < ninfos; i++) {
|
||||
*size += infos[i].ncached_max * sizeof(void *);
|
||||
#ifdef JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH
|
||||
if (infos[i].ncached_max > 0) {
|
||||
*size += sizeof(void *);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -100,6 +108,12 @@ cache_bin_init(cache_bin_t *bin, const cache_bin_info_t *info, void *alloc,
|
|||
bin->low_bits_full, (cache_bin_sz_t)(uintptr_t)bin->stack_head);
|
||||
assert(free_spots == bin_stack_size);
|
||||
if (!cache_bin_disabled(bin)) {
|
||||
#ifdef JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH
|
||||
/* Address will be mapped to physical page already */
|
||||
void **addr = (void **)((byte_t *)alloc + *cur_offset);
|
||||
*addr = addr;
|
||||
*cur_offset += sizeof(void *);
|
||||
#endif
|
||||
assert(cache_bin_ncached_get_local(bin) == 0);
|
||||
}
|
||||
assert(cache_bin_empty_position_get(bin) == empty_position);
|
||||
|
|
@ -117,3 +131,13 @@ cache_bin_init_disabled(cache_bin_t *bin, cache_bin_sz_t ncached_max) {
|
|||
cache_bin_info_init(&bin->bin_info, ncached_max);
|
||||
assert(fake_offset == 0);
|
||||
}
|
||||
|
||||
#ifdef JEMALLOC_JET
/* Hook consulted by test builds; NULL means no hook is installed. */
test_prefetch_hook_t cache_bin_prefetch_test_hook = NULL;

/*
 * Replace the installed prefetch hook with f (NULL uninstalls it) and return
 * the hook that was installed before the call.
 */
test_prefetch_hook_t
cache_bin_prefetch_hook_set(test_prefetch_hook_t f) {
	test_prefetch_hook_t const previous = cache_bin_prefetch_test_hook;

	cache_bin_prefetch_test_hook = f;
	return previous;
}
#endif /* JEMALLOC_JET */
|
||||
|
|
|
|||
|
|
@ -1,5 +1,11 @@
|
|||
#include "test/jemalloc_test.h"
|
||||
|
||||
/*
 * Compile-time mirror of JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH, so tests can
 * branch on it with ordinary C conditions.  It is never written, hence const.
 */
static const bool experimental_fast_prefetch_enabled =
#ifdef JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH
    true
#else
    false
#endif
    ;
|
||||
|
||||
static void
|
||||
do_fill_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t ncached_max,
|
||||
cache_bin_sz_t nfill_attempt, cache_bin_sz_t nfill_succeed) {
|
||||
|
|
@ -379,8 +385,117 @@ TEST_BEGIN(test_cache_bin_stash) {
|
|||
}
|
||||
TEST_END
|
||||
|
||||
/* One recorded invocation of the prefetch hook. */
typedef struct {
	void *ptr;	/* Address passed to the hook. */
	bool is_write;	/* Flag passed to the hook. */
} prefetch_arg_t;

/* Capacity of the recording buffer. */
#define PREFETCH_SZ 256
static prefetch_arg_t prefetch_calls[PREFETCH_SZ];
/* Number of hook invocations seen so far; may exceed PREFETCH_SZ. */
static unsigned nprefetch_calls;

/*
 * Hook that records its arguments in prefetch_calls.
 *
 * Only the first PREFETCH_SZ invocations are stored; later ones are still
 * counted, so a test comparing nprefetch_calls against an expected value
 * notices surplus calls without the buffer being overrun.
 */
static void
prefetch_hook(void *p, bool is_write) {
	if (nprefetch_calls < PREFETCH_SZ) {
		prefetch_calls[nprefetch_calls].ptr = p;
		prefetch_calls[nprefetch_calls].is_write = is_write;
	}
	++nprefetch_calls;
}
|
||||
|
||||
static void
|
||||
reset_prefetch_calls(void) {
|
||||
nprefetch_calls = 0;
|
||||
cache_bin_prefetch_hook_set(prefetch_hook);
|
||||
}
|
||||
|
||||
static void**
|
||||
do_dallocs_allocs(cache_bin_t *bin, int ncached_max) {
|
||||
bool success = false;
|
||||
assert(ncached_max < PREFETCH_SZ);
|
||||
/*
|
||||
* We allocate fully, so we can test
|
||||
* prefetch at the end of the cache bin.
|
||||
*/
|
||||
void **ptrs = mallocx(sizeof(void *) * (ncached_max + 1), 0);
|
||||
assert_ptr_not_null(ptrs, "Unexpected mallocx failure");
|
||||
for (cache_bin_sz_t i = 0; i < ncached_max; i++) {
|
||||
expect_true(cache_bin_ncached_get_local(bin) == i, "");
|
||||
success = cache_bin_dalloc_easy(bin, &ptrs[i]);
|
||||
}
|
||||
expect_true(cache_bin_ncached_get_local(bin) == ncached_max,
|
||||
"");
|
||||
|
||||
reset_prefetch_calls();
|
||||
for (cache_bin_sz_t i = 0; i < ncached_max; i++) {
|
||||
void *ptr = cache_bin_alloc_easy(bin, &success);
|
||||
expect_true(success, "");
|
||||
expect_ptr_eq(ptr, &ptrs[ncached_max - i - 1], "");
|
||||
}
|
||||
return ptrs;
|
||||
}
|
||||
|
||||
TEST_BEGIN(test_cache_bin_alloc_easy_prefetch_disabled) {
|
||||
test_skip_if(experimental_fast_prefetch_enabled);
|
||||
|
||||
const int ncached_max = 10;
|
||||
cache_bin_info_t info;
|
||||
cache_bin_info_init(&info, ncached_max);
|
||||
cache_bin_t bin;
|
||||
test_bin_init(&bin, &info);
|
||||
|
||||
/* Initialize to empty; should then have 0 elements. */
|
||||
expect_d_eq(ncached_max, cache_bin_ncached_max_get(&bin), "");
|
||||
expect_true(cache_bin_ncached_get_local(&bin) == 0, "");
|
||||
|
||||
void **ptrs = do_dallocs_allocs(&bin, ncached_max);
|
||||
/* Check prefetch calls */
|
||||
expect_zu_eq(nprefetch_calls, 0, "No calls when prefetch disabled");
|
||||
|
||||
free(ptrs);
|
||||
cache_bin_prefetch_hook_set(NULL);
|
||||
}
|
||||
TEST_END
|
||||
|
||||
TEST_BEGIN(test_cache_bin_alloc_easy_prefetch_enabled) {
|
||||
test_skip_if(!experimental_fast_prefetch_enabled);
|
||||
const int ncached_max = 10;
|
||||
|
||||
cache_bin_info_t info;
|
||||
cache_bin_info_init(&info, ncached_max);
|
||||
cache_bin_t bin;
|
||||
test_bin_init(&bin, &info);
|
||||
|
||||
/* Initialize to empty; should then have 0 elements. */
|
||||
expect_d_eq(ncached_max, cache_bin_ncached_max_get(&bin), "");
|
||||
expect_true(cache_bin_ncached_get_local(&bin) == 0, "");
|
||||
|
||||
void **ptrs = do_dallocs_allocs(&bin, ncached_max);
|
||||
/* Check prefetch calls */
|
||||
expect_zu_eq(nprefetch_calls, ncached_max, "Not enough prefetch calls");
|
||||
/*
|
||||
* Each prefetched pointer should match one ahead in original array
|
||||
* in the opposite order as bin's head moves backwards on allocations.
|
||||
*/
|
||||
for (cache_bin_sz_t i = 1; i < ncached_max; i++) {
|
||||
expect_ptr_eq(prefetch_calls[i-1].ptr,
|
||||
&ptrs[ncached_max - 1 - i], "prefetch address wrong");
|
||||
}
|
||||
|
||||
/* Bin is empty now. stack_head points one past the "real" slots */
|
||||
expect_true(cache_bin_ncached_get_local(&bin) == 0, "");
|
||||
void **expected_ptr = bin.stack_head;
|
||||
expect_ptr_eq(prefetch_calls[ncached_max - 1].ptr, expected_ptr,
|
||||
"prefetch address wrong for out of boundary");
|
||||
expect_ptr_eq(expected_ptr, *expected_ptr, "Content is the address");
|
||||
|
||||
free(ptrs);
|
||||
cache_bin_prefetch_hook_set(NULL);
|
||||
}
|
||||
TEST_END
|
||||
|
||||
int
|
||||
main(void) {
|
||||
return test(test_cache_bin,
|
||||
test_cache_bin_stash);
|
||||
test_cache_bin_stash,
|
||||
test_cache_bin_alloc_easy_prefetch_disabled,
|
||||
test_cache_bin_alloc_easy_prefetch_enabled);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue