Remove batch_alloc API

This commit is contained in:
Slobodan Predolac 2026-05-22 16:21:04 -07:00 committed by Guangli Dai
parent 78cbeaf8a4
commit c5a1822d69
10 changed files with 2 additions and 608 deletions

View file

@ -211,7 +211,6 @@ TESTS_UNIT := \
$(srcroot)test/unit/background_thread_enable.c \
$(srcroot)test/unit/background_thread_init.c \
$(srcroot)test/unit/base.c \
$(srcroot)test/unit/batch_alloc.c \
$(srcroot)test/unit/bin.c \
$(srcroot)test/unit/binshard.c \
$(srcroot)test/unit/bitmap.c \
@ -319,8 +318,7 @@ TESTS_UNIT := \
$(srcroot)test/unit/zero_reallocs.c
ifeq (@enable_prof@, 1)
TESTS_UNIT += \
$(srcroot)test/unit/arena_reset_prof.c \
$(srcroot)test/unit/batch_alloc_prof.c
$(srcroot)test/unit/arena_reset_prof.c
endif
TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \
$(srcroot)test/integration/allocated.c \
@ -354,8 +352,7 @@ endif
TESTS_ANALYZE := $(srcroot)test/analyze/prof_bias.c \
$(srcroot)test/analyze/rand.c \
$(srcroot)test/analyze/sizes.c
TESTS_STRESS := $(srcroot)test/stress/batch_alloc.c \
$(srcroot)test/stress/fill_flush.c \
TESTS_STRESS := $(srcroot)test/stress/fill_flush.c \
$(srcroot)test/stress/large_microbench.c \
$(srcroot)test/stress/mallctl.c \
$(srcroot)test/stress/microbench.c

View file

@ -442,19 +442,6 @@ cache_bin_alloc(cache_bin_t *bin, bool *success) {
return cache_bin_alloc_impl(bin, success, true);
}
/*
 * Hand out up to num cached pointers from bin in a single step.
 *
 * The pointers are copied into out starting from bin->stack_head, the head is
 * then advanced past them, and cache_bin_low_water_adjust() is invoked.
 *
 * Returns the number of pointers copied: num, or the number currently cached
 * when that is smaller.
 */
JEMALLOC_ALWAYS_INLINE cache_bin_sz_t
cache_bin_alloc_batch(cache_bin_t *bin, size_t num, void **out) {
	cache_bin_sz_t ncached = cache_bin_ncached_get_internal(bin);
	cache_bin_sz_t ntaken = (ncached > num) ? (cache_bin_sz_t)num
	    : ncached;

	memcpy(out, bin->stack_head, ntaken * sizeof(void *));
	bin->stack_head += ntaken;
	cache_bin_low_water_adjust(bin);

	return ntaken;
}
JEMALLOC_ALWAYS_INLINE bool
cache_bin_full(cache_bin_t *bin) {
return (

View file

@ -56,7 +56,6 @@ extern unsigned ncpus;
void *bootstrap_malloc(size_t size);
void *bootstrap_calloc(size_t num, size_t size);
void bootstrap_free(void *ptr);
size_t batch_alloc(void **ptrs, size_t num, size_t size, int flags);
void sdallocx_default(void *ptr, size_t size, int flags);
void free_default(void *ptr);
void *malloc_default(size_t size);

View file

@ -2028,185 +2028,6 @@ je_malloc_size(const void *ptr) {
}
#endif
/*
 * Sanity check for a batch size that was trimmed to avoid prof sampling.
 * The function consists solely of assertions:
 *  - profiling must be compiled in and enabled (config_prof && opt_prof);
 *  - a lookahead over batch * usize bytes must not report a sample event;
 *  - a lookahead over (batch + 1) * usize bytes must report one, with a
 *    surplus strictly smaller than usize.
 */
static void
batch_alloc_prof_sample_assert(tsd_t *tsd, size_t batch, size_t usize) {
assert(config_prof && opt_prof);
/* The batch as sized must stay short of the sample event. */
bool prof_sample_event = te_prof_sample_event_lookahead(
tsd, batch * usize);
assert(!prof_sample_event);
/* One additional object of usize bytes must reach it. */
size_t surplus;
prof_sample_event = te_prof_sample_event_lookahead_surplus(
tsd, (batch + 1) * usize, &surplus);
assert(prof_sample_event);
assert(surplus < usize);
}
/*
 * Allocate up to num objects of the requested size, honoring the MALLOCX_*
 * bits encoded in flags, and store them in ptrs[0..filled).
 *
 * Returns filled, the number of pointers written.  The result is smaller than
 * num when the call stops early: tsd is NULL, the reentrancy level is
 * nonzero, aligned_usize_get() rejects the size/alignment, the arena cannot
 * be obtained, or je_mallocx() returns NULL.
 *
 * Every iteration of the main loop sizes a batch and serves it from up to
 * three sources, in this order:
 *  1. arena_fill_small_fresh(), for whole multiples of nregs (small size
 *     classes only);
 *  2. the thread cache bin, via cache_bin_alloc_batch();
 *  3. a single je_mallocx() call, when the batch was not fully served or a
 *     prof sample event is due.
 */
size_t
batch_alloc(void **ptrs, size_t num, size_t size, int flags) {
LOG("core.batch_alloc.entry",
"ptrs: %p, num: %zu, size: %zu, flags: %d", ptrs, num, size, flags);
tsd_t *tsd = tsd_fetch();
check_entry_exit_locking(tsd_tsdn(tsd));
size_t filled = 0;
/* Without a tsd, or while reentrant, return with nothing allocated. */
if (unlikely(tsd == NULL || tsd_reentrancy_level_get(tsd) > 0)) {
goto label_done;
}
size_t alignment = MALLOCX_ALIGN_GET(flags);
size_t usize;
/* A true return ends the call with nothing allocated. */
if (aligned_usize_get(size, alignment, &usize, NULL, false)) {
goto label_done;
}
szind_t ind = sz_size2index(usize);
bool zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true);
/*
 * The cache bin and arena will be lazily initialized; it's hard to
 * know in advance whether each of them needs to be initialized.
 */
cache_bin_t *bin = NULL;
arena_t *arena = NULL;
/* nregs stays 0 unless ind denotes a small size class. */
size_t nregs = 0;
if (likely(ind < SC_NBINS)) {
nregs = bin_infos[ind].nregs;
assert(nregs > 0);
}
while (filled < num) {
/* Start by aiming for everything that is still missing. */
size_t batch = num - filled;
size_t surplus = SIZE_MAX; /* Dead store. */
bool prof_sample_event = config_prof && opt_prof
&& prof_active_get_unlocked()
&& te_prof_sample_event_lookahead_surplus(
tsd, batch * usize, &surplus);
if (prof_sample_event) {
/*
 * Adjust so that the batch does not trigger prof
 * sampling.
 */
batch -= surplus / usize + 1;
batch_alloc_prof_sample_assert(tsd, batch, usize);
}
/* Number of objects obtained during this iteration. */
size_t progress = 0;
if (likely(ind < SC_NBINS) && batch >= nregs) {
if (arena == NULL) {
unsigned arena_ind = mallocx_arena_get(flags);
if (arena_get_from_ind(
tsd, arena_ind, &arena)) {
goto label_done;
}
if (arena == NULL) {
arena = arena_choose(tsd, NULL);
}
if (unlikely(arena == NULL)) {
goto label_done;
}
}
/* Only whole multiples of nregs are requested from the arena. */
size_t arena_batch = batch - batch % nregs;
size_t n = arena_fill_small_fresh(tsd_tsdn(tsd), arena,
ind, ptrs + filled, arena_batch, zero);
progress += n;
filled += n;
}
unsigned tcache_ind = mallocx_tcache_get(flags);
tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind,
/* slow */ true, /* is_alloc */ true);
if (likely(tcache != NULL
&& ind < tcache_nbins_get(tcache->tcache_slow)
&& !tcache_bin_disabled(
ind, &tcache->bins[ind], tcache->tcache_slow))
&& progress < batch) {
if (bin == NULL) {
bin = &tcache->bins[ind];
}
/*
 * If we don't have a tcache bin, we don't want to
 * immediately give up, because there's the possibility
 * that the user explicitly requested to bypass the
 * tcache, or that the user explicitly turned off the
 * tcache; in such cases, we go through the slow path,
 * i.e. the mallocx() call at the end of the while loop.
 */
if (bin != NULL) {
size_t bin_batch = batch - progress;
/*
 * n can be less than bin_batch, meaning that
 * the cache bin does not have enough memory.
 * In such cases, we rely on the slow path,
 * i.e. the mallocx() call at the end of the
 * while loop, to fill in the cache, and in the
 * next iteration of the while loop, the tcache
 * will contain a lot of memory, and we can
 * harvest them here. Compared to the
 * alternative approach where we directly go to
 * the arena bins here, the overhead of our
 * current approach should usually be minimal,
 * since we never try to fetch more memory than
 * what a slab contains via the tcache. An
 * additional benefit is that the tcache will
 * not be empty for the next allocation request.
 */
size_t n = cache_bin_alloc_batch(
bin, bin_batch, ptrs + filled);
if (config_stats) {
bin->tstats.nrequests += n;
}
/* Objects taken from the cache bin are zeroed here when requested. */
if (zero) {
for (size_t i = 0; i < n; ++i) {
memset(
ptrs[filled + i], 0, usize);
}
}
if (config_prof && opt_prof
&& unlikely(ind >= SC_NBINS)) {
for (size_t i = 0; i < n; ++i) {
prof_tctx_reset_sampled(
tsd, ptrs[filled + i]);
}
}
progress += n;
filled += n;
}
}
/*
 * For thread events other than prof sampling, trigger them as
 * if there's a single allocation of size (progress * usize).
 * This is fine because:
 * (a) these events do not alter the allocation itself, and
 * (b) it's possible that some event would have been triggered
 * multiple times, instead of only once, if the allocations
 * were handled individually, but it would do no harm (or
 * even be beneficial) to coalesce the triggerings.
 */
thread_alloc_event(tsd, progress * usize);
/*
 * Slow path: one regular allocation, taken when the batch was not
 * fully served or when a prof sample event is due.
 */
if (progress < batch || prof_sample_event) {
void *p = je_mallocx(size, flags);
if (p == NULL) { /* OOM */
break;
}
if (progress == batch) {
assert(prof_sampled(tsd, p));
}
ptrs[filled++] = p;
}
}
label_done:
check_entry_exit_locking(tsd_tsdn(tsd));
LOG("core.batch_alloc.exit", "result: %zu", filled);
return filled;
}
/*
* End non-standard functions.
*/

View file

@ -1,177 +0,0 @@
#include "test/jemalloc_test.h"
#include "test/bench.h"
#define TINY_BATCH 10
#define TINY_BATCH_ITER (10 * 1000 * 1000)
#define HUGE_BATCH (1000 * 1000)
#define HUGE_BATCH_ITER 100
#define LEN (100 * 1000 * 1000)
static void *batch_ptrs[LEN];
static size_t batch_ptrs_next = 0;
static void *item_ptrs[LEN];
static size_t item_ptrs_next = 0;
#define SIZE 7
/*
 * Request batch objects of SIZE bytes through batch_alloc(), writing them to
 * batch_ptrs starting at the current cursor, and assert that the whole batch
 * was filled.  The cursor itself is left untouched.
 */
static void
batch_alloc_wrapper(size_t batch) {
	void **dst = &batch_ptrs[batch_ptrs_next];
	size_t filled = batch_alloc(dst, batch, SIZE, 0);

	assert_zu_eq(filled, batch, "");
}
/*
 * Allocate batch objects of SIZE bytes one at a time with jet_malloc() and
 * record them in item_ptrs starting at the current cursor.  The cursor itself
 * is left untouched.
 */
static void
item_alloc_wrapper(size_t batch) {
	void **slots = item_ptrs + item_ptrs_next;

	for (size_t k = 0; k < batch; ++k) {
		slots[k] = jet_malloc(SIZE);
	}
}
/*
 * Release the first len entries of ptrs with jet_sdallocx() (size SIZE) and
 * overwrite each slot with NULL.  Every entry is asserted to be non-NULL
 * before it is released.
 */
static void
release_and_clear(void **ptrs, size_t len) {
	void **end = ptrs + len;

	for (void **slot = ptrs; slot != end; ++slot) {
		void *p = *slot;
		assert_ptr_not_null(p, "allocation failed");
		jet_sdallocx(p, SIZE, 0);
		*slot = NULL;
	}
}
/*
 * Batch-allocate at the current cursor and keep the objects: the cursor is
 * advanced past them, so releasing them is left to the caller.
 */
static void
batch_alloc_without_free(size_t batch) {
batch_alloc_wrapper(batch);
/* Advance only after the wrapper has written at the old cursor. */
batch_ptrs_next += batch;
}
/*
 * Allocate item by item at the current cursor and keep the objects: the
 * cursor is advanced past them, so releasing them is left to the caller.
 */
static void
item_alloc_without_free(size_t batch) {
item_alloc_wrapper(batch);
/* Advance only after the wrapper has written at the old cursor. */
item_ptrs_next += batch;
}
/*
 * Batch-allocate at the current cursor and release the objects right away.
 * The cursor is still advanced, so the next call writes to the following
 * region of batch_ptrs.
 */
static void
batch_alloc_with_free(size_t batch) {
batch_alloc_wrapper(batch);
release_and_clear(batch_ptrs + batch_ptrs_next, batch);
batch_ptrs_next += batch;
}
/*
 * Allocate item by item at the current cursor and release the objects right
 * away.  The cursor is still advanced, so the next call writes to the
 * following region of item_ptrs.
 */
static void
item_alloc_with_free(size_t batch) {
item_alloc_wrapper(batch);
release_and_clear(item_ptrs + item_ptrs_next, batch);
item_ptrs_next += batch;
}
/*
 * Release every pointer recorded so far in batch_ptrs and item_ptrs, and
 * rewind both cursors to zero.
 */
static void
release_all_recorded(void) {
	release_and_clear(batch_ptrs, batch_ptrs_next);
	batch_ptrs_next = 0;
	release_and_clear(item_ptrs, item_ptrs_next);
	item_ptrs_next = 0;
}

/*
 * Compare batch allocation against item-by-item allocation when the objects
 * are kept alive for the whole run.
 *
 * Both functions are first called iter times each, everything they recorded
 * is released, and then they are handed to compare_funcs() for iter
 * iterations; whatever that run recorded is released as well.  Both cursors
 * must be zero on entry, and batch * iter must not exceed LEN.
 */
static void
compare_without_free(size_t batch, size_t iter,
    void (*batch_alloc_without_free_func)(void),
    void (*item_alloc_without_free_func)(void)) {
	assert(batch_ptrs_next == 0);
	assert(item_ptrs_next == 0);
	assert(batch * iter <= LEN);

	size_t round = 0;
	while (round < iter) {
		batch_alloc_without_free_func();
		item_alloc_without_free_func();
		round++;
	}
	release_all_recorded();

	compare_funcs(0, iter, "batch allocation",
	    batch_alloc_without_free_func, "item allocation",
	    item_alloc_without_free_func);
	release_all_recorded();
}
/*
 * Compare batch allocation against item-by-item allocation when every
 * object is released immediately after it is allocated.
 *
 * Both functions are first called iter times each, then handed to
 * compare_funcs() for iter iterations.  Only the cursors need rewinding
 * after each phase; nothing is released here.  Both cursors must be zero on
 * entry, and batch * iter must not exceed LEN.
 */
static void
compare_with_free(size_t batch, size_t iter,
    void (*batch_alloc_with_free_func)(void),
    void (*item_alloc_with_free_func)(void)) {
	assert(batch_ptrs_next == 0);
	assert(item_ptrs_next == 0);
	assert(batch * iter <= LEN);

	size_t round = 0;
	while (round < iter) {
		batch_alloc_with_free_func();
		item_alloc_with_free_func();
		round++;
	}
	batch_ptrs_next = 0;
	item_ptrs_next = 0;

	compare_funcs(0, iter, "batch allocation", batch_alloc_with_free_func,
	    "item allocation", item_alloc_with_free_func);
	batch_ptrs_next = 0;
	item_ptrs_next = 0;
}
/*
 * Trampolines that bind TINY_BATCH, giving the helpers the void(void)
 * signature that compare_without_free() takes.
 */
static void
batch_alloc_without_free_tiny(void) {
batch_alloc_without_free(TINY_BATCH);
}
static void
item_alloc_without_free_tiny(void) {
item_alloc_without_free(TINY_BATCH);
}
/*
 * Batch vs. item allocation with batches of TINY_BATCH objects, run for
 * TINY_BATCH_ITER iterations, keeping the objects allocated.
 */
TEST_BEGIN(test_tiny_batch_without_free) {
compare_without_free(TINY_BATCH, TINY_BATCH_ITER,
batch_alloc_without_free_tiny, item_alloc_without_free_tiny);
}
TEST_END
/*
 * Trampolines that bind TINY_BATCH, giving the helpers the void(void)
 * signature that compare_with_free() takes.
 */
static void
batch_alloc_with_free_tiny(void) {
batch_alloc_with_free(TINY_BATCH);
}
static void
item_alloc_with_free_tiny(void) {
item_alloc_with_free(TINY_BATCH);
}
/*
 * Batch vs. item allocation with batches of TINY_BATCH objects, run for
 * TINY_BATCH_ITER iterations, releasing the objects after every batch.
 */
TEST_BEGIN(test_tiny_batch_with_free) {
compare_with_free(TINY_BATCH, TINY_BATCH_ITER,
batch_alloc_with_free_tiny, item_alloc_with_free_tiny);
}
TEST_END
/*
 * Trampolines that bind HUGE_BATCH, giving the helpers the void(void)
 * signature that compare_without_free() takes.
 */
static void
batch_alloc_without_free_huge(void) {
batch_alloc_without_free(HUGE_BATCH);
}
static void
item_alloc_without_free_huge(void) {
item_alloc_without_free(HUGE_BATCH);
}
/*
 * Batch vs. item allocation with batches of HUGE_BATCH objects, run for
 * HUGE_BATCH_ITER iterations, keeping the objects allocated.
 */
TEST_BEGIN(test_huge_batch_without_free) {
compare_without_free(HUGE_BATCH, HUGE_BATCH_ITER,
batch_alloc_without_free_huge, item_alloc_without_free_huge);
}
TEST_END
/*
 * Trampolines that bind HUGE_BATCH, giving the helpers the void(void)
 * signature that compare_with_free() takes.
 */
static void
batch_alloc_with_free_huge(void) {
batch_alloc_with_free(HUGE_BATCH);
}
static void
item_alloc_with_free_huge(void) {
item_alloc_with_free(HUGE_BATCH);
}
/*
 * Batch vs. item allocation with batches of HUGE_BATCH objects, run for
 * HUGE_BATCH_ITER iterations, releasing the objects after every batch.
 */
TEST_BEGIN(test_huge_batch_with_free) {
compare_with_free(HUGE_BATCH, HUGE_BATCH_ITER,
batch_alloc_with_free_huge, item_alloc_with_free_huge);
}
TEST_END
/*
 * Run the four comparisons through test_no_reentrancy() and return its
 * result as the process exit status.
 */
int
main(void) {
return test_no_reentrancy(test_tiny_batch_without_free,
test_tiny_batch_with_free, test_huge_batch_without_free,
test_huge_batch_with_free);
}

View file

@ -1,175 +0,0 @@
#include "test/jemalloc_test.h"
#define BATCH_MAX ((1U << 16) + 1024)
static void *global_ptrs[BATCH_MAX];
/*
 * True when the address of ptr has no bits set under PAGE_MASK.  The argument
 * is parenthesized so that the cast applies to the whole expression passed
 * in, not just to its first operand.
 */
#define PAGE_ALIGNED(ptr) ((((uintptr_t)(ptr)) & PAGE_MASK) == 0)
/*
 * Per-object checks on the first batch entries of ptrs: isalloc() must
 * report usize for each one and, when zero is set, every one of its usize
 * bytes must read as 0.
 */
static void
verify_batch_basic(
    tsd_t *tsd, void **ptrs, size_t batch, size_t usize, bool zero) {
	size_t i = 0;

	while (i < batch) {
		void *p = ptrs[i++];
		expect_zu_eq(isalloc(tsd_tsdn(tsd), p), usize, "");
		if (!zero) {
			continue;
		}
		for (size_t k = 0; k < usize; ++k) {
			expect_true(*((unsigned char *)p + k) == 0, "");
		}
	}
}
/*
 * Check the layout of a batch, walking it in runs of nregs pointers:
 *  - every checked pointer must belong to arena according to iaalloc();
 *  - the first pointer of a run must satisfy PAGE_ALIGNED();
 *  - every other pointer of a run must sit exactly usize bytes above its
 *    predecessor.
 * A trailing run with fewer than nregs pointers left is not checked.
 */
static void
verify_batch_locality(tsd_t *tsd, void **ptrs, size_t batch, size_t usize,
    arena_t *arena, unsigned nregs) {
	if (config_prof && opt_prof) {
		/*
		 * Locality could be verified with prof enabled too, but it
		 * is complicated; covering the non-prof case is enough for
		 * a unit test.
		 */
		return;
	}

	size_t i = 0;
	size_t j = 0; /* Position within the current run. */
	while (i < batch) {
		if (j == nregs) {
			j = 0;
		}
		if (j == 0 && batch - i < nregs) {
			break;
		}
		void *p = ptrs[i];
		expect_ptr_eq(iaalloc(tsd_tsdn(tsd), p), arena, "");
		if (j == 0) {
			expect_true(PAGE_ALIGNED(p), "");
		} else {
			assert(i > 0);
			void *q = ptrs[i - 1];
			expect_true((uintptr_t)p > (uintptr_t)q
			        && (size_t)((uintptr_t)p - (uintptr_t)q) == usize,
			    "");
		}
		++i;
		++j;
	}
}
/* Release the first batch entries of ptrs with sdallocx(), passing size. */
static void
release_batch(void **ptrs, size_t batch, size_t size) {
	void **end = ptrs + batch;

	for (void **slot = ptrs; slot != end; ++slot) {
		sdallocx(*slot, size, 0);
	}
}
/*
 * Thin pass-through to batch_alloc(): forwards all arguments unchanged and
 * returns its result.
 */
static size_t
batch_alloc_wrapper(void **ptrs, size_t num, size_t size, int flags) {
return batch_alloc(ptrs, num, size, flags);
}
static void
test_wrapper(size_t size, size_t alignment, bool zero, unsigned arena_flag) {
tsd_t *tsd = tsd_fetch();
assert(tsd != NULL);
const size_t usize = (alignment != 0 ? sz_sa2u(size, alignment)
: sz_s2u(size));
const szind_t ind = sz_size2index(usize);
const bin_info_t *bin_info = &bin_infos[ind];
const unsigned nregs = bin_info->nregs;
assert(nregs > 0);
arena_t *arena;
if (arena_flag != 0) {
arena = arena_get(
tsd_tsdn(tsd), MALLOCX_ARENA_GET(arena_flag), false);
} else {
arena = arena_choose(tsd, NULL);
}
assert(arena != NULL);
int flags = arena_flag;
if (alignment != 0) {
flags |= MALLOCX_ALIGN(alignment);
}
if (zero) {
flags |= MALLOCX_ZERO;
}
/*
* Allocate for the purpose of bootstrapping arena_tdata, so that the
* change in bin stats won't contaminate the stats to be verified below.
*/
void *p = mallocx(size, flags | MALLOCX_TCACHE_NONE);
for (size_t i = 0; i < 4; ++i) {
size_t base = 0;
if (i == 1) {
base = nregs;
} else if (i == 2) {
base = nregs * 2;
} else if (i == 3) {
base = (1 << 16);
}
for (int j = -1; j <= 1; ++j) {
if (base == 0 && j == -1) {
continue;
}
size_t batch = base + (size_t)j;
assert(batch < BATCH_MAX);
size_t filled = batch_alloc_wrapper(
global_ptrs, batch, size, flags);
assert_zu_eq(filled, batch, "");
verify_batch_basic(
tsd, global_ptrs, batch, usize, zero);
verify_batch_locality(
tsd, global_ptrs, batch, usize, arena, nregs);
release_batch(global_ptrs, batch, usize);
}
}
free(p);
}
/* Plain case: size 11, no alignment, no zeroing, no explicit arena. */
TEST_BEGIN(test_batch_alloc) {
test_wrapper(11, 0, false, 0);
}
TEST_END
/* Same as the plain case, but with zeroing requested. */
TEST_BEGIN(test_batch_alloc_zero) {
test_wrapper(11, 0, true, 0);
}
TEST_END
/* Aligned case: size 7 with a 16-byte alignment request. */
TEST_BEGIN(test_batch_alloc_aligned) {
test_wrapper(7, 16, false, 0);
}
TEST_END
/*
 * Explicit-arena case: create an arena through the "arenas.create" mallctl
 * and run the plain case against it via MALLOCX_ARENA().
 */
TEST_BEGIN(test_batch_alloc_manual_arena) {
unsigned arena_ind;
size_t len_unsigned = sizeof(unsigned);
assert_d_eq(
mallctl("arenas.create", &arena_ind, &len_unsigned, NULL, 0), 0,
"");
test_wrapper(11, 0, false, MALLOCX_ARENA(arena_ind));
}
TEST_END
/*
 * Batches of 0 through 3 objects at two sizes: SC_LARGE_MINCLASS, and one
 * byte above global_do_not_change_tcache_maxclass.  Every batch must be
 * filled completely and is released afterwards.
 * NOTE(review): the second size presumably lies beyond what the tcache
 * serves -- confirm against the definition of that global.
 */
TEST_BEGIN(test_batch_alloc_large) {
size_t size = SC_LARGE_MINCLASS;
for (size_t batch = 0; batch < 4; ++batch) {
assert(batch < BATCH_MAX);
size_t filled = batch_alloc(global_ptrs, batch, size, 0);
assert_zu_eq(filled, batch, "");
release_batch(global_ptrs, batch, size);
}
size = global_do_not_change_tcache_maxclass + 1;
for (size_t batch = 0; batch < 4; ++batch) {
assert(batch < BATCH_MAX);
size_t filled = batch_alloc(global_ptrs, batch, size, 0);
assert_zu_eq(filled, batch, "");
release_batch(global_ptrs, batch, size);
}
}
TEST_END
/*
 * Run all batch_alloc test cases through test() and return its result as
 * the process exit status.
 */
int
main(void) {
return test(test_batch_alloc, test_batch_alloc_zero,
test_batch_alloc_aligned, test_batch_alloc_manual_arena,
test_batch_alloc_large);
}

View file

@ -1,3 +0,0 @@
#!/bin/sh
# Export the MALLOC_CONF option string for the test this script accompanies:
# tcache_gc_incr_bytes is set to 2147483648 (2^31).
# NOTE(review): presumably this keeps tcache GC from triggering during the
# test -- confirm against the jemalloc option documentation.
export MALLOC_CONF="tcache_gc_incr_bytes:2147483648"

View file

@ -1 +0,0 @@
#include "batch_alloc.c"

View file

@ -1,3 +0,0 @@
#!/bin/sh
# Export the MALLOC_CONF option string for the test this script accompanies:
# prof is turned on and lg_prof_sample is set to 14.
# NOTE(review): lg_prof_sample is presumably the base-2 log of the average
# sampling interval in bytes -- confirm against the jemalloc option
# documentation.
export MALLOC_CONF="prof:true,lg_prof_sample:14"

View file

@ -51,34 +51,6 @@ do_flush_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill,
}
}
/*
 * Fill bin with nfill pointers (the addresses of ptrs[0..nfill)), pull up to
 * batch of them back out through cache_bin_alloc_batch(), and verify:
 *  - the returned count is the smaller of nfill and batch;
 *  - the pointers come back in the order they were filled;
 *  - the low-water mark equals nfill minus the returned count.
 *
 * The bin must be empty on entry and is drained again before returning.
 */
static void
do_batch_alloc_test(
    cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill, size_t batch) {
	assert_true(cache_bin_ncached_get_local(bin) == 0, "");
	CACHE_BIN_PTR_ARRAY_DECLARE(arr, nfill);
	cache_bin_init_ptr_array_for_fill(bin, &arr, nfill);
	for (cache_bin_sz_t i = 0; i < nfill; i++) {
		arr.ptr[i] = &ptrs[i];
	}
	cache_bin_finish_fill(bin, &arr, nfill);
	assert_true(cache_bin_ncached_get_local(bin) == nfill, "");
	cache_bin_low_water_set(bin);

	/* batch + 1 keeps the request size nonzero even when batch is 0. */
	void **out = malloc((batch + 1) * sizeof(void *));
	/* Stop here on allocation failure instead of copying into NULL. */
	assert_true(out != NULL, "Unexpected malloc() failure");

	size_t n = cache_bin_alloc_batch(bin, batch, out);
	assert_true(n == ((size_t)nfill < batch ? (size_t)nfill : batch), "");
	for (cache_bin_sz_t i = 0; i < (cache_bin_sz_t)n; i++) {
		expect_ptr_eq(out[i], &ptrs[i], "");
	}
	expect_true(
	    cache_bin_low_water_get(bin) == nfill - (cache_bin_sz_t)n, "");

	/* Drain whatever is left so the bin is empty for the next caller. */
	while (cache_bin_ncached_get_local(bin) > 0) {
		bool success;
		cache_bin_alloc(bin, &success);
	}
	free(out);
}
static void
test_bin_init(cache_bin_t *bin, cache_bin_info_t *info) {
size_t size;
@ -225,29 +197,6 @@ TEST_BEGIN(test_cache_bin) {
do_flush_test(&bin, ptrs, ncached_max / 2, ncached_max / 4);
do_flush_test(&bin, ptrs, ncached_max / 2, 0);
do_batch_alloc_test(&bin, ptrs, ncached_max, ncached_max);
do_batch_alloc_test(&bin, ptrs, ncached_max, ncached_max * 2);
do_batch_alloc_test(&bin, ptrs, ncached_max, ncached_max / 2);
do_batch_alloc_test(&bin, ptrs, ncached_max, 2);
do_batch_alloc_test(&bin, ptrs, ncached_max, 1);
do_batch_alloc_test(&bin, ptrs, ncached_max, 0);
do_batch_alloc_test(&bin, ptrs, ncached_max / 2, ncached_max / 2);
do_batch_alloc_test(&bin, ptrs, ncached_max / 2, ncached_max);
do_batch_alloc_test(&bin, ptrs, ncached_max / 2, ncached_max / 4);
do_batch_alloc_test(&bin, ptrs, ncached_max / 2, 2);
do_batch_alloc_test(&bin, ptrs, ncached_max / 2, 1);
do_batch_alloc_test(&bin, ptrs, ncached_max / 2, 0);
do_batch_alloc_test(&bin, ptrs, 2, ncached_max);
do_batch_alloc_test(&bin, ptrs, 2, 2);
do_batch_alloc_test(&bin, ptrs, 2, 1);
do_batch_alloc_test(&bin, ptrs, 2, 0);
do_batch_alloc_test(&bin, ptrs, 1, 2);
do_batch_alloc_test(&bin, ptrs, 1, 1);
do_batch_alloc_test(&bin, ptrs, 1, 0);
do_batch_alloc_test(&bin, ptrs, 0, 2);
do_batch_alloc_test(&bin, ptrs, 0, 1);
do_batch_alloc_test(&bin, ptrs, 0, 0);
free(ptrs);
}
TEST_END