From 47c9bcd402110be3f64517ad9366d1cfaa751d48 Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Thu, 18 Jul 2024 17:33:07 -0700 Subject: [PATCH] Use a for-loop to fulfill flush requests that are larger than CACHE_BIN_NFLUSH_BATCH_MAX items --- include/jemalloc/internal/cache_bin.h | 8 +++++ src/tcache.c | 47 ++++++++++++++++++--------- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index a26c3671..a7a5e40e 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -600,6 +600,14 @@ cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_sz_t *ncached, */ } +/* + * Limit how many items can be flushed in a batch (Which is the upper bound + * for the nflush parameter in tcache_bin_flush_impl()). + * This is to avoid stack overflow when we do batch edata look up, which + * reserves a nflush * sizeof(emap_batch_lookup_result_t) stack variable. + */ +#define CACHE_BIN_NFLUSH_BATCH_MAX (VARIABLE_ARRAY_SIZE_MAX >> LG_SIZEOF_PTR) + /* * Filling and flushing are done in batch, on arrays of void *s. For filling, * the arrays go forward, and can be accessed with ordinary array arithmetic. diff --git a/src/tcache.c b/src/tcache.c index 03ec5136..4144524d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -712,22 +712,37 @@ tcache_bin_flush_impl_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin JEMALLOC_ALWAYS_INLINE void tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, cache_bin_ptr_array_t *ptrs, unsigned nflush, bool small) { - /* - * The small/large flush logic is very similar; you might conclude that - * it's a good opportunity to share code. We've tried this, and by and - * large found this to obscure more than it helps; there are so many - * fiddly bits around things like stats handling, precisely when and - * which mutexes are acquired, etc., that almost all code ends up being - * gated behind 'if (small) { ... } else { ... }'. Even though the - * '...' is morally equivalent, the code itself needs slight tweaks. - */ - if (small) { - tcache_bin_flush_impl_small(tsd, tcache, cache_bin, binind, - ptrs, nflush); - } else { - tcache_bin_flush_impl_large(tsd, tcache, cache_bin, binind, - ptrs, nflush); - } + assert(ptrs != NULL && ptrs->ptr != NULL); + unsigned nflush_batch, nflushed = 0; + cache_bin_ptr_array_t ptrs_batch; + do { + nflush_batch = nflush - nflushed; + if (nflush_batch > CACHE_BIN_NFLUSH_BATCH_MAX) { + nflush_batch = CACHE_BIN_NFLUSH_BATCH_MAX; + } + assert(nflush_batch <= CACHE_BIN_NFLUSH_BATCH_MAX); + (&ptrs_batch)->n = (cache_bin_sz_t)nflush_batch; + (&ptrs_batch)->ptr = ptrs->ptr + nflushed; + /* + * The small/large flush logic is very similar; you might conclude that + * it's a good opportunity to share code. We've tried this, and by and + * large found this to obscure more than it helps; there are so many + * fiddly bits around things like stats handling, precisely when and + * which mutexes are acquired, etc., that almost all code ends up being + * gated behind 'if (small) { ... } else { ... }'. Even though the + * '...' is morally equivalent, the code itself needs slight tweaks. + */ + if (small) { + tcache_bin_flush_impl_small(tsd, tcache, cache_bin, binind, + &ptrs_batch, nflush_batch); + } else { + tcache_bin_flush_impl_large(tsd, tcache, cache_bin, binind, + &ptrs_batch, nflush_batch); + } + nflushed += nflush_batch; + } while (nflushed < nflush); + assert(nflush == nflushed); + assert((ptrs->ptr + nflush) == ((&ptrs_batch)->ptr + nflush_batch)); } JEMALLOC_ALWAYS_INLINE void