Remove batch_alloc API

2026-07-31 07:28:02 +03:00 · 2026-05-22 16:21:04 -07:00 · 2026-05-22 16:21:04 -07:00 · 1e5cb5346d
commit 1e5cb5346d
parent 99c6e7c7ac
10 changed files with 2 additions and 608 deletions
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -2028,185 +2028,6 @@ je_malloc_size(const void *ptr) {
 }
 #endif

-static void
-batch_alloc_prof_sample_assert(tsd_t *tsd, size_t batch, size_t usize) {
-	assert(config_prof && opt_prof);
-	bool prof_sample_event = te_prof_sample_event_lookahead(
-	    tsd, batch * usize);
-	assert(!prof_sample_event);
-	size_t surplus;
-	prof_sample_event = te_prof_sample_event_lookahead_surplus(
-	    tsd, (batch + 1) * usize, &surplus);
-	assert(prof_sample_event);
-	assert(surplus < usize);
-}
-
-size_t
-batch_alloc(void **ptrs, size_t num, size_t size, int flags) {
-	LOG("core.batch_alloc.entry",
-	    "ptrs: %p, num: %zu, size: %zu, flags: %d", ptrs, num, size, flags);
-
-	tsd_t *tsd = tsd_fetch();
-	check_entry_exit_locking(tsd_tsdn(tsd));
-
-	size_t filled = 0;
-
-	if (unlikely(tsd == NULL || tsd_reentrancy_level_get(tsd) > 0)) {
-		goto label_done;
-	}
-
-	size_t alignment = MALLOCX_ALIGN_GET(flags);
-	size_t usize;
-	if (aligned_usize_get(size, alignment, &usize, NULL, false)) {
-		goto label_done;
-	}
-	szind_t ind = sz_size2index(usize);
-	bool    zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true);
-
-	/*
-	 * The cache bin and arena will be lazily initialized; it's hard to
-	 * know in advance whether each of them needs to be initialized.
-	 */
-	cache_bin_t *bin = NULL;
-	arena_t     *arena = NULL;
-
-	size_t nregs = 0;
-	if (likely(ind < SC_NBINS)) {
-		nregs = bin_infos[ind].nregs;
-		assert(nregs > 0);
-	}
-
-	while (filled < num) {
-		size_t batch = num - filled;
-		size_t surplus = SIZE_MAX; /* Dead store. */
-		bool   prof_sample_event = config_prof && opt_prof
-		    && prof_active_get_unlocked()
-		    && te_prof_sample_event_lookahead_surplus(
-		        tsd, batch * usize, &surplus);
-
-		if (prof_sample_event) {
-			/*
-			 * Adjust so that the batch does not trigger prof
-			 * sampling.
-			 */
-			batch -= surplus / usize + 1;
-			batch_alloc_prof_sample_assert(tsd, batch, usize);
-		}
-
-		size_t progress = 0;
-
-		if (likely(ind < SC_NBINS) && batch >= nregs) {
-			if (arena == NULL) {
-				unsigned arena_ind = mallocx_arena_get(flags);
-				if (arena_get_from_ind(
-				        tsd, arena_ind, &arena)) {
-					goto label_done;
-				}
-				if (arena == NULL) {
-					arena = arena_choose(tsd, NULL);
-				}
-				if (unlikely(arena == NULL)) {
-					goto label_done;
-				}
-			}
-			size_t arena_batch = batch - batch % nregs;
-			size_t n = arena_fill_small_fresh(tsd_tsdn(tsd), arena,
-			    ind, ptrs + filled, arena_batch, zero);
-			progress += n;
-			filled += n;
-		}
-
-		unsigned  tcache_ind = mallocx_tcache_get(flags);
-		tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind,
-		    /* slow */ true, /* is_alloc */ true);
-		if (likely(tcache != NULL
-		        && ind < tcache_nbins_get(tcache->tcache_slow)
-		        && !tcache_bin_disabled(
-		            ind, &tcache->bins[ind], tcache->tcache_slow))
-		    && progress < batch) {
-			if (bin == NULL) {
-				bin = &tcache->bins[ind];
-			}
-			/*
-			 * If we don't have a tcache bin, we don't want to
-			 * immediately give up, because there's the possibility
-			 * that the user explicitly requested to bypass the
-			 * tcache, or that the user explicitly turned off the
-			 * tcache; in such cases, we go through the slow path,
-			 * i.e. the mallocx() call at the end of the while loop.
-			 */
-			if (bin != NULL) {
-				size_t bin_batch = batch - progress;
-				/*
-				 * n can be less than bin_batch, meaning that
-				 * the cache bin does not have enough memory.
-				 * In such cases, we rely on the slow path,
-				 * i.e. the mallocx() call at the end of the
-				 * while loop, to fill in the cache, and in the
-				 * next iteration of the while loop, the tcache
-				 * will contain a lot of memory, and we can
-				 * harvest them here.  Compared to the
-				 * alternative approach where we directly go to
-				 * the arena bins here, the overhead of our
-				 * current approach should usually be minimal,
-				 * since we never try to fetch more memory than
-				 * what a slab contains via the tcache.  An
-				 * additional benefit is that the tcache will
-				 * not be empty for the next allocation request.
-				 */
-				size_t n = cache_bin_alloc_batch(
-				    bin, bin_batch, ptrs + filled);
-				if (config_stats) {
-					bin->tstats.nrequests += n;
-				}
-				if (zero) {
-					for (size_t i = 0; i < n; ++i) {
-						memset(
-						    ptrs[filled + i], 0, usize);
-					}
-				}
-				if (config_prof && opt_prof
-				    && unlikely(ind >= SC_NBINS)) {
-					for (size_t i = 0; i < n; ++i) {
-						prof_tctx_reset_sampled(
-						    tsd, ptrs[filled + i]);
-					}
-				}
-				progress += n;
-				filled += n;
-			}
-		}
-
-		/*
-		 * For thread events other than prof sampling, trigger them as
-		 * if there's a single allocation of size (n * usize).  This is
-		 * fine because:
-		 * (a) these events do not alter the allocation itself, and
-		 * (b) it's possible that some event would have been triggered
-		 *     multiple times, instead of only once, if the allocations
-		 *     were handled individually, but it would do no harm (or
-		 *     even be beneficial) to coalesce the triggerings.
-		 */
-		thread_alloc_event(tsd, progress * usize);
-
-		if (progress < batch || prof_sample_event) {
-			void *p = je_mallocx(size, flags);
-			if (p == NULL) { /* OOM */
-				break;
-			}
-			if (progress == batch) {
-				assert(prof_sampled(tsd, p));
-			}
-			ptrs[filled++] = p;
-		}
-	}
-
-label_done:
-	check_entry_exit_locking(tsd_tsdn(tsd));
-	LOG("core.batch_alloc.exit", "result: %zu", filled);
-	return filled;
-}
-
 /*
 * End non-standard functions.
 */