From 2f4db8cf5dad7f53164bef13a2c5b49e1195d042 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Fri, 12 Jun 2026 11:52:11 -0700 Subject: [PATCH] Cap the base-block growth heuristic base_block_alloc() grows new base blocks along the page size-class series to reduce the number of disjoint VM ranges. This works well when new base blocks are rare. Under high thread churn, many threads can miss the base free pool in parallel while allocating metadata. base_extent_alloc() drops base->mtx after mapping a new block, but before splitting and inserting the rest into the reuse pool. Therefore, each parallel miss can map its own block, and each completed allocation then advances base->pind_last. The result is that small metadata requests can drive the growth heuristic to increasingly large mmap() sizes, far beyond the actual allocation demand. Cap the heuristic growth size at 128 MiB. This preserves the usual amortization benefit while bounding the rare pathological case where parallel misses rapidly advance the growth series. Large individual requests are still honored because min_block_size continues to override the cap. --- src/base.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/base.c b/src/base.c index 3a76050f..02f648ff 100644 --- a/src/base.c +++ b/src/base.c @@ -19,6 +19,17 @@ JET_EXTERN ehooks_t *base_ehooks_get_for_metadata(base_t *base); #define BASE_AUTO_THP_THRESHOLD 2 #define BASE_AUTO_THP_THRESHOLD_A0 5 +/* + * Cap the base-block growth heuristic in base_block_alloc(). The + * growth heuristic reduces the number of disjoint VM ranges when new base + * blocks are rare, but high thread churn can cause many parallel misses for + * metadata allocations. Without a cap, those misses can advance + * base->pind_last, causing small requests to mmap multi-TiB blocks and exhaust + * the address space. Large individual requests still use min_block_size and + * can exceed this cap. 
+ */ +#define BASE_BLOCK_GROWTH_MAX ((size_t)128 << 20) /* 128 MiB */ + /******************************************************************************/ /* Data. */ @@ -376,7 +387,11 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind, pszind_t pind_next = (*pind_last + 1 < sz_psz2ind(SC_LARGE_MAXCLASS)) ? *pind_last + 1 : *pind_last; - size_t next_block_size = base_block_size_ceil(sz_pind2sz(pind_next)); + size_t next_block_size = base_block_size_ceil(sz_pind2sz(pind_next)); + size_t max_block_size = base_block_size_ceil(BASE_BLOCK_GROWTH_MAX); + next_block_size = (next_block_size < max_block_size) + ? next_block_size + : max_block_size; size_t block_size = (min_block_size > next_block_size) ? min_block_size : next_block_size;