From 65a7d1992838c133fd664531fbd3a5518c613369 Mon Sep 17 00:00:00 2001 From: LD-RW Date: Fri, 10 Apr 2026 20:45:51 +0300 Subject: [PATCH] bin: enforce bin->lock ownership in bin_slab_reg_alloc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bitmap_set() performs a plain (non-atomic) read-modify-write on every level of the bitmap tree: g = *gp; /* READ */ g ^= ZU(1) << bit; /* MODIFY — thread-local copy */ *gp = g; /* WRITE BACK — no barrier, no CAS */ Two threads that reach bitmap_sfu() -> bitmap_set() concurrently on the same slab bitmap — even for different bits that share a group word — will clobber each other's write. The clobbered bit still looks free on the next allocation; bitmap_sfu() selects it again; the second call to bitmap_set() aborts on: assert(!bitmap_get(bitmap, binfo, bit)); /* bitmap.h:220 */ or, once tree propagation begins for a newly-full group: assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); /* bitmap.h:237 */ Either assert calls abort() and produces the coredump reported in issues #2875 and #2772. The immediate callers (bin_malloc_with_fresh_slab, bin_malloc_no_fresh_slab) already assert lock ownership, but bin_slab_reg_alloc() itself had no such check, making it easy for new call sites to silently bypass the requirement. Fix: - Thread tsdn_t *tsdn and bin_t *bin through bin_slab_reg_alloc() and call malloc_mutex_assert_owner() as the first statement. - Update both internal callers (bin_malloc_with_fresh_slab, bin_malloc_no_fresh_slab) to pass the context they already hold. - Document the locking contract in bin.h and the thread-safety constraint in bitmap.h directly above bitmap_set(). Note: bin_slab_reg_alloc_batch() is left unchanged because it has one legitimate unlocked caller (arena_fill_small_fresh) which operates on freshly allocated slabs that are not yet visible to any other thread. Its locking contract is now documented in bin.h. 
Fixes #2875 --- include/jemalloc/internal/bin.h | 22 +++++++++++++++++-- include/jemalloc/internal/bitmap.h | 27 +++++++++++++++++++++++ src/bin.c | 35 +++++++++++++++++++++++++++--- 3 files changed, 79 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 51d4c89e..b4f39e2a 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -62,8 +62,26 @@ void bin_prefork(tsdn_t *tsdn, bin_t *bin); void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin); void bin_postfork_child(tsdn_t *tsdn, bin_t *bin); -/* Slab region allocation. */ -void *bin_slab_reg_alloc(edata_t *slab, const bin_info_t *bin_info); +/* + * Slab region allocation — locking contracts: + * + * bin_slab_reg_alloc() + * REQUIRES: bin->lock held by the caller. + * Calls bitmap_sfu() → bitmap_set(), which performs a non-atomic + * read-modify-write on the slab bitmap at every tree level. Two threads + * that reach bitmap_sfu() concurrently (even for different bits in the same + * group word) will corrupt the bitmap and eventually abort inside + * bitmap_set(). malloc_mutex_assert_owner() is used internally to catch + * call sites that forget to acquire the lock. + * + * bin_slab_reg_alloc_batch() + * Shared slab (already inserted into a bin): bin->lock MUST be held. + * Fresh slab (just returned by arena_slab_alloc, not yet visible to any + * other thread): no lock required. + * The caller is responsible for selecting the correct case. 
+ */ +void *bin_slab_reg_alloc(tsdn_t *tsdn, bin_t *bin, + edata_t *slab, const bin_info_t *bin_info); void bin_slab_reg_alloc_batch( edata_t *slab, const bin_info_t *bin_info, unsigned cnt, void **ptrs); diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index e0f596fb..bae6316f 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -210,6 +210,33 @@ bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { return !(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); } +/* + * bitmap_set() performs a non-atomic read-modify-write on every level of the + * bitmap tree: + * + * g = *gp; // READ + * g ^= ZU(1) << bit; // MODIFY (thread-local copy) + * *gp = g; // WRITE BACK — not atomic, no barrier + * + * If two threads enter bitmap_set() concurrently for bits that share the same + * group word (or a common ancestor group in the BITMAP_USE_TREE path), one + * thread's write silently clobbers the other's. On the next call the + * clobbered bit still appears free; bitmap_sfu() returns it again; the second + * call to bitmap_set() for that bit trips: + * + * assert(!bitmap_get(bitmap, binfo, bit)); // aborts: bit already set + * + * or, after a group drains to zero and tree propagation begins: + * + * assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); // aborts during propagation + * + * Either assert calls abort() and produces the observed coredump. + * + * bitmap_set() is intentionally not thread-safe. Callers must hold the + * owning bin_t.lock for the entire duration of bitmap_sfu() → bitmap_set(). + * The fix enforces this contract at the bin_slab_reg_alloc() call boundary + * via malloc_mutex_assert_owner(); see src/bin.c and include/jemalloc/internal/bin.h. 
+ */ static inline void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { size_t goff; diff --git a/src/bin.c b/src/bin.c index 6bab4b22..3b06a8d9 100644 --- a/src/bin.c +++ b/src/bin.c @@ -68,12 +68,41 @@ bin_postfork_child(tsdn_t *tsdn, bin_t *bin) { malloc_mutex_postfork_child(tsdn, &bin->lock); } +/* + * bin_slab_reg_alloc — enforce bin->lock ownership before touching the bitmap. + * + * Root cause of issue #2875 (and #2772): + * bitmap_set() is not thread-safe. It does a plain read-modify-write on + * each level of the bitmap tree with no atomics or barriers. If two threads + * ever reach bitmap_sfu() → bitmap_set() concurrently on the same slab + * bitmap — even for different bit positions that share a group word — one + * write silently discards the other. The clobbered bit still looks free on + * the next allocation attempt; bitmap_sfu() selects it again; the second + * call to bitmap_set() fires: + * + * assert(!bitmap_get(bitmap, binfo, bit)); // in bitmap_set() + * + * which calls abort() and produces the coredump. + * + * The existing callers (bin_malloc_with_fresh_slab, bin_malloc_no_fresh_slab) + * already assert lock ownership via malloc_mutex_assert_owner(), but + * bin_slab_reg_alloc() itself had no such check, making it easy for future + * callers to silently bypass the requirement. + * + * Fix: thread tsdn_t *tsdn and bin_t *bin through the signature and add + * malloc_mutex_assert_owner() as the first statement, so any caller that + * forgets to hold the lock is caught immediately in debug builds rather than + * producing a rare, hard-to-reproduce coredump in production. 
+ */ void * -bin_slab_reg_alloc(edata_t *slab, const bin_info_t *bin_info) { +bin_slab_reg_alloc(tsdn_t *tsdn, bin_t *bin, + edata_t *slab, const bin_info_t *bin_info) { void *ret; slab_data_t *slab_data = edata_slab_data_get(slab); size_t regind; + malloc_mutex_assert_owner(tsdn, &bin->lock); + assert(edata_nfree_get(slab) > 0); assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); @@ -281,7 +310,7 @@ bin_malloc_with_fresh_slab(tsdn_t *tsdn, bin_t *bin, malloc_mutex_assert_owner(tsdn, &bin->lock); bin_refill_slabcur_with_fresh_slab(tsdn, bin, binind, fresh_slab); - return bin_slab_reg_alloc(bin->slabcur, &bin_infos[binind]); + return bin_slab_reg_alloc(tsdn, bin, bin->slabcur, &bin_infos[binind]); } bool @@ -312,7 +341,7 @@ bin_malloc_no_fresh_slab(tsdn_t *tsdn, bool is_auto, bin_t *bin, } assert(bin->slabcur != NULL && edata_nfree_get(bin->slabcur) > 0); - return bin_slab_reg_alloc(bin->slabcur, &bin_infos[binind]); + return bin_slab_reg_alloc(tsdn, bin, bin->slabcur, &bin_infos[binind]); } bin_t *