mirror of https://github.com/jemalloc/jemalloc.git
bin: enforce bin->lock ownership in bin_slab_reg_alloc()

bitmap_set() performs a plain (non-atomic) read-modify-write on every
level of the bitmap tree:

    g = *gp;            /* READ */
    g ^= ZU(1) << bit;  /* MODIFY — thread-local copy */
    *gp = g;            /* WRITE BACK — no barrier, no CAS */

Two threads that reach bitmap_sfu() -> bitmap_set() concurrently on the
same slab bitmap — even for different bits that share a group word —
will clobber each other's write. The clobbered bit still looks free on
the next allocation; bitmap_sfu() selects it again; the second call to
bitmap_set() aborts on:

    assert(!bitmap_get(bitmap, binfo, bit));  /* bitmap.h:220 */

or, once tree propagation begins for a newly-full group:

    assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK)));  /* bitmap.h:237 */

Either assert calls abort() and produces the coredump reported in
issues #2875 and #2772.
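
For illustration, a minimal standalone sketch of the same lost-update race
(plain POSIX threads, not jemalloc code; the shared word stands in for one
bitmap group word). Each thread toggles its own bit an even number of times
with the same unlocked read-modify-write, so the word should end up zero;
a non-zero result means one thread's write clobbered the other's:

    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    #define ITERS 1000000

    static volatile unsigned long g;    /* stand-in for a shared group word */

    static void *
    flip(void *arg) {
        unsigned long mask = 1UL << (uintptr_t)arg;
        for (int i = 0; i < ITERS; i++) {
            unsigned long tmp = g;      /* READ */
            tmp ^= mask;                /* MODIFY (thread-local copy) */
            g = tmp;                    /* WRITE BACK: no barrier, no CAS */
        }
        return NULL;
    }

    int
    main(void) {
        pthread_t t0, t1;
        pthread_create(&t0, NULL, flip, (void *)0);
        pthread_create(&t1, NULL, flip, (void *)1);
        pthread_join(t0, NULL);
        pthread_join(t1, NULL);
        /* Each bit was toggled ITERS (even) times, so g "should" be 0. */
        printf("g = %#lx (non-zero: a write was lost)\n", g);
        return 0;
    }
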
The immediate callers (bin_malloc_with_fresh_slab,
bin_malloc_no_fresh_slab) already assert lock ownership, but
bin_slab_reg_alloc() itself had no such check, making it easy for new
call sites to silently bypass the requirement.

Fix:
- Thread tsdn_t *tsdn and bin_t *bin through bin_slab_reg_alloc() and
  call malloc_mutex_assert_owner() as the first statement (pattern
  sketched below).
- Update both internal callers (bin_malloc_with_fresh_slab,
  bin_malloc_no_fresh_slab) to pass the context they already hold.
- Document the locking contract in bin.h and the thread-safety
  constraint in bitmap.h directly above bitmap_set().

Note: bin_slab_reg_alloc_batch() is left unchanged because it has one
legitimate unlocked caller (arena_fill_small_fresh) which operates on
freshly allocated slabs that are not yet visible to any other thread.
Its locking contract is now documented in bin.h.
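
The enforcement pattern itself, as a standalone sketch (plain pthreads; the
dbg_mutex_* names are illustrative, not jemalloc's, which performs the same
check through malloc_mutex_assert_owner() in debug builds):

    #include <assert.h>
    #include <pthread.h>
    #include <stdbool.h>

    typedef struct {
        pthread_mutex_t mtx;
        pthread_t owner;    /* meaningful only while held */
        bool held;
    } dbg_mutex_t;

    static void
    dbg_mutex_lock(dbg_mutex_t *m) {
        pthread_mutex_lock(&m->mtx);
        m->owner = pthread_self();
        m->held = true;
    }

    static void
    dbg_mutex_unlock(dbg_mutex_t *m) {
        m->held = false;
        pthread_mutex_unlock(&m->mtx);
    }

    /* Aborts (in builds with assertions) unless the caller holds the lock. */
    static void
    dbg_mutex_assert_owner(dbg_mutex_t *m) {
        assert(m->held && pthread_equal(m->owner, pthread_self()));
    }

    /* Guarded entry point: check the contract before any non-atomic update. */
    static void
    guarded_op(dbg_mutex_t *lock) {
        dbg_mutex_assert_owner(lock);
        /* ... read-modify-writes protected by *lock ... */
    }
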
Fixes #2875

parent 6515df8cec
commit 65a7d19928

3 changed files with 79 additions and 5 deletions

include/jemalloc/internal/bin.h
@@ -62,8 +62,26 @@ void bin_prefork(tsdn_t *tsdn, bin_t *bin);
 void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin);
 void bin_postfork_child(tsdn_t *tsdn, bin_t *bin);
 
-/* Slab region allocation. */
-void *bin_slab_reg_alloc(edata_t *slab, const bin_info_t *bin_info);
+/*
+ * Slab region allocation — locking contracts:
+ *
+ * bin_slab_reg_alloc()
+ *   REQUIRES: bin->lock held by the caller.
+ *   Calls bitmap_sfu() → bitmap_set(), which performs a non-atomic
+ *   read-modify-write on the slab bitmap at every tree level. Two threads
+ *   that reach bitmap_sfu() concurrently (even for different bits in the same
+ *   group word) will corrupt the bitmap and eventually abort inside
+ *   bitmap_set(). malloc_mutex_assert_owner() is used internally to catch
+ *   call sites that forget to acquire the lock.
+ *
+ * bin_slab_reg_alloc_batch()
+ *   Shared slab (already inserted into a bin): bin->lock MUST be held.
+ *   Fresh slab (just returned by arena_slab_alloc, not yet visible to any
+ *   other thread): no lock required.
+ *   The caller is responsible for selecting the correct case.
+ */
+void *bin_slab_reg_alloc(tsdn_t *tsdn, bin_t *bin,
+    edata_t *slab, const bin_info_t *bin_info);
 void bin_slab_reg_alloc_batch(
     edata_t *slab, const bin_info_t *bin_info, unsigned cnt, void **ptrs);
 
include/jemalloc/internal/bitmap.h
@@ -210,6 +210,33 @@ bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) {
     return !(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK)));
 }
 
+/*
+ * bitmap_set() performs a non-atomic read-modify-write on every level of the
+ * bitmap tree:
+ *
+ *   g = *gp;            // READ
+ *   g ^= ZU(1) << bit;  // MODIFY (thread-local copy)
+ *   *gp = g;            // WRITE BACK — not atomic, no barrier
+ *
+ * If two threads enter bitmap_set() concurrently for bits that share the same
+ * group word (or a common ancestor group in the BITMAP_USE_TREE path), one
+ * thread's write silently clobbers the other's. On the next call the
+ * clobbered bit still appears free; bitmap_sfu() returns it again; the second
+ * call to bitmap_set() for that bit trips:
+ *
+ *   assert(!bitmap_get(bitmap, binfo, bit));  // line 220 — bit already set
+ *
+ * or, after a group drains to zero and tree propagation begins:
+ *
+ *   assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK)));  // line 237
+ *
+ * Either assert calls abort() and produces the observed coredump.
+ *
+ * bitmap_set() is intentionally not thread-safe. Callers must hold the
+ * owning bin_t.lock for the entire duration of bitmap_sfu() → bitmap_set().
+ * The fix enforces this contract at the bin_slab_reg_alloc() call boundary
+ * via malloc_mutex_assert_owner(); see src/bin.c and include/.../bin.h.
+ */
 static inline void
 bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) {
     size_t goff;
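
To make the tree encoding concrete, here is a toy two-level bitmap (8-bit
groups, one summary level; an illustration under those assumptions, not the
real bitmap.h code). As in jemalloc, a physical 1 bit means "free":
toy_set() clears the leaf bit and, once the whole leaf group drains to zero,
propagates by clearing the summary bit. The two asserts mirror the ones
quoted above, and the plain read-modify-writes are safe here only because
the demo is single-threaded; in real use they need the caller's lock.

    #include <assert.h>

    #define NGROUPS 8    /* 8 leaf groups x 8 bits = 64 regions */

    typedef struct {
        unsigned char leaf[NGROUPS];    /* 1 = free, 0 = allocated */
        unsigned char summary;          /* bit i set: group i has a free bit */
    } toy_bitmap_t;

    static void
    toy_init(toy_bitmap_t *b) {
        for (unsigned i = 0; i < NGROUPS; i++) {
            b->leaf[i] = 0xff;          /* all regions free */
        }
        b->summary = 0xff;              /* every group has free bits */
    }

    /* Mirrors bitmap_get(): "set" (allocated) means the physical bit is 0. */
    static int
    toy_get(const toy_bitmap_t *b, unsigned bit) {
        return !(b->leaf[bit / 8] & (1u << (bit % 8)));
    }

    static void
    toy_set(toy_bitmap_t *b, unsigned bit) {
        assert(!toy_get(b, bit));                /* cf. the line-220 assert */
        b->leaf[bit / 8] ^= 1u << (bit % 8);     /* plain RMW: needs a lock */
        if (b->leaf[bit / 8] == 0) {
            /* Group drained to zero: propagate one level up the tree. */
            assert(b->summary & (1u << (bit / 8)));  /* cf. line-237 assert */
            b->summary ^= 1u << (bit / 8);
        }
    }

    int
    main(void) {
        toy_bitmap_t b;
        toy_init(&b);
        for (unsigned bit = 0; bit < 8 * NGROUPS; bit++) {
            toy_set(&b, bit);    /* single-threaded: every assert holds */
        }
        assert(b.summary == 0);  /* all groups fully allocated */
        return 0;
    }
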
src/bin.c
@@ -68,12 +68,41 @@ bin_postfork_child(tsdn_t *tsdn, bin_t *bin) {
     malloc_mutex_postfork_child(tsdn, &bin->lock);
 }
 
+/*
+ * bin_slab_reg_alloc — enforce bin->lock ownership before touching the bitmap.
+ *
+ * Root cause of issue #2875 (and #2772):
+ * bitmap_set() is not thread-safe. It does a plain read-modify-write on
+ * each level of the bitmap tree with no atomics or barriers. If two threads
+ * ever reach bitmap_sfu() → bitmap_set() concurrently on the same slab
+ * bitmap — even for different bit positions that share a group word — one
+ * write silently discards the other. The clobbered bit still looks free on
+ * the next allocation attempt; bitmap_sfu() selects it again; the second
+ * call to bitmap_set() fires:
+ *
+ *   assert(!bitmap_get(bitmap, binfo, bit));  // bitmap.h:220
+ *
+ * which calls abort() and produces the coredump.
+ *
+ * The existing callers (bin_malloc_with_fresh_slab, bin_malloc_no_fresh_slab)
+ * already assert lock ownership via malloc_mutex_assert_owner(), but
+ * bin_slab_reg_alloc() itself had no such check, making it easy for future
+ * callers to silently bypass the requirement.
+ *
+ * Fix: thread tsdn_t *tsdn and bin_t *bin through the signature and add
+ * malloc_mutex_assert_owner() as the first statement, so any caller that
+ * forgets to hold the lock is caught immediately in debug builds rather than
+ * producing a rare, hard-to-reproduce coredump in production.
+ */
 void *
-bin_slab_reg_alloc(edata_t *slab, const bin_info_t *bin_info) {
+bin_slab_reg_alloc(tsdn_t *tsdn, bin_t *bin,
+    edata_t *slab, const bin_info_t *bin_info) {
     void *ret;
     slab_data_t *slab_data = edata_slab_data_get(slab);
     size_t regind;
 
+    malloc_mutex_assert_owner(tsdn, &bin->lock);
+
     assert(edata_nfree_get(slab) > 0);
     assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info));
 
@@ -281,7 +310,7 @@ bin_malloc_with_fresh_slab(tsdn_t *tsdn, bin_t *bin,
     malloc_mutex_assert_owner(tsdn, &bin->lock);
     bin_refill_slabcur_with_fresh_slab(tsdn, bin, binind, fresh_slab);
 
-    return bin_slab_reg_alloc(bin->slabcur, &bin_infos[binind]);
+    return bin_slab_reg_alloc(tsdn, bin, bin->slabcur, &bin_infos[binind]);
 }
 
 bool
@@ -312,7 +341,7 @@ bin_malloc_no_fresh_slab(tsdn_t *tsdn, bool is_auto, bin_t *bin,
     }
 
     assert(bin->slabcur != NULL && edata_nfree_get(bin->slabcur) > 0);
-    return bin_slab_reg_alloc(bin->slabcur, &bin_infos[binind]);
+    return bin_slab_reg_alloc(tsdn, bin, bin->slabcur, &bin_infos[binind]);
 }
 
 bin_t *
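
Why the fresh-slab case in the commit note needs no lock: a sketch of the
initialize-then-publish discipline it relies on (toy names, not jemalloc's).
Until a slab is linked into a structure other threads can see, only the
creating thread can reach it, so plain writes are safe; the bin lock matters
only from the moment of publication onward.

    #include <pthread.h>
    #include <stddef.h>

    typedef struct toy_slab_s {
        struct toy_slab_s *next;
        unsigned char bitmap[8];    /* toy region bitmap */
    } toy_slab_t;

    typedef struct {
        pthread_mutex_t lock;
        toy_slab_t *slabs;          /* shared list, guarded by lock */
    } toy_bin_t;

    /* Fresh slab: private to the creating thread, so no lock is required. */
    static void
    fill_fresh(toy_slab_t *slab) {
        for (size_t i = 0; i < sizeof(slab->bitmap); i++) {
            slab->bitmap[i] = 0xff; /* plain write: nobody else sees this */
        }
        slab->next = NULL;
    }

    /*
     * Publication: the lock's release/acquire ordering hands other threads
     * a consistent view. From here on, every bitmap update needs bin->lock.
     */
    static void
    publish(toy_bin_t *bin, toy_slab_t *slab) {
        pthread_mutex_lock(&bin->lock);
        slab->next = bin->slabs;
        bin->slabs = slab;
        pthread_mutex_unlock(&bin->lock);
    }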