This commit is contained in:
Basheer AL-Jarrah 2026-04-14 09:23:29 +01:00 committed by GitHub
commit d67958121d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 104 additions and 11 deletions

View file

@ -38,8 +38,12 @@ environment:
install:
- set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH%
- if defined MSVC call "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %MSVC%
- if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc
- bash -c "python3 -c \"import re; f='c:/msys64/etc/pacman.conf'; c=open(f).read(); open(f,'w').write(re.sub(r'^SigLevel\s*=.*$', 'SigLevel = Never', c, flags=re.MULTILINE))\""
- pacman --noconfirm -Syuu
- pacman --noconfirm -S msys2-keyring
- bash -c "python3 -c \"import re; f='c:/msys64/etc/pacman.conf'; c=open(f).read(); open(f,'w').write(re.sub(r'^SigLevel\s*=.*$', 'SigLevel = Required DatabaseOptional', c, flags=re.MULTILINE))\""
- pacman --noconfirm -Syuu
- if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc
- pacman --noconfirm -S autoconf
build_script:

View file

@ -62,8 +62,26 @@ void bin_prefork(tsdn_t *tsdn, bin_t *bin);
void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin);
void bin_postfork_child(tsdn_t *tsdn, bin_t *bin);
/* Slab region allocation. */
void *bin_slab_reg_alloc(edata_t *slab, const bin_info_t *bin_info);
/*
* Slab region allocation locking contracts:
*
* bin_slab_reg_alloc()
* REQUIRES: bin->lock held by the caller.
 * Calls bitmap_sfu() followed by bitmap_set(), which together perform a non-atomic
* read-modify-write on the slab bitmap at every tree level. Two threads
* that reach bitmap_sfu() concurrently (even for different bits in the same
* group word) will corrupt the bitmap and eventually abort inside
* bitmap_set(). malloc_mutex_assert_owner() is used internally to catch
* call sites that forget to acquire the lock.
*
* bin_slab_reg_alloc_batch()
* Shared slab (already inserted into a bin): bin->lock MUST be held.
* Fresh slab (just returned by arena_slab_alloc, not yet visible to any
* other thread): no lock required.
* The caller is responsible for selecting the correct case.
*/
void *bin_slab_reg_alloc(tsdn_t *tsdn, bin_t *bin,
edata_t *slab, const bin_info_t *bin_info);
void bin_slab_reg_alloc_batch(
edata_t *slab, const bin_info_t *bin_info, unsigned cnt, void **ptrs);

View file

@ -210,6 +210,33 @@ bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) {
return !(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK)));
}
/*
* bitmap_set() performs a non-atomic read-modify-write on every level of the
* bitmap tree:
*
* g = *gp; // READ
* g ^= ZU(1) << bit; // MODIFY (thread-local copy)
* *gp = g; // WRITE BACK — not atomic, no barrier
*
* If two threads enter bitmap_set() concurrently for bits that share the same
* group word (or a common ancestor group in the BITMAP_USE_TREE path), one
* thread's write silently clobbers the other's. On the next call the
* clobbered bit still appears free; bitmap_sfu() returns it again; the second
* call to bitmap_set() for that bit trips:
*
* assert(!bitmap_get(bitmap, binfo, bit)); // line 220 — bit already set
*
* or, after a group drains to zero and tree propagation begins:
*
* assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); // line 237
*
* Either assert calls abort() and produces the observed coredump.
*
 * bitmap_set() is intentionally not thread-safe. Callers must hold the
 * owning bin_t.lock for the entire duration of bitmap_sfu() + bitmap_set().
* The fix enforces this contract at the bin_slab_reg_alloc() call boundary
* via malloc_mutex_assert_owner(); see src/bin.c and include/.../bin.h.
*/
static inline void
bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) {
size_t goff;

View file

@ -68,12 +68,41 @@ bin_postfork_child(tsdn_t *tsdn, bin_t *bin) {
malloc_mutex_postfork_child(tsdn, &bin->lock);
}
/*
 * bin_slab_reg_alloc() enforces bin->lock ownership before touching the bitmap.
*
* Root cause of issue #2875 (and #2772):
* bitmap_set() is not thread-safe. It does a plain read-modify-write on
* each level of the bitmap tree with no atomics or barriers. If two threads
 * ever reach bitmap_sfu() + bitmap_set() concurrently on the same slab
 * bitmap — even for different bit positions that share a group word — one
 * write silently discards the other. The clobbered bit still looks free on
* the next allocation attempt; bitmap_sfu() selects it again; the second
* call to bitmap_set() fires:
*
* assert(!bitmap_get(bitmap, binfo, bit)); // bitmap.h:220
*
* which calls abort() and produces the coredump.
*
* The existing callers (bin_malloc_with_fresh_slab, bin_malloc_no_fresh_slab)
* already assert lock ownership via malloc_mutex_assert_owner(), but
* bin_slab_reg_alloc() itself had no such check, making it easy for future
* callers to silently bypass the requirement.
*
* Fix: thread tsdn_t *tsdn and bin_t *bin through the signature and add
* malloc_mutex_assert_owner() as the first statement, so any caller that
* forgets to hold the lock is caught immediately in debug builds rather than
* producing a rare, hard-to-reproduce coredump in production.
*/
void *
bin_slab_reg_alloc(edata_t *slab, const bin_info_t *bin_info) {
bin_slab_reg_alloc(tsdn_t *tsdn, bin_t *bin,
edata_t *slab, const bin_info_t *bin_info) {
void *ret;
slab_data_t *slab_data = edata_slab_data_get(slab);
size_t regind;
malloc_mutex_assert_owner(tsdn, &bin->lock);
assert(edata_nfree_get(slab) > 0);
assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info));
@ -281,7 +310,7 @@ bin_malloc_with_fresh_slab(tsdn_t *tsdn, bin_t *bin,
malloc_mutex_assert_owner(tsdn, &bin->lock);
bin_refill_slabcur_with_fresh_slab(tsdn, bin, binind, fresh_slab);
return bin_slab_reg_alloc(bin->slabcur, &bin_infos[binind]);
return bin_slab_reg_alloc(tsdn, bin, bin->slabcur, &bin_infos[binind]);
}
bool
@ -312,7 +341,7 @@ bin_malloc_no_fresh_slab(tsdn_t *tsdn, bool is_auto, bin_t *bin,
}
assert(bin->slabcur != NULL && edata_nfree_get(bin->slabcur) > 0);
return bin_slab_reg_alloc(bin->slabcur, &bin_infos[binind]);
return bin_slab_reg_alloc(tsdn, bin, bin->slabcur, &bin_infos[binind]);
}
bin_t *

View file

@ -54,12 +54,15 @@ TEST_END
* Test single-region allocation from a slab.
*/
TEST_BEGIN(test_bin_slab_reg_alloc) {
tsdn_t *tsdn = tsdn_fetch();
bin_t bin;
szind_t binind = 0;
const bin_info_t *bin_info = &bin_infos[binind];
edata_t slab;
unsigned nregs;
unsigned i;
bin_init(&bin);
create_mock_slab(&slab, binind, 0);
nregs = bin_info->nregs;
@ -68,7 +71,9 @@ TEST_BEGIN(test_bin_slab_reg_alloc) {
expect_u_gt(edata_nfree_get(&slab), 0,
"Slab should have free regions");
reg = bin_slab_reg_alloc(&slab, bin_info);
malloc_mutex_lock(tsdn, &bin.lock);
reg = bin_slab_reg_alloc(tsdn, &bin, &slab, bin_info);
malloc_mutex_unlock(tsdn, &bin.lock);
expect_ptr_not_null(reg,
"bin_slab_reg_alloc should return non-NULL");
/* Verify the pointer is within the slab. */
@ -198,6 +203,7 @@ TEST_END
* Test full slab list insert and remove (non-auto arena case).
*/
TEST_BEGIN(test_bin_slabs_full) {
tsdn_t *tsdn = tsdn_fetch();
bin_t bin;
szind_t binind = 0;
const bin_info_t *bin_info = &bin_infos[binind];
@ -208,9 +214,11 @@ TEST_BEGIN(test_bin_slabs_full) {
create_mock_slab(&slab, binind, 0);
/* Consume all regions so the slab appears full. */
malloc_mutex_lock(tsdn, &bin.lock);
for (i = 0; i < bin_info->nregs; i++) {
bin_slab_reg_alloc(&slab, bin_info);
bin_slab_reg_alloc(tsdn, &bin, &slab, bin_info);
}
malloc_mutex_unlock(tsdn, &bin.lock);
expect_u_eq(edata_nfree_get(&slab), 0, "Slab should be full");
/* Insert into full list (is_auto=false to actually track). */
@ -231,6 +239,7 @@ TEST_END
* Test that full slab insert/remove is a no-op for auto arenas.
*/
TEST_BEGIN(test_bin_slabs_full_auto) {
tsdn_t *tsdn = tsdn_fetch();
bin_t bin;
szind_t binind = 0;
const bin_info_t *bin_info = &bin_infos[binind];
@ -239,9 +248,11 @@ TEST_BEGIN(test_bin_slabs_full_auto) {
bin_init(&bin);
create_mock_slab(&slab, binind, 0);
malloc_mutex_lock(tsdn, &bin.lock);
for (i = 0; i < bin_info->nregs; i++) {
bin_slab_reg_alloc(&slab, bin_info);
bin_slab_reg_alloc(tsdn, &bin, &slab, bin_info);
}
malloc_mutex_unlock(tsdn, &bin.lock);
/* is_auto=true: insert should be a no-op. */
bin_slabs_full_insert(true, &bin, &slab);
@ -384,9 +395,11 @@ TEST_BEGIN(test_bin_refill_slabcur_full_to_list) {
create_mock_slab(&nonfull_slab, binind, 1);
/* Make full_slab actually full. */
malloc_mutex_lock(tsdn, &bin.lock);
for (i = 0; i < bin_info->nregs; i++) {
bin_slab_reg_alloc(&full_slab, bin_info);
bin_slab_reg_alloc(tsdn, &bin, &full_slab, bin_info);
}
malloc_mutex_unlock(tsdn, &bin.lock);
malloc_mutex_lock(tsdn, &bin.lock);
bin.slabcur = &full_slab;
@ -494,10 +507,12 @@ TEST_BEGIN(test_bin_dalloc_locked) {
nregs = bin_info->nregs;
ptrs = mallocx(nregs * sizeof(void *), 0);
assert_ptr_not_null(ptrs, "Unexpected mallocx failure");
malloc_mutex_lock(tsdn, &bin.lock);
for (i = 0; i < nregs; i++) {
ptrs[i] = bin_slab_reg_alloc(&slab, bin_info);
ptrs[i] = bin_slab_reg_alloc(tsdn, &bin, &slab, bin_info);
assert_ptr_not_null(ptrs[i], "Alloc should succeed");
}
malloc_mutex_unlock(tsdn, &bin.lock);
expect_u_eq(edata_nfree_get(&slab), 0, "Slab should be full");
/* Set this slab as slabcur so dalloc steps work correctly. */