diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 0fb08421..f0297045 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -46,6 +46,12 @@ struct cache_bin_s { * the head points to one element past the owned array. */ void **stack_head; + /* + * cur_ptr and stats are both modified frequently. Let's keep them + * close so that they have a higher chance of being on the same + * cacheline, thus less write-backs. + */ + cache_bin_stats_t tstats; /* * The low bits of the address of the first item in the stack that @@ -76,12 +82,6 @@ struct cache_bin_s { */ uint16_t low_bits_empty; - /* - * cur_ptr and stats are both modified frequently. Let's keep them - * close so that they have a higher chance of being on the same - * cacheline, thus less write-backs. - */ - cache_bin_stats_t tstats; }; typedef struct cache_bin_array_descriptor_s cache_bin_array_descriptor_t; @@ -201,36 +201,7 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, } JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, - const bool adjust_low_water) { - /* - * This may read from the empty position; however the loaded value won't - * be used. It's safe because the stack has one more slot reserved. - */ - void *ret = *bin->stack_head; - uint16_t low_bits = (uint16_t)(uintptr_t)bin->stack_head; - void **new_head = bin->stack_head + 1; - /* - * Note that the low water mark is at most empty; if we pass this check, - * we know we're non-empty. - */ - if (unlikely(low_bits == bin->low_bits_low_water)) { - if (adjust_low_water) { - if (unlikely(low_bits == bin->low_bits_empty)) { - *success = false; - return NULL; - } - /* Overflow should be impossible. */ - assert(bin->low_bits_low_water - < (uint16_t)(uintptr_t)new_head); - bin->low_bits_low_water = (uint16_t)(uintptr_t)new_head; - } else { - *success = false; - return NULL; - } - } - bin->stack_head = new_head; - +cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { /* * success (instead of ret) should be checked upon the return of this * function. We avoid checking (ret == NULL) because there is never a @@ -238,22 +209,52 @@ cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, * and eagerly checking ret would cause pipeline stall (waiting for the * cacheline). */ - *success = true; - return ret; + /* + * This may read from the empty position; however the loaded value won't + * be used. It's safe because the stack has one more slot reserved. + */ + void *ret = *bin->stack_head; + uint16_t low_bits = (uint16_t)(uintptr_t)bin->stack_head; + void **new_head = bin->stack_head + 1; + + /* + * Note that the low water mark is at most empty; if we pass this check, + * we know we're non-empty. + */ + if (likely(low_bits != bin->low_bits_low_water)) { + bin->stack_head = new_head; + *success = true; + return ret; + } + if (!adjust_low_water) { + *success = false; + return NULL; + } + /* + * In the fast-path case where we call alloc_easy and then alloc, the + * previous checking and computation is optimized away -- we didn't + * actually commit any of our operations. + */ + if (likely(low_bits != bin->low_bits_empty)) { + bin->stack_head = new_head; + bin->low_bits_low_water = (uint16_t)(uintptr_t)new_head; + *success = true; + return ret; + } + *success = false; + return NULL; } JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy_reduced(cache_bin_t *bin, bool *success) { +cache_bin_alloc_easy(cache_bin_t *bin, bool *success) { /* We don't look at info if we're not adjusting low-water. */ - return cache_bin_alloc_easy_impl(bin, success, false); + return cache_bin_alloc_impl(bin, success, false); } JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy(cache_bin_t *bin, cache_bin_info_t *info, bool *success) { - /* We don't use info now, but we may want to in the future. */ - (void)info; - return cache_bin_alloc_easy_impl(bin, success, true); +cache_bin_alloc(cache_bin_t *bin, bool *success) { + return cache_bin_alloc_impl(bin, success, true); } JEMALLOC_ALWAYS_INLINE bool diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 2d31ad0e..3b78ed27 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -36,8 +36,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, assert(binind < SC_NBINS); bin = tcache_small_bin_get(tcache, binind); - ret = cache_bin_alloc_easy(bin, &tcache_bin_info[binind], - &tcache_success); + ret = cache_bin_alloc(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { bool tcache_hard_success; @@ -80,8 +79,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(binind >= SC_NBINS &&binind < nhbins); bin = tcache_large_bin_get(tcache, binind); - ret = cache_bin_alloc_easy(bin, &tcache_bin_info[binind], - &tcache_success); + ret = cache_bin_alloc(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { /* diff --git a/src/jemalloc.c b/src/jemalloc.c index 12b4f6c3..758e3244 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2377,6 +2377,17 @@ malloc_default(size_t size) { * Begin malloc(3)-compatible functions. */ +JEMALLOC_ALWAYS_INLINE void +fastpath_success_finish(tsd_t *tsd, uint64_t allocated_after, + cache_bin_t *bin, void *ret) { + thread_allocated_set(tsd, allocated_after); + if (config_stats) { + bin->tstats.nrequests++; + } + + LOG("core.malloc.exit", "result: %p", ret); +} + /* * malloc() fastpath. * @@ -2451,17 +2462,22 @@ je_malloc(size_t size) { tcache_t *tcache = tsd_tcachep_get(tsd); cache_bin_t *bin = tcache_small_bin_get(tcache, ind); bool tcache_success; - void *ret = cache_bin_alloc_easy_reduced(bin, &tcache_success); + void *ret; + /* + * We split up the code this way so that redundant low-water + * computation doesn't happen on the (more common) case in which we + * don't touch the low water mark. The compiler won't do this + * duplication on its own. + */ + ret = cache_bin_alloc_easy(bin, &tcache_success); if (tcache_success) { - thread_allocated_set(tsd, allocated_after); - if (config_stats) { - bin->tstats.nrequests++; - } - - LOG("core.malloc.exit", "result: %p", ret); - - /* Fastpath success */ + fastpath_success_finish(tsd, allocated_after, bin, ret); + return ret; + } + ret = cache_bin_alloc(bin, &tcache_success); + if (tcache_success) { + fastpath_success_finish(tsd, allocated_after, bin, ret); return ret; } diff --git a/src/tcache.c b/src/tcache.c index e9632236..9afc0063 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -104,8 +104,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, assert(tcache->arena != NULL); arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind); - ret = cache_bin_alloc_easy(tbin, &tcache_bin_info[binind], - tcache_success); + ret = cache_bin_alloc(tbin, tcache_success); return ret; } diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index 2623b384..cbd8ce02 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -17,7 +17,7 @@ do_fill_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, cache_bin_low_water_set(bin); for (cache_bin_sz_t i = 0; i < nfill_succeed; i++) { - ptr = cache_bin_alloc_easy(bin, info, &success); + ptr = cache_bin_alloc(bin, &success); expect_true(success, ""); expect_ptr_eq(ptr, (void *)&ptrs[i], "Should pop in order filled"); @@ -48,7 +48,7 @@ do_flush_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, expect_true(cache_bin_ncached_get(bin, info) == nfill - nflush, ""); while (cache_bin_ncached_get(bin, info) > 0) { - cache_bin_alloc_easy(bin, info, &success); + cache_bin_alloc(bin, &success); } } @@ -78,11 +78,11 @@ TEST_BEGIN(test_cache_bin) { expect_true(cache_bin_ncached_get(&bin, &info) == 0, ""); expect_true(cache_bin_low_water_get(&bin, &info) == 0, ""); - ptr = cache_bin_alloc_easy_reduced(&bin, &success); + ptr = cache_bin_alloc_easy(&bin, &success); expect_false(success, "Shouldn't successfully allocate when empty"); expect_ptr_null(ptr, "Shouldn't get a non-null pointer on failure"); - ptr = cache_bin_alloc_easy(&bin, &info, &success); + ptr = cache_bin_alloc(&bin, &success); expect_false(success, "Shouldn't successfully allocate when empty"); expect_ptr_null(ptr, "Shouldn't get a non-null pointer on failure"); @@ -112,10 +112,10 @@ TEST_BEGIN(test_cache_bin) { expect_true(cache_bin_ncached_get(&bin, &info) == ncached_max - i, ""); /* - * This should fail -- the reduced version can't change low - * water. + * This should fail -- the easy variant can't change the low + * water mark. */ - ptr = cache_bin_alloc_easy_reduced(&bin, &success); + ptr = cache_bin_alloc_easy(&bin, &success); expect_ptr_null(ptr, ""); expect_false(success, ""); expect_true(cache_bin_low_water_get(&bin, &info) @@ -124,7 +124,7 @@ TEST_BEGIN(test_cache_bin) { == ncached_max - i, ""); /* This should succeed, though. */ - ptr = cache_bin_alloc_easy(&bin, &info, &success); + ptr = cache_bin_alloc(&bin, &success); expect_true(success, ""); expect_ptr_eq(ptr, &ptrs[ncached_max - i - 1], "Alloc should pop in stack order"); @@ -135,10 +135,10 @@ TEST_BEGIN(test_cache_bin) { } /* Now we're empty -- all alloc attempts should fail. */ expect_true(cache_bin_ncached_get(&bin, &info) == 0, ""); - ptr = cache_bin_alloc_easy_reduced(&bin, &success); + ptr = cache_bin_alloc_easy(&bin, &success); expect_ptr_null(ptr, ""); expect_false(success, ""); - ptr = cache_bin_alloc_easy(&bin, &info, &success); + ptr = cache_bin_alloc(&bin, &success); expect_ptr_null(ptr, ""); expect_false(success, ""); @@ -156,18 +156,18 @@ TEST_BEGIN(test_cache_bin) { * Size is bigger than low water -- the reduced version should * succeed. */ - ptr = cache_bin_alloc_easy_reduced(&bin, &success); + ptr = cache_bin_alloc_easy(&bin, &success); expect_true(success, ""); expect_ptr_eq(ptr, &ptrs[i], ""); } /* But now, we've hit low-water. */ - ptr = cache_bin_alloc_easy_reduced(&bin, &success); + ptr = cache_bin_alloc_easy(&bin, &success); expect_false(success, ""); expect_ptr_null(ptr, ""); /* We're going to test filling -- we must be empty to start. */ while (cache_bin_ncached_get(&bin, &info)) { - cache_bin_alloc_easy(&bin, &info, &success); + cache_bin_alloc(&bin, &success); expect_true(success, ""); }