From 7c964b03524de23eeff7fe203c764c7a0c0977ac Mon Sep 17 00:00:00 2001
From: Qi Wang
Date: Wed, 17 Mar 2021 16:35:57 -0700
Subject: [PATCH] Add rtree_write_range(): writing the same content to
 multiple leaf elements.

Apply to emap_(de)register_interior, which became noticeable in perf
profiles.
---
 include/jemalloc/internal/rtree.h | 111 ++++++++++++++++++++++++------
 src/emap.c                        |  35 +++++++---
 src/pa.c                          |   2 +-
 test/unit/rtree.c                 |  66 +++++++++++++++++-
 4 files changed, 182 insertions(+), 32 deletions(-)

diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h
index 42aa11c9..c5f0d8c4 100644
--- a/include/jemalloc/internal/rtree.h
+++ b/include/jemalloc/internal/rtree.h
@@ -137,23 +137,24 @@ bool rtree_new(rtree_t *rtree, base_t *base, bool zeroed);
 rtree_leaf_elm_t *rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree,
     rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing);
 
-JEMALLOC_ALWAYS_INLINE uintptr_t
-rtree_leafkey(uintptr_t key) {
+JEMALLOC_ALWAYS_INLINE unsigned
+rtree_leaf_maskbits(void) {
 	unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3);
 	unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits -
 	    rtree_levels[RTREE_HEIGHT-1].bits);
-	unsigned maskbits = ptrbits - cumbits;
-	uintptr_t mask = ~((ZU(1) << maskbits) - 1);
+	return ptrbits - cumbits;
+}
+
+JEMALLOC_ALWAYS_INLINE uintptr_t
+rtree_leafkey(uintptr_t key) {
+	uintptr_t mask = ~((ZU(1) << rtree_leaf_maskbits()) - 1);
 	return (key & mask);
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
 rtree_cache_direct_map(uintptr_t key) {
-	unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3);
-	unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits -
-	    rtree_levels[RTREE_HEIGHT-1].bits);
-	unsigned maskbits = ptrbits - cumbits;
-	return (size_t)((key >> maskbits) & (RTREE_CTX_NCACHE - 1));
+	return (size_t)((key >> rtree_leaf_maskbits()) &
+	    (RTREE_CTX_NCACHE - 1));
 }
 
 JEMALLOC_ALWAYS_INLINE uintptr_t
@@ -265,30 +266,49 @@ rtree_leaf_elm_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
 #endif
 }
 
-static inline void
-rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree,
-    rtree_leaf_elm_t *elm, rtree_contents_t contents) {
-	assert((uintptr_t)contents.edata % EDATA_ALIGNMENT == 0);
+JEMALLOC_ALWAYS_INLINE void
+rtree_contents_encode(rtree_contents_t contents, void **bits,
+    unsigned *additional) {
 #ifdef RTREE_LEAF_COMPACT
-	uintptr_t bits = rtree_leaf_elm_bits_encode(contents);
-	atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE);
+	*bits = (void *)rtree_leaf_elm_bits_encode(contents);
 #else
-	unsigned metadata_bits = (unsigned)contents.metadata.slab
+	*additional = (unsigned)contents.metadata.slab
 	    | ((unsigned)contents.metadata.is_head << 1)
 	    | ((unsigned)contents.metadata.state << RTREE_LEAF_STATE_SHIFT)
 	    | ((unsigned)contents.metadata.szind << (RTREE_LEAF_STATE_SHIFT +
 	    RTREE_LEAF_STATE_WIDTH));
-	atomic_store_u(&elm->le_metadata, metadata_bits, ATOMIC_RELEASE);
+	*bits = contents.edata;
+#endif
+}
+
+JEMALLOC_ALWAYS_INLINE void
+rtree_leaf_elm_write_commit(tsdn_t *tsdn, rtree_t *rtree,
+    rtree_leaf_elm_t *elm, void *bits, unsigned additional) {
+#ifdef RTREE_LEAF_COMPACT
+	atomic_store_p(&elm->le_bits, bits, ATOMIC_RELEASE);
+#else
+	atomic_store_u(&elm->le_metadata, additional, ATOMIC_RELEASE);
 	/*
 	 * Write edata last, since the element is atomically considered valid
 	 * as soon as the edata field is non-NULL.
	 */
-	atomic_store_p(&elm->le_edata, contents.edata, ATOMIC_RELEASE);
+	atomic_store_p(&elm->le_edata, bits, ATOMIC_RELEASE);
 #endif
 }
 
+JEMALLOC_ALWAYS_INLINE void
+rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree,
+    rtree_leaf_elm_t *elm, rtree_contents_t contents) {
+	assert((uintptr_t)contents.edata % EDATA_ALIGNMENT == 0);
+	void *bits;
+	unsigned additional;
+
+	rtree_contents_encode(contents, &bits, &additional);
+	rtree_leaf_elm_write_commit(tsdn, rtree, elm, bits, additional);
+}
+
 /* The state field can be updated independently (and more frequently). */
-static inline void
+JEMALLOC_ALWAYS_INLINE void
 rtree_leaf_elm_state_update(tsdn_t *tsdn, rtree_t *rtree,
     rtree_leaf_elm_t *elm1, rtree_leaf_elm_t *elm2, extent_state_t state) {
 	assert(elm1 != NULL);
@@ -447,7 +467,45 @@ rtree_metadata_try_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ct
 	return false;
 }
 
-static inline bool
+JEMALLOC_ALWAYS_INLINE void
+rtree_write_range_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
+    uintptr_t base, uintptr_t end, rtree_contents_t contents, bool clearing) {
+	assert((base & PAGE_MASK) == 0 && (end & PAGE_MASK) == 0);
+	/*
+	 * Only used for emap_(de)register_interior, which implies the
+	 * boundaries have been registered already. Therefore all the lookups
+	 * are dependent w/o init_missing, assuming the range spans across at
+	 * most 2 rtree leaf nodes (each covers 1 GiB of vaddr).
+	 */
+	void *bits;
+	unsigned additional;
+	rtree_contents_encode(contents, &bits, &additional);
+
+	rtree_leaf_elm_t *elm = NULL; /* Dead store. */
+	for (uintptr_t addr = base; addr <= end; addr += PAGE) {
+		if (addr == base ||
+		    (addr & ((ZU(1) << rtree_leaf_maskbits()) - 1)) == 0) {
+			elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, addr,
+			    /* dependent */ true, /* init_missing */ false);
+			assert(elm != NULL);
+		}
+		assert(elm == rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, addr,
+		    /* dependent */ true, /* init_missing */ false));
+		assert(!clearing || rtree_leaf_elm_read(tsdn, rtree, elm,
+		    /* dependent */ true).edata != NULL);
+		rtree_leaf_elm_write_commit(tsdn, rtree, elm, bits, additional);
+		elm++;
+	}
+}
+
+JEMALLOC_ALWAYS_INLINE void
+rtree_write_range(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
+    uintptr_t base, uintptr_t end, rtree_contents_t contents) {
+	rtree_write_range_impl(tsdn, rtree, rtree_ctx, base, end, contents,
+	    /* clearing */ false);
+}
+
+JEMALLOC_ALWAYS_INLINE bool
 rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key,
     rtree_contents_t contents) {
 	rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx,
@@ -478,4 +536,17 @@ rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
 	rtree_leaf_elm_write(tsdn, rtree, elm, contents);
 }
 
+static inline void
+rtree_clear_range(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
+    uintptr_t base, uintptr_t end) {
+	rtree_contents_t contents;
+	contents.edata = NULL;
+	contents.metadata.szind = SC_NSIZES;
+	contents.metadata.slab = false;
+	contents.metadata.is_head = false;
+	contents.metadata.state = (extent_state_t)0;
+	rtree_write_range_impl(tsdn, rtree, rtree_ctx, base, end, contents,
+	    /* clearing */ true);
+}
+
 #endif /* JEMALLOC_INTERNAL_RTREE_H */
diff --git a/src/emap.c b/src/emap.c
index 0fe230ab..a1f402b8 100644
--- a/src/emap.c
+++ b/src/emap.c
@@ -241,6 +241,7 @@ emap_register_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
 	return false;
 }
 
+/* Invoked *after* emap_register_boundary. */
 void
 emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
     szind_t szind) {
@@ -249,6 +250,22 @@ emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
 	assert(edata_slab_get(edata));
 	assert(edata_state_get(edata) == extent_state_active);
 
+	if (config_debug) {
+		/* Make sure the boundary is registered already. */
+		rtree_leaf_elm_t *elm_a, *elm_b;
+		bool err = emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx,
+		    edata, /* dependent */ true, /* init_missing */ false,
+		    &elm_a, &elm_b);
+		assert(!err);
+		rtree_contents_t contents_a, contents_b;
+		contents_a = rtree_leaf_elm_read(tsdn, &emap->rtree, elm_a,
+		    /* dependent */ true);
+		contents_b = rtree_leaf_elm_read(tsdn, &emap->rtree, elm_b,
+		    /* dependent */ true);
+		assert(contents_a.edata == edata && contents_b.edata == edata);
+		assert(contents_a.metadata.slab && contents_b.metadata.slab);
+	}
+
 	rtree_contents_t contents;
 	contents.edata = edata;
 	contents.metadata.szind = szind;
@@ -256,12 +273,10 @@ emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
 	contents.metadata.state = extent_state_active;
 	contents.metadata.is_head = false; /* Not allowed to access. */
 
-	/* Register interior. */
-	for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) {
-		rtree_write(tsdn, &emap->rtree, rtree_ctx,
-		    (uintptr_t)edata_base_get(edata) + (uintptr_t)(i <<
-		    LG_PAGE), contents);
-	}
+	assert(edata_size_get(edata) > (2 << LG_PAGE));
+	rtree_write_range(tsdn, &emap->rtree, rtree_ctx,
+	    (uintptr_t)edata_base_get(edata) + PAGE,
+	    (uintptr_t)edata_last_get(edata) - PAGE, contents);
 }
 
 void
@@ -289,10 +304,10 @@ emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata) {
 	EMAP_DECLARE_RTREE_CTX;
 
 	assert(edata_slab_get(edata));
-	for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) {
-		rtree_clear(tsdn, &emap->rtree, rtree_ctx,
-		    (uintptr_t)edata_base_get(edata) + (uintptr_t)(i <<
-		    LG_PAGE));
+	if (edata_size_get(edata) > (2 << LG_PAGE)) {
+		rtree_clear_range(tsdn, &emap->rtree, rtree_ctx,
+		    (uintptr_t)edata_base_get(edata) + PAGE,
+		    (uintptr_t)edata_last_get(edata) - PAGE);
 	}
 }
 
diff --git a/src/pa.c b/src/pa.c
index dd61aaa2..90809b35 100644
--- a/src/pa.c
+++ b/src/pa.c
@@ -120,7 +120,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment,
 		emap_remap(tsdn, shard->emap, edata, szind, slab);
 		edata_szind_set(edata, szind);
 		edata_slab_set(edata, slab);
-		if (slab) {
+		if (slab && (size > 2 * PAGE)) {
 			emap_register_interior(tsdn, shard->emap, edata, szind);
 		}
 	}
diff --git a/test/unit/rtree.c b/test/unit/rtree.c
index 9251652c..7b2a4e36 100644
--- a/test/unit/rtree.c
+++ b/test/unit/rtree.c
@@ -210,11 +210,75 @@ TEST_BEGIN(test_rtree_random) {
 }
 TEST_END
 
+static void
+test_rtree_range_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t start,
+    uintptr_t end) {
+	rtree_ctx_t rtree_ctx;
+	rtree_ctx_data_init(&rtree_ctx);
+
+	edata_t *edata_e = alloc_edata();
+	edata_init(edata_e, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0,
+	    extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD);
+	rtree_contents_t contents;
+	contents.edata = edata_e;
+	contents.metadata.szind = SC_NSIZES;
+	contents.metadata.slab = false;
+	contents.metadata.is_head = false;
+	contents.metadata.state = extent_state_active;
+
+	expect_false(rtree_write(tsdn, rtree, &rtree_ctx, start,
+	    contents), "Unexpected rtree_write() failure");
+	expect_false(rtree_write(tsdn, rtree, &rtree_ctx, end,
+	    contents), "Unexpected rtree_write() failure");
+
+	rtree_write_range(tsdn, rtree, &rtree_ctx, start, end, contents);
+	for (uintptr_t i = 0; i < ((end - start) >> LG_PAGE); i++) {
+		expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx,
+		    start + (i << LG_PAGE)).edata, edata_e,
+		    "rtree_read() should return previously set value");
+	}
+
+	rtree_clear_range(tsdn, rtree, &rtree_ctx, start, end);
+	rtree_leaf_elm_t *elm;
+	for (uintptr_t i = 0; i < ((end - start) >> LG_PAGE); i++) {
+		elm = rtree_leaf_elm_lookup(tsdn, rtree, &rtree_ctx,
+		    start + (i << LG_PAGE), false, false);
+		expect_ptr_not_null(elm, "Should have been initialized.");
+		expect_ptr_null(rtree_leaf_elm_read(tsdn, rtree, elm,
+		    false).edata, "Should have been cleared.");
+	}
+}
+
+TEST_BEGIN(test_rtree_range) {
+	tsdn_t *tsdn = tsdn_fetch();
+	base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks);
+	expect_ptr_not_null(base, "Unexpected base_new failure");
+
+	rtree_t *rtree = &test_rtree;
+	expect_false(rtree_new(rtree, base, false),
+	    "Unexpected rtree_new() failure");
+
+	/* Not crossing rtree node boundary first. */
+	uintptr_t start = ZU(1) << rtree_leaf_maskbits();
+	uintptr_t end = start + (ZU(100) << LG_PAGE);
+	test_rtree_range_write(tsdn, rtree, start, end);
+
+	/* Crossing rtree node boundary. */
+	start = (ZU(1) << rtree_leaf_maskbits()) - (ZU(10) << LG_PAGE);
+	end = start + (ZU(100) << LG_PAGE);
+	assert_ptr_ne((void *)rtree_leafkey(start), (void *)rtree_leafkey(end),
+	    "The range should span across two rtree nodes");
+	test_rtree_range_write(tsdn, rtree, start, end);
+
+	base_delete(tsdn, base);
+}
+TEST_END
+
 int
 main(void) {
 	return test(
 	    test_rtree_read_empty,
 	    test_rtree_extrema,
 	    test_rtree_bits,
-	    test_rtree_random);
+	    test_rtree_random,
+	    test_rtree_range);
 }
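
Reviewer note on the core pattern: rtree_write_range_impl() above avoids a full
radix-tree lookup per page by resolving a leaf element only at the range base
and whenever the address crosses into the next leaf (i.e. when the low
rtree_leaf_maskbits() bits wrap to zero); in between, it just increments the
element pointer, since elements within a leaf are contiguous. The standalone C
sketch below illustrates the same pattern on a made-up two-level table. It is
not jemalloc code: all toy_* names and the toy geometry (4 KiB pages, 256-slot
leaves) are hypothetical stand-ins for the real rtree.

/*
 * toy_rtree.c: illustrates the range-write pattern from the patch above on a
 * hypothetical two-level table: one lookup at the range base and at each
 * leaf-boundary crossing, pointer increments for the pages in between.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define TOY_LG_PAGE	12			/* 4 KiB pages */
#define TOY_PAGE	((uintptr_t)1 << TOY_LG_PAGE)
#define TOY_LEAF_BITS	8			/* 256 pages per leaf */
#define TOY_LEAF_SLOTS	((size_t)1 << TOY_LEAF_BITS)
/* Key bits that vary within a single leaf: page offset + slot index. */
#define TOY_MASKBITS	(TOY_LG_PAGE + TOY_LEAF_BITS)
#define TOY_NLEAVES	16

typedef struct {
	void *slots[TOY_LEAF_SLOTS];
} toy_leaf_t;

static toy_leaf_t toy_leaves[TOY_NLEAVES];

/* Analogue of rtree_leaf_elm_lookup(): resolve the slot for a key. */
static void **
toy_lookup(uintptr_t key) {
	size_t leaf = (size_t)(key >> TOY_MASKBITS) % TOY_NLEAVES;
	size_t slot = (size_t)(key >> TOY_LG_PAGE) & (TOY_LEAF_SLOTS - 1);
	return &toy_leaves[leaf].slots[slot];
}

/* Analogue of rtree_write_range(): O(leaves) lookups, not O(pages). */
static void
toy_write_range(uintptr_t base, uintptr_t end, void *v) {
	void **elm = NULL; /* Dead store, as in the patch. */
	for (uintptr_t addr = base; addr <= end; addr += TOY_PAGE) {
		if (addr == base ||
		    (addr & (((uintptr_t)1 << TOY_MASKBITS) - 1)) == 0) {
			/* Range base, or crossed into the next leaf. */
			elm = toy_lookup(addr);
		}
		assert(elm == toy_lookup(addr));
		*elm = v;
		elm++; /* Elements within a leaf are contiguous. */
	}
}

int
main(void) {
	int x;
	/* Start 6 pages before a leaf boundary so the range crosses it. */
	uintptr_t base = TOY_PAGE * (TOY_LEAF_SLOTS - 6);
	uintptr_t end = base + 10 * TOY_PAGE;

	toy_write_range(base, end, &x);
	for (uintptr_t addr = base; addr <= end; addr += TOY_PAGE) {
		assert(*toy_lookup(addr) == &x);
	}
	printf("wrote %zu pages across a leaf boundary\n",
	    (size_t)(((end - base) >> TOY_LG_PAGE) + 1));
	return 0;
}

With the real geometry noted in the patch's comment (each leaf covering 1 GiB
of address space, and a slab interior spanning at most two leaves), the loop
performs at most two lookups for an arbitrarily long interior instead of one
per page, which is the saving behind the perf-profile improvement cited in the
commit message.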