From fa2d64c94b07ee21a0f6f44b9fe6e3bbefa51c6c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 12 Feb 2017 17:03:46 -0800 Subject: [PATCH] Convert arena->prof_accumbytes synchronization to atomics. --- include/jemalloc/internal/arena_inlines_a.h | 34 +-------- include/jemalloc/internal/arena_structs_b.h | 3 +- include/jemalloc/internal/atomic_inlines.h | 4 +- include/jemalloc/internal/atomic_types.h | 8 ++ .../jemalloc/internal/jemalloc_internal.h.in | 7 +- include/jemalloc/internal/private_symbols.txt | 5 +- include/jemalloc/internal/prof_externs.h | 1 + include/jemalloc/internal/prof_inlines_a.h | 76 +++++++++++++++++++ .../{prof_inlines.h => prof_inlines_b.h} | 6 +- include/jemalloc/internal/prof_structs.h | 7 ++ include/jemalloc/internal/prof_types.h | 1 + include/jemalloc/internal/witness_types.h | 1 + src/arena.c | 18 +---- src/prof.c | 14 ++++ src/tcache.c | 2 +- 15 files changed, 128 insertions(+), 59 deletions(-) create mode 100644 include/jemalloc/internal/atomic_types.h create mode 100644 include/jemalloc/internal/prof_inlines_a.h rename include/jemalloc/internal/{prof_inlines.h => prof_inlines_b.h} (98%) diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h index a81aaf56..ea7e0995 100644 --- a/include/jemalloc/internal/arena_inlines_a.h +++ b/include/jemalloc/internal/arena_inlines_a.h @@ -6,8 +6,6 @@ unsigned arena_ind_get(const arena_t *arena); void arena_internal_add(arena_t *arena, size_t size); void arena_internal_sub(arena_t *arena, size_t size); size_t arena_internal_get(arena_t *arena); -bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); -bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes); #endif /* JEMALLOC_ENABLE_INLINE */ @@ -33,29 +31,6 @@ arena_internal_get(arena_t *arena) { return atomic_read_zu(&arena->stats.internal); } -JEMALLOC_INLINE bool -arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) { - cassert(config_prof); - assert(prof_interval != 0); - - arena->prof_accumbytes += accumbytes; - if (arena->prof_accumbytes >= prof_interval) { - arena->prof_accumbytes %= prof_interval; - return true; - } - return false; -} - -JEMALLOC_INLINE bool -arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) { - cassert(config_prof); - - if (likely(prof_interval == 0)) { - return false; - } - return arena_prof_accum_impl(arena, accumbytes); -} - JEMALLOC_INLINE bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) { cassert(config_prof); @@ -64,14 +39,7 @@ arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) { return false; } - { - bool ret; - - malloc_mutex_lock(tsdn, &arena->lock); - ret = arena_prof_accum_impl(arena, accumbytes); - malloc_mutex_unlock(tsdn, &arena->lock); - return ret; - } + return prof_accum_add(tsdn, &arena->prof_accum, accumbytes); } #endif /* (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) */ diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index dde26894..2ee5690e 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -138,7 +138,8 @@ struct arena_s { */ ql_head(tcache_t) tcache_ql; - /* Synchronization: lock. */ + /* Synchronization: internal. */ + prof_accum_t prof_accum; uint64_t prof_accumbytes; /* diff --git a/include/jemalloc/internal/atomic_inlines.h b/include/jemalloc/internal/atomic_inlines.h index 7c1902f8..de66d57d 100644 --- a/include/jemalloc/internal/atomic_inlines.h +++ b/include/jemalloc/internal/atomic_inlines.h @@ -23,7 +23,7 @@ */ #ifndef JEMALLOC_ENABLE_INLINE -# if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +# ifdef JEMALLOC_ATOMIC_U64 uint64_t atomic_add_u64(uint64_t *p, uint64_t x); uint64_t atomic_sub_u64(uint64_t *p, uint64_t x); bool atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s); @@ -50,7 +50,7 @@ void atomic_write_u(unsigned *p, unsigned x); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) /******************************************************************************/ /* 64-bit operations. */ -#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +#ifdef JEMALLOC_ATOMIC_U64 # if (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint64_t atomic_add_u64(uint64_t *p, uint64_t x) { diff --git a/include/jemalloc/internal/atomic_types.h b/include/jemalloc/internal/atomic_types.h new file mode 100644 index 00000000..0fd5e5b5 --- /dev/null +++ b/include/jemalloc/internal/atomic_types.h @@ -0,0 +1,8 @@ +#ifndef JEMALLOC_INTERNAL_ATOMIC_TYPES_H +#define JEMALLOC_INTERNAL_ATOMIC_TYPES_H + +#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +# define JEMALLOC_ATOMIC_U64 +#endif + +#endif /* JEMALLOC_INTERNAL_ATOMIC_TYPES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index bace9c46..7e9c24b7 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -380,6 +380,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/nstime_types.h" #include "jemalloc/internal/util_types.h" +#include "jemalloc/internal/atomic_types.h" #include "jemalloc/internal/spin_types.h" #include "jemalloc/internal/prng_types.h" #include "jemalloc/internal/ticker_types.h" @@ -419,10 +420,10 @@ typedef unsigned szind_t; #include "jemalloc/internal/extent_structs.h" #include "jemalloc/internal/extent_dss_structs.h" #include "jemalloc/internal/base_structs.h" +#include "jemalloc/internal/prof_structs.h" #include "jemalloc/internal/arena_structs_b.h" #include "jemalloc/internal/rtree_structs.h" #include "jemalloc/internal/tcache_structs.h" -#include "jemalloc/internal/prof_structs.h" #include "jemalloc/internal/tsd_structs.h" @@ -902,6 +903,7 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) { * Include portions of arena code interleaved with tcache code in order to * resolve circular dependencies. */ +#include "jemalloc/internal/prof_inlines_a.h" #include "jemalloc/internal/arena_inlines_a.h" #ifndef JEMALLOC_ENABLE_INLINE @@ -1163,8 +1165,7 @@ ixalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, } #endif -#include "jemalloc/internal/prof_inlines.h" - +#include "jemalloc/internal/prof_inlines_b.h" #ifdef __cplusplus } diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index ab5a672c..4e799915 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -54,8 +54,6 @@ arena_prefork1 arena_prefork2 arena_prefork3 arena_prof_accum -arena_prof_accum_impl -arena_prof_accum_locked arena_prof_promote arena_prof_tctx_get arena_prof_tctx_reset @@ -364,6 +362,9 @@ prng_range_zu prng_state_next_u32 prng_state_next_u64 prng_state_next_zu +prof_accum_add +prof_accum_cancel +prof_accum_init prof_active prof_active_get prof_active_get_unlocked diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 76505f82..f3b6f8d3 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -55,6 +55,7 @@ extern prof_dump_header_t *prof_dump_header; void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, uint64_t *accumbytes); #endif +bool prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/prof_inlines_a.h b/include/jemalloc/internal/prof_inlines_a.h new file mode 100644 index 00000000..d77635a8 --- /dev/null +++ b/include/jemalloc/internal/prof_inlines_a.h @@ -0,0 +1,76 @@ +#ifndef JEMALLOC_INTERNAL_PROF_INLINES_A_H +#define JEMALLOC_INTERNAL_PROF_INLINES_A_H + +#ifndef JEMALLOC_ENABLE_INLINE +bool prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, + uint64_t accumbytes); +void prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) +JEMALLOC_INLINE bool +prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, uint64_t accumbytes) { + cassert(config_prof); + + bool overflow; + uint64_t a0, a1; + + /* + * If the application allocates fast enough (and/or if idump is slow + * enough), extreme overflow here (a1 >= prof_interval * 2) can cause + * idump trigger coalescing. This is an intentional mechanism that + * avoids rate-limiting allocation. + */ +#ifdef JEMALLOC_ATOMIC_U64 + do { + a0 = atomic_read_u64(&prof_accum->accumbytes); + a1 = a0 + accumbytes; + assert(a1 >= a0); + overflow = (a1 >= prof_interval); + if (overflow) { + a1 %= prof_interval; + } + } while (atomic_cas_u64(&prof_accum->accumbytes, a0, a1)); +#else + malloc_mutex_lock(tsdn, &prof_accum->mtx); + a0 = prof_accum->accumbytes; + a1 = a0 + accumbytes; + overflow = (a1 >= prof_interval); + if (overflow) { + a1 %= prof_interval; + } + prof_accum->accumbytes = a1; + malloc_mutex_unlock(tsdn, &prof_accum->mtx); +#endif + return overflow; +} + +JEMALLOC_INLINE void +prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize) { + cassert(config_prof); + + /* + * Cancel out as much of the excessive prof_accumbytes increase as + * possible without underflowing. Interval-triggered dumps occur + * slightly more often than intended as a result of incomplete + * canceling. + */ + uint64_t a0, a1; +#ifdef JEMALLOC_ATOMIC_U64 + do { + a0 = atomic_read_u64(&prof_accum->accumbytes); + a1 = (a0 >= LARGE_MINCLASS - usize) ? a0 - (LARGE_MINCLASS - + usize) : 0; + } while (atomic_cas_u64(&prof_accum->accumbytes, a0, a1)); +#else + malloc_mutex_lock(tsdn, &prof_accum->mtx); + a0 = prof_accum->accumbytes; + a1 = (a0 >= LARGE_MINCLASS - usize) ? a0 - (LARGE_MINCLASS - usize) : + 0; + prof_accum->accumbytes = a1; + malloc_mutex_unlock(tsdn, &prof_accum->mtx); +#endif +} +#endif + +#endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */ diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines_b.h similarity index 98% rename from include/jemalloc/internal/prof_inlines.h rename to include/jemalloc/internal/prof_inlines_b.h index aba2936a..9e969a07 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -1,5 +1,5 @@ -#ifndef JEMALLOC_INTERNAL_PROF_INLINES_H -#define JEMALLOC_INTERNAL_PROF_INLINES_H +#ifndef JEMALLOC_INTERNAL_PROF_INLINES_B_H +#define JEMALLOC_INTERNAL_PROF_INLINES_B_H #ifndef JEMALLOC_ENABLE_INLINE bool prof_active_get_unlocked(void); @@ -237,4 +237,4 @@ prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, size_t usize) { } #endif -#endif /* JEMALLOC_INTERNAL_PROF_INLINES_H */ +#endif /* JEMALLOC_INTERNAL_PROF_INLINES_B_H */ diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index caae1257..afff6aa5 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -15,6 +15,13 @@ typedef struct { } prof_unwind_data_t; #endif +struct prof_accum_s { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_t mtx; +#endif + uint64_t accumbytes; +}; + struct prof_cnt_s { /* Profiling counters. */ uint64_t curobjs; diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index ff0db65e..1eff995e 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_PROF_TYPES_H typedef struct prof_bt_s prof_bt_t; +typedef struct prof_accum_s prof_accum_t; typedef struct prof_cnt_s prof_cnt_t; typedef struct prof_tctx_s prof_tctx_t; typedef struct prof_gctx_s prof_gctx_t; diff --git a/include/jemalloc/internal/witness_types.h b/include/jemalloc/internal/witness_types.h index 29299168..f919cc5a 100644 --- a/include/jemalloc/internal/witness_types.h +++ b/include/jemalloc/internal/witness_types.h @@ -47,6 +47,7 @@ typedef int witness_comp_t (const witness_t *, void *, const witness_t *, #define WITNESS_RANK_ARENA_LARGE WITNESS_RANK_LEAF #define WITNESS_RANK_DSS WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_ACCUM WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_DUMP_SEQ WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF diff --git a/src/arena.c b/src/arena.c index 345c57df..40db9d1d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1148,19 +1148,7 @@ arena_prof_promote(tsdn_t *tsdn, extent_t *extent, const void *ptr, extent_usize_set(extent, usize); - /* - * Cancel out as much of the excessive prof_accumbytes increase as - * possible without underflowing. Interval-triggered dumps occur - * slightly more often than intended as a result of incomplete - * canceling. - */ - malloc_mutex_lock(tsdn, &arena->lock); - if (arena->prof_accumbytes >= LARGE_MINCLASS - usize) { - arena->prof_accumbytes -= LARGE_MINCLASS - usize; - } else { - arena->prof_accumbytes = 0; - } - malloc_mutex_unlock(tsdn, &arena->lock); + prof_accum_cancel(tsdn, &arena->prof_accum, usize); assert(isalloc(tsdn, extent, ptr) == usize); } @@ -1574,7 +1562,9 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } if (config_prof) { - arena->prof_accumbytes = 0; + if (prof_accum_init(tsdn, &arena->prof_accum)) { + goto label_error; + } } if (config_cache_oblivious) { diff --git a/src/prof.c b/src/prof.c index 5aeefb28..13fa20d3 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1753,6 +1753,20 @@ prof_fdump(void) { prof_dump(tsd, false, filename, opt_prof_leak); } +bool +prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) { + cassert(config_prof); + +#ifndef JEMALLOC_ATOMIC_U64 + if (malloc_mutex_init(&prof_accum->mtx, "prof_accum", + WITNESS_RANK_PROF_ACCUM)) { + return true; + } +#endif + prof_accum->accumbytes = 0; + return false; +} + void prof_idump(tsdn_t *tsdn) { tsd_t *tsd; diff --git a/src/tcache.c b/src/tcache.c index 94c45707..f38c2d5d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -200,7 +200,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, } if ((config_prof || config_stats) && locked_arena == arena) { if (config_prof) { - idump = arena_prof_accum_locked(arena, + idump = arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes); tcache->prof_accumbytes = 0; }