Patch summary: add a tcache_gc thread event, spaced by allocated bytes.

What the hunks below do, file by file:

  * include/jemalloc/internal/tcache_inlines.h
      Removes the tcache_event(tsd, tcache) call that preceded "return ret"
      in both tcache_alloc_small() and tcache_alloc_large().

  * include/jemalloc/internal/tcache_types.h
      Rewords the TCACHE_GC_INCR comment to mention deallocation events only,
      and adds TCACHE_GC_INCR_BYTES (65536U), commented as the number of
      allocation bytes between tcache incremental GCs.

  * include/jemalloc/internal/thread_event.h
      Adds a tcache_gc entry, with condition (TCACHE_GC_INCR_BYTES > 0),
      ahead of prof_sample in ITERATE_OVER_ALL_EVENTS.

  * include/jemalloc/internal/tsd.h
      Adds a uint64_t tcache_gc_event_wait field after
      thread_allocated_next_event, statically initialized to
      THREAD_EVENT_MIN_START_WAIT, and updates the cacheline layout diagram
      in the header comment to show the new field ("g").

  * src/jemalloc.c
      Removes the ticker_trytick(&tcache->gc_ticker) check in je_malloc()
      that returned malloc_default(size).

  * src/thread_event.c
      Adds thread_tcache_gc_event_handler().  It asserts that
      TCACHE_GC_INCR_BYTES > 0 and that tcache_gc_event_wait is 0, calls
      thread_tcache_gc_event_update(tsd, TCACHE_GC_INCR_BYTES) (presumably
      re-arming the wait; that helper is not shown here), and then calls
      tcache_event_hard() when tcache_get(tsd) is non-NULL.

  * src/tsd.c
      tsd_data_init() now calls
      thread_tcache_gc_event_update(tsd, TCACHE_GC_INCR_BYTES) when
      TCACHE_GC_INCR_BYTES > 0, before returning
      tsd_tcache_enabled_data_init(tsd).

  * test/unit/thread_event.c
      test_next_event_fast_roll_back and test_next_event_fast_resume set the
      wait of every event listed in ITERATE_OVER_ALL_EVENTS (through a local
      E() macro) where they previously set only prof_sample_event_wait.

NOTE(review): indentation and blank context lines inside the hunks were
reconstructed from the hunk line counts.  Confirm against the upstream files,
or apply with whitespace differences ignored.

diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h
index 85c6cc46..40c4286c 100644
--- a/include/jemalloc/internal/tcache_inlines.h
+++ b/include/jemalloc/internal/tcache_inlines.h
@@ -93,7 +93,6 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
 	if (config_stats) {
 		bin->tstats.nrequests++;
 	}
-	tcache_event(tsd, tcache);
 	return ret;
 }
 
@@ -150,7 +149,6 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
 		}
 	}
 
-	tcache_event(tsd, tcache);
 	return ret;
 }
 
diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h
index 60261fc0..9fd39263 100644
--- a/include/jemalloc/internal/tcache_types.h
+++ b/include/jemalloc/internal/tcache_types.h
@@ -43,10 +43,13 @@ typedef struct tcaches_s tcaches_t;
  */
 #define TCACHE_GC_SWEEP 8192
 
-/* Number of tcache allocation/deallocation events between incremental GCs. */
+/* Number of tcache deallocation events between incremental GCs. */
 #define TCACHE_GC_INCR \
     ((TCACHE_GC_SWEEP / SC_NBINS) + ((TCACHE_GC_SWEEP / SC_NBINS == 0) ? 0 : 1))
 
+/* Number of allocation bytes between tcache incremental GCs. */
+#define TCACHE_GC_INCR_BYTES 65536U
+
 /* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */
 #define TCACHE_ZERO_INITIALIZER {{0}}
 
diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h
index 6aa334fc..3da9f0a6 100644
--- a/include/jemalloc/internal/thread_event.h
+++ b/include/jemalloc/internal/thread_event.h
@@ -33,6 +33,7 @@ void thread_event_boot();
  * E(event, (condition))
  */
 #define ITERATE_OVER_ALL_EVENTS \
+    E(tcache_gc, (TCACHE_GC_INCR_BYTES > 0)) \
     E(prof_sample, (config_prof && opt_prof))
 
 #define E(event, condition) \
diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h
index 60500df7..17bfc886 100644
--- a/include/jemalloc/internal/tsd.h
+++ b/include/jemalloc/internal/tsd.h
@@ -29,6 +29,7 @@
  * x: narenas_tdata
  * l: thread_allocated_last_event
  * j: thread_allocated_next_event
+ * g: tcache_gc_event_wait
  * w: prof_sample_event_wait (config_prof)
  * x: prof_sample_last_event (config_prof)
  * p: prof_tdata (config_prof)
@@ -46,11 +47,11 @@
  * |---------------------------- 2nd cacheline ----------------------------|
  * | [c * 64 ........ ........ ........ ........ ........ ........ .......] |
  * |---------------------------- 3nd cacheline ----------------------------|
- * | [c * 32 ........ ........ .......] llllllll jjjjjjjj wwwwwwww xxxxxxxx |
+ * | [c * 32 ........ ........ .......] llllllll jjjjjjjj gggggggg wwwwwwww |
  * +---------------------------- 4th cacheline ----------------------------+
- * | pppppppp vvvvvvvv iiiiiiii aaaaaaaa oooooooo [b...... ........ ........ |
+ * | xxxxxxxx pppppppp vvvvvvvv iiiiiiii aaaaaaaa oooooooo [b...... ........ |
  * +---------------------------- 5th cacheline ----------------------------+
- * | ........ ..b][t.. ........ ........ ........ ........ ........ ........ |
+ * | ........ ........ ..b][t.. ........ ........ ........ ........ ........ |
  * +-------------------------------------------------------------------------+
  * Note: the entire tcache is embedded into TSD and spans multiple cachelines.
  *
@@ -83,6 +84,7 @@ typedef void (*test_callback_t)(int *);
     O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) \
     O(thread_allocated_last_event, uint64_t, uint64_t) \
     O(thread_allocated_next_event, uint64_t, uint64_t) \
+    O(tcache_gc_event_wait, uint64_t, uint64_t) \
     O(prof_sample_event_wait, uint64_t, uint64_t) \
     O(prof_sample_last_event, uint64_t, uint64_t) \
     O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \
@@ -113,6 +115,7 @@ typedef void (*test_callback_t)(int *);
     /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, \
     /* thread_allocated_last_event */ 0, \
     /* thread_allocated_next_event */ THREAD_EVENT_MIN_START_WAIT, \
+    /* tcache_gc_event_wait */ THREAD_EVENT_MIN_START_WAIT, \
     /* prof_sample_event_wait */ THREAD_EVENT_MIN_START_WAIT, \
     /* prof_sample_last_event */ 0, \
     /* prof_tdata */ NULL, \
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 264b3f3f..10735121 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -2350,10 +2350,6 @@ je_malloc(size_t size) {
 
 	tcache_t *tcache = tsd_tcachep_get(tsd);
 
-	if (unlikely(ticker_trytick(&tcache->gc_ticker))) {
-		return malloc_default(size);
-	}
-
 	szind_t ind = sz_size2index_lookup(size);
 	/*
 	 * The thread_allocated counter in tsd serves as a general purpose
diff --git a/src/thread_event.c b/src/thread_event.c
index 312dff26..33d669aa 100644
--- a/src/thread_event.c
+++ b/src/thread_event.c
@@ -18,6 +18,17 @@ static void thread_##event##_event_handler(tsd_t *tsd);
 ITERATE_OVER_ALL_EVENTS
 #undef E
 
+static void
+thread_tcache_gc_event_handler(tsd_t *tsd) {
+	assert(TCACHE_GC_INCR_BYTES > 0);
+	assert(tcache_gc_event_wait_get(tsd) == 0U);
+	thread_tcache_gc_event_update(tsd, TCACHE_GC_INCR_BYTES);
+	tcache_t *tcache = tcache_get(tsd);
+	if (tcache != NULL) {
+		tcache_event_hard(tsd, tcache);
+	}
+}
+
 static void
 thread_prof_sample_event_handler(tsd_t *tsd) {
 	assert(config_prof && opt_prof);
diff --git a/src/tsd.c b/src/tsd.c
index a31f6b96..3fa43d30 100644
--- a/src/tsd.c
+++ b/src/tsd.c
@@ -233,6 +233,10 @@ tsd_data_init(tsd_t *tsd) {
 	*tsd_offset_statep_get(tsd) = config_debug ? 0 :
 	    (uint64_t)(uintptr_t)tsd;
 
+	if (TCACHE_GC_INCR_BYTES > 0) {
+		thread_tcache_gc_event_update(tsd, TCACHE_GC_INCR_BYTES);
+	}
+
 	return tsd_tcache_enabled_data_init(tsd);
 }
 
diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c
index 6817262b..cf5b2e59 100644
--- a/test/unit/thread_event.c
+++ b/test/unit/thread_event.c
@@ -9,8 +9,11 @@ TEST_BEGIN(test_next_event_fast_roll_back) {
 	    THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX);
 	thread_allocated_next_event_fast_set(tsd,
 	    THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX);
-	prof_sample_event_wait_set(tsd,
+#define E(event, condition) \
+	event##_event_wait_set(tsd, \
 	    THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX);
+	ITERATE_OVER_ALL_EVENTS
+#undef E
 	void *p = malloc(16U);
 	assert_ptr_not_null(p, "malloc() failed");
 	free(p);
@@ -25,8 +28,11 @@ TEST_BEGIN(test_next_event_fast_resume) {
 	thread_allocated_next_event_set(tsd,
 	    THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX + 16U);
 	thread_allocated_next_event_fast_set(tsd, 0);
-	prof_sample_event_wait_set(tsd,
+#define E(event, condition) \
+	event##_event_wait_set(tsd, \
 	    THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX + 16U);
+	ITERATE_OVER_ALL_EVENTS
+#undef E
 	void *p = malloc(SC_LOOKUP_MAXCLASS);
 	assert_ptr_not_null(p, "malloc() failed");
 	free(p);