diff --git a/Makefile.in b/Makefile.in index 7365a923..83f04e64 100644 --- a/Makefile.in +++ b/Makefile.in @@ -135,7 +135,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/nstime.c \ $(srcroot)src/pa.c \ $(srcroot)src/pa_extra.c \ - $(srcroot)src/pai.c \ $(srcroot)src/pac.c \ $(srcroot)src/pages.c \ $(srcroot)src/peak_event.c \ @@ -230,6 +229,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ + $(srcroot)test/unit/hpa_sec_integration.c \ $(srcroot)test/unit/hpa_thp_always.c \ $(srcroot)test/unit/hpa_vectorized_madvise.c \ $(srcroot)test/unit/hpa_vectorized_madvise_large_batch.c \ diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index cf191aeb..1d004635 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -46,7 +46,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, pac_estats_t *estats, - hpa_shard_stats_t *hpastats, sec_stats_t *secstats); + hpa_shard_stats_t *hpastats); void arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena); edata_t *arena_extent_alloc_large( tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero); diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index b290411b..82035fe3 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -51,7 +51,6 @@ typedef struct ctl_arena_stats_s { arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; pac_estats_t estats[SC_NPSIZES]; hpa_shard_stats_t hpastats; - sec_stats_t secstats; } ctl_arena_stats_t; typedef struct ctl_stats_s { diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 06567740..dc7725b7 100644 --- 
a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -12,6 +12,7 @@ #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/pai.h" #include "jemalloc/internal/psset.h" +#include "jemalloc/internal/sec.h" typedef struct hpa_shard_nonderived_stats_s hpa_shard_nonderived_stats_t; struct hpa_shard_nonderived_stats_s { @@ -57,6 +58,7 @@ typedef struct hpa_shard_stats_s hpa_shard_stats_t; struct hpa_shard_stats_s { psset_stats_t psset_stats; hpa_shard_nonderived_stats_t nonderived_stats; + sec_stats_t secstats; }; typedef struct hpa_shard_s hpa_shard_t; @@ -69,14 +71,17 @@ struct hpa_shard_s { /* The central allocator we get our hugepages from. */ hpa_central_t *central; + /* Protects most of this shard's state. */ malloc_mutex_t mtx; + /* * Guards the shard's access to the central allocator (preventing * multiple threads operating on this shard from accessing the central * allocator). */ malloc_mutex_t grow_mtx; + /* The base metadata allocator. */ base_t *base; @@ -87,6 +92,9 @@ struct hpa_shard_s { */ edata_cache_fast_t ecf; + /* Small extent cache (not guarded by mtx) */ + JEMALLOC_ALIGNED(CACHELINE) sec_t sec; + psset_t psset; /* @@ -142,9 +150,9 @@ bool hpa_hugepage_size_exceeds_limit(void); * just that it can function properly given the system it's running on. 
*/ bool hpa_supported(void); -bool hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, - base_t *base, edata_cache_t *edata_cache, unsigned ind, - const hpa_shard_opts_t *opts); +bool hpa_shard_init(tsdn_t *tsdn, hpa_shard_t *shard, hpa_central_t *central, + emap_t *emap, base_t *base, edata_cache_t *edata_cache, unsigned ind, + const hpa_shard_opts_t *opts, const sec_opts_t *sec_opts); void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src); void hpa_shard_stats_merge( @@ -157,6 +165,8 @@ void hpa_shard_stats_merge( */ void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard); +/* Flush caches that shard may be using */ +void hpa_shard_flush(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_set_deferral_allowed( tsdn_t *tsdn, hpa_shard_t *shard, bool deferral_allowed); @@ -164,8 +174,9 @@ void hpa_shard_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard); /* * We share the fork ordering with the PA and arena prefork handling; that's why - * these are 3 and 4 rather than 0 and 1. + * these are 2, 3 and 4 rather than 0, 1 and 2. */ +void hpa_shard_prefork2(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_prefork4(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard); diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 3f2d10b0..f3910ad8 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -96,12 +96,6 @@ struct pa_shard_s { /* Allocates from a PAC. */ pac_t pac; - /* - * We place a small extent cache in front of the HPA, since we intend - * these configurations to use many fewer arenas, and therefore have a - * higher risk of hot locks. - */ - sec_t hpa_sec; hpa_shard_t hpa_shard; /* The source of edata_t objects.
*/ @@ -166,6 +160,9 @@ void pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard); */ void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard); +/* Flush any caches used by shard */ +void pa_shard_flush(tsdn_t *tsdn, pa_shard_t *shard); + /* Gets an edata for the given allocation. */ edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool slab, szind_t szind, bool zero, bool guarded, @@ -233,8 +230,7 @@ void pa_shard_basic_stats_merge( void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out, - hpa_shard_stats_t *hpa_stats_out, sec_stats_t *sec_stats_out, - size_t *resident); + hpa_shard_stats_t *hpa_stats_out, size_t *resident); /* * Reads the PA-owned mutex stats into the output stats array, at the diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h index 1d924657..9b4c257b 100644 --- a/include/jemalloc/internal/pai.h +++ b/include/jemalloc/internal/pai.h @@ -13,15 +13,6 @@ struct pai_s { edata_t *(*alloc)(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated); - /* - * Returns the number of extents added to the list (which may be fewer - * than requested, in case of OOM). The list should already be - * initialized. The only alignment guarantee is page-alignment, and - * the results are not necessarily zeroed. - */ - size_t (*alloc_batch)(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results, bool frequent_reuse, - bool *deferred_work_generated); bool (*expand)(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); @@ -29,9 +20,6 @@ struct pai_s { size_t old_size, size_t new_size, bool *deferred_work_generated); void (*dalloc)(tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated); - /* This function empties out list as a side-effect of being called. 
*/ - void (*dalloc_batch)(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list, bool *deferred_work_generated); uint64_t (*time_until_deferred_work)(tsdn_t *tsdn, pai_t *self); }; @@ -47,14 +35,6 @@ pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, frequent_reuse, deferred_work_generated); } -static inline size_t -pai_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, - edata_list_active_t *results, bool frequent_reuse, - bool *deferred_work_generated) { - return self->alloc_batch(tsdn, self, size, nallocs, results, - frequent_reuse, deferred_work_generated); -} - static inline bool pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated) { @@ -75,26 +55,9 @@ pai_dalloc( self->dalloc(tsdn, self, edata, deferred_work_generated); } -static inline void -pai_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, - bool *deferred_work_generated) { - self->dalloc_batch(tsdn, self, list, deferred_work_generated); -} - static inline uint64_t pai_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { return self->time_until_deferred_work(tsdn, self); } -/* - * An implementation of batch allocation that simply calls alloc once for - * each item in the list. - */ -size_t pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results, bool frequent_reuse, - bool *deferred_work_generated); -/* Ditto, for dalloc. */ -void pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list, bool *deferred_work_generated); - #endif /* JEMALLOC_INTERNAL_PAI_H */ diff --git a/include/jemalloc/internal/sec.h b/include/jemalloc/internal/sec.h index 50daf066..cc458b9d 100644 --- a/include/jemalloc/internal/sec.h +++ b/include/jemalloc/internal/sec.h @@ -17,91 +17,104 @@ * knowledge of the underlying PAI implementation). 
*/ -/* - * For now, this is just one field; eventually, we'll probably want to get more - * fine-grained data out (like per-size class statistics). - */ +typedef struct sec_bin_stats_s sec_bin_stats_t; +struct sec_bin_stats_s { + /* Number of alloc requests that did not find extent in this bin */ + size_t nmisses; + /* Number of successful alloc requests. */ + size_t nhits; + /* Number of dallocs causing the flush */ + size_t ndalloc_flush; + /* Number of dallocs not causing the flush */ + size_t ndalloc_noflush; + /* Number of fills that hit max_bytes */ + size_t noverfills; +}; typedef struct sec_stats_s sec_stats_t; struct sec_stats_s { /* Sum of bytes_cur across all shards. */ size_t bytes; + + /* Totals of bin_stats. */ + sec_bin_stats_t total; }; +static inline void +sec_bin_stats_init(sec_bin_stats_t *stats) { + stats->ndalloc_flush = 0; + stats->nmisses = 0; + stats->nhits = 0; + stats->ndalloc_noflush = 0; + stats->noverfills = 0; +} + +static inline void +sec_bin_stats_accum(sec_bin_stats_t *dst, sec_bin_stats_t *src) { + dst->nmisses += src->nmisses; + dst->nhits += src->nhits; + dst->ndalloc_flush += src->ndalloc_flush; + dst->ndalloc_noflush += src->ndalloc_noflush; + dst->noverfills += src->noverfills; +} + static inline void sec_stats_accum(sec_stats_t *dst, sec_stats_t *src) { dst->bytes += src->bytes; + sec_bin_stats_accum(&dst->total, &src->total); } /* A collections of free extents, all of the same size. */ typedef struct sec_bin_s sec_bin_t; struct sec_bin_s { /* - * When we fail to fulfill an allocation, we do a batch-alloc on the - * underlying allocator to fill extra items, as well. We drop the SEC - * lock while doing so, to allow operations on other bins to succeed. - * That introduces the possibility of other threads also trying to - * allocate out of this bin, failing, and also going to the backing - * allocator. 
To avoid a thundering herd problem in which lots of - * threads do batch allocs and overfill this bin as a result, we only - * allow one batch allocation at a time for a bin. This bool tracks - * whether or not some thread is already batch allocating. - * - * Eventually, the right answer may be a smarter sharding policy for the - * bins (e.g. a mutex per bin, which would also be more scalable - * generally; the batch-allocating thread could hold it while - * batch-allocating). + * Protects the data members of the bin. */ - bool being_batch_filled; + malloc_mutex_t mtx; /* - * Number of bytes in this particular bin (as opposed to the - * sec_shard_t's bytes_cur. This isn't user visible or reported in - * stats; rather, it allows us to quickly determine the change in the - * centralized counter when flushing. + * Number of bytes in this particular bin. */ size_t bytes_cur; edata_list_active_t freelist; -}; - -typedef struct sec_shard_s sec_shard_t; -struct sec_shard_s { - /* - * We don't keep per-bin mutexes, even though that would allow more - * sharding; this allows global cache-eviction, which in turn allows for - * better balancing across free lists. - */ - malloc_mutex_t mtx; - /* - * A SEC may need to be shut down (i.e. flushed of its contents and - * prevented from further caching). To avoid tricky synchronization - * issues, we just track enabled-status in each shard, guarded by a - * mutex. In practice, this is only ever checked during brief races, - * since the arena-level atomic boolean tracking HPA enabled-ness means - * that we won't go down these pathways very often after custom extent - * hooks are installed. - */ - bool enabled; - sec_bin_t *bins; - /* Number of bytes in all bins in the shard. */ - size_t bytes_cur; - /* The next pszind to flush in the flush-some pathways. 
*/ - pszind_t to_flush_next; + sec_bin_stats_t stats; }; typedef struct sec_s sec_t; struct sec_s { - pai_t pai; - pai_t *fallback; - - sec_opts_t opts; - sec_shard_t *shards; - pszind_t npsizes; + sec_opts_t opts; + sec_bin_t *bins; + pszind_t npsizes; }; -bool sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, - const sec_opts_t *opts); -void sec_flush(tsdn_t *tsdn, sec_t *sec); -void sec_disable(tsdn_t *tsdn, sec_t *sec); +static inline bool +sec_is_used(sec_t *sec) { + return sec->opts.nshards != 0; +} + +static inline bool +sec_size_supported(sec_t *sec, size_t size) { + return sec_is_used(sec) && size <= sec->opts.max_alloc; +} + +/* If sec does not have extent available, it will return NULL. */ +edata_t *sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size); +void sec_fill(tsdn_t *tsdn, sec_t *sec, size_t size, + edata_list_active_t *result, size_t nallocs); + +/* + * Upon return dalloc_list may be empty if edata is consumed by sec or non-empty + * if there are extents that need to be flushed from cache. Please note, that + * if we need to flush, extent(s) returned in the list to be deallocated + * will almost certainly not contain the one being dalloc-ed (that one will be + * considered "hot" and preserved in the cache, while "colder" ones are + * returned). 
+ */ +void sec_dalloc(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *dalloc_list); + +bool sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, const sec_opts_t *opts); + +/* Fills to_flush with extents that need to be deallocated */ +void sec_flush(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *to_flush); /* * Morally, these two stats methods probably ought to be a single one (and the diff --git a/include/jemalloc/internal/sec_opts.h b/include/jemalloc/internal/sec_opts.h index e0699d7a..039d423c 100644 --- a/include/jemalloc/internal/sec_opts.h +++ b/include/jemalloc/internal/sec_opts.h @@ -12,46 +12,39 @@ typedef struct sec_opts_s sec_opts_t; struct sec_opts_s { /* * We don't necessarily always use all the shards; requests are - * distributed across shards [0, nshards - 1). + * distributed across shards [0, nshards - 1). Once a thread picks a + * shard it will always use that one. If this value is set to 0, the sec + * is not used. */ size_t nshards; /* * We'll automatically refuse to cache any objects in this sec if - * they're larger than max_alloc bytes, instead forwarding such objects - * directly to the fallback. + * they're larger than max_alloc bytes. */ size_t max_alloc; /* - * Exceeding this amount of cached extents in a shard causes us to start - * flushing bins in that shard until we fall below bytes_after_flush. + * Exceeding this amount of cached extents in a bin causes us to flush + * until we are 1/4 below max_bytes. */ size_t max_bytes; - /* - * The number of bytes (in all bins) we flush down to when we exceed - * bytes_cur. We want this to be less than bytes_cur, because - * otherwise we could get into situations where a shard undergoing - * net-deallocation keeps bytes_cur very near to max_bytes, so that - * most deallocations get immediately forwarded to the underlying PAI - * implementation, defeating the point of the SEC.
- */ - size_t bytes_after_flush; /* * When we can't satisfy an allocation out of the SEC because there are - * no available ones cached, we allocate multiple of that size out of - * the fallback allocator. Eventually we might want to do something - * cleverer, but for now we just grab a fixed number. + * no available ones cached, allocator will allocate a batch with extra + * batch_fill_extra extents of the same size. */ size_t batch_fill_extra; }; +#define SEC_OPTS_NSHARDS_DEFAULT 2 +#define SEC_OPTS_BATCH_FILL_EXTRA_DEFAULT 3 +#define SEC_OPTS_MAX_ALLOC_DEFAULT ((32 * 1024) < PAGE ? PAGE : (32 * 1024)) +#define SEC_OPTS_MAX_BYTES_DEFAULT \ + ((256 * 1024) < (4 * SEC_OPTS_MAX_ALLOC_DEFAULT) \ + ? (4 * SEC_OPTS_MAX_ALLOC_DEFAULT) \ + : (256 * 1024)) + #define SEC_OPTS_DEFAULT \ - { \ - /* nshards */ \ - 4, /* max_alloc */ \ - (32 * 1024) < PAGE ? PAGE : (32 * 1024), /* max_bytes */ \ - 256 * 1024, /* bytes_after_flush */ \ - 128 * 1024, /* batch_fill_extra */ \ - 0 \ - } + {SEC_OPTS_NSHARDS_DEFAULT, SEC_OPTS_MAX_ALLOC_DEFAULT, \ + SEC_OPTS_MAX_BYTES_DEFAULT, SEC_OPTS_BATCH_FILL_EXTRA_DEFAULT} #endif /* JEMALLOC_INTERNAL_SEC_OPTS_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 7ca3c347..0a426ff5 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -46,7 +46,7 @@ enum witness_rank_e { WITNESS_RANK_DECAY = WITNESS_RANK_CORE, WITNESS_RANK_TCACHE_QL, - WITNESS_RANK_SEC_SHARD, + WITNESS_RANK_SEC_BIN, WITNESS_RANK_EXTENT_GROW, WITNESS_RANK_HPA_SHARD_GROW = WITNESS_RANK_EXTENT_GROW, diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index bfb62d78..1e8def75 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -74,7 +74,6 @@ - diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 
26408c8e..f6e340cf 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -106,9 +106,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 037eb724..45ddf73d 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -74,7 +74,6 @@ - diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 26408c8e..f6e340cf 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -106,9 +106,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index bd6595b1..f1a5158a 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -74,7 +74,6 @@ - diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index 26408c8e..f6e340cf 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -106,9 +106,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index 3f880176..a6f92ccf 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -74,7 +74,6 @@ - diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index 26408c8e..f6e340cf 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -106,9 +106,6 
@@ Source Files - - Source Files - Source Files diff --git a/src/arena.c b/src/arena.c index 664ed6a3..5b144c63 100644 --- a/src/arena.c +++ b/src/arena.c @@ -89,7 +89,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, pac_estats_t *estats, - hpa_shard_stats_t *hpastats, sec_stats_t *secstats) { + hpa_shard_stats_t *hpastats) { cassert(config_stats); arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms, @@ -159,7 +159,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, } pa_shard_stats_merge(tsdn, &arena->pa_shard, &astats->pa_shard_stats, - estats, hpastats, secstats, &astats->resident); + estats, hpastats, &astats->resident); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); @@ -529,7 +529,7 @@ arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { * as possible", including flushing any caches (for situations * like thread death, or manual purge calls). 
*/ - sec_flush(tsdn, &arena->pa_shard.hpa_sec); + pa_shard_flush(tsdn, &arena->pa_shard); } if (arena_decay_dirty(tsdn, arena, is_background_thread, all)) { return; diff --git a/src/ctl.c b/src/ctl.c index 553c58ad..1260e197 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -115,7 +115,6 @@ CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_nshards) CTL_PROTO(opt_hpa_sec_max_alloc) CTL_PROTO(opt_hpa_sec_max_bytes) -CTL_PROTO(opt_hpa_sec_bytes_after_flush) CTL_PROTO(opt_hpa_sec_batch_fill_extra) CTL_PROTO(opt_huge_arena_pac_thp) CTL_PROTO(opt_metadata_thp) @@ -339,6 +338,11 @@ CTL_PROTO(stats_arenas_i_tcache_stashed_bytes) CTL_PROTO(stats_arenas_i_resident) CTL_PROTO(stats_arenas_i_abandoned_vm) CTL_PROTO(stats_arenas_i_hpa_sec_bytes) +CTL_PROTO(stats_arenas_i_hpa_sec_hits) +CTL_PROTO(stats_arenas_i_hpa_sec_misses) +CTL_PROTO(stats_arenas_i_hpa_sec_dalloc_flush) +CTL_PROTO(stats_arenas_i_hpa_sec_dalloc_noflush) +CTL_PROTO(stats_arenas_i_hpa_sec_overfills) INDEX_PROTO(stats_arenas_i) CTL_PROTO(stats_allocated) CTL_PROTO(stats_active) @@ -486,7 +490,6 @@ static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, {NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)}, - {NAME("hpa_sec_bytes_after_flush"), CTL(opt_hpa_sec_bytes_after_flush)}, {NAME("hpa_sec_batch_fill_extra"), CTL(opt_hpa_sec_batch_fill_extra)}, {NAME("huge_arena_pac_thp"), CTL(opt_huge_arena_pac_thp)}, {NAME("metadata_thp"), CTL(opt_metadata_thp)}, @@ -826,6 +829,12 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("resident"), CTL(stats_arenas_i_resident)}, {NAME("abandoned_vm"), CTL(stats_arenas_i_abandoned_vm)}, {NAME("hpa_sec_bytes"), CTL(stats_arenas_i_hpa_sec_bytes)}, + {NAME("hpa_sec_hits"), CTL(stats_arenas_i_hpa_sec_hits)}, + {NAME("hpa_sec_misses"), CTL(stats_arenas_i_hpa_sec_misses)}, + {NAME("hpa_sec_dalloc_noflush"), + 
CTL(stats_arenas_i_hpa_sec_dalloc_noflush)}, + {NAME("hpa_sec_dalloc_flush"), CTL(stats_arenas_i_hpa_sec_dalloc_flush)}, + {NAME("hpa_sec_overfills"), CTL(stats_arenas_i_hpa_sec_overfills)}, {NAME("small"), CHILD(named, stats_arenas_i_small)}, {NAME("large"), CHILD(named, stats_arenas_i_large)}, {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, @@ -1066,7 +1075,7 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { &ctl_arena->pdirty, &ctl_arena->pmuzzy, &ctl_arena->astats->astats, ctl_arena->astats->bstats, ctl_arena->astats->lstats, ctl_arena->astats->estats, - &ctl_arena->astats->hpastats, &ctl_arena->astats->secstats); + &ctl_arena->astats->hpastats); for (i = 0; i < SC_NBINS; i++) { bin_stats_t *bstats = @@ -1258,7 +1267,6 @@ ctl_arena_stats_sdmerge( /* Merge HPA stats. */ hpa_shard_stats_accum(&sdstats->hpastats, &astats->hpastats); - sec_stats_accum(&sdstats->secstats, &astats->secstats); } } @@ -2175,11 +2183,8 @@ CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_opts.slab_max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_opts.nshards, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_opts.max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_opts.max_bytes, size_t) -CTL_RO_NL_GEN( - opt_hpa_sec_bytes_after_flush, opt_hpa_sec_opts.bytes_after_flush, size_t) CTL_RO_NL_GEN( opt_hpa_sec_batch_fill_extra, opt_hpa_sec_opts.batch_fill_extra, size_t) - CTL_RO_NL_GEN(opt_huge_arena_pac_thp, opt_huge_arena_pac_thp, bool) CTL_RO_NL_GEN( opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *) @@ -3869,7 +3874,17 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_abandoned_vm, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_bytes, - arenas_i(mib[2])->astats->secstats.bytes, size_t) + arenas_i(mib[2])->astats->hpastats.secstats.bytes, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_hits, + arenas_i(mib[2])->astats->hpastats.secstats.total.nhits, size_t) 
+CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_misses, + arenas_i(mib[2])->astats->hpastats.secstats.total.nmisses, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_dalloc_flush, + arenas_i(mib[2])->astats->hpastats.secstats.total.ndalloc_flush, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_dalloc_noflush, + arenas_i(mib[2])->astats->hpastats.secstats.total.ndalloc_noflush, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_overfills, + arenas_i(mib[2])->astats->hpastats.secstats.total.noverfills, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, arenas_i(mib[2])->astats->allocated_small, size_t) diff --git a/src/hpa.c b/src/hpa.c index cc330379..7e5b5f72 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -11,19 +11,17 @@ static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated); -static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results, bool frequent_reuse, - bool *deferred_work_generated); static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool *deferred_work_generated); static void hpa_dalloc( tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated); -static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list, bool *deferred_work_generated); static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); +static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, + edata_list_active_t *list, bool *deferred_work_generated); + const char *const hpa_hugify_style_names[] = {"auto", "none", "eager", "lazy"}; bool opt_experimental_hpa_start_huge_if_thp_always = true; @@ -74,9 +72,9 @@ hpa_do_consistency_checks(hpa_shard_t *shard) { } 
bool -hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, - base_t *base, edata_cache_t *edata_cache, unsigned ind, - const hpa_shard_opts_t *opts) { +hpa_shard_init(tsdn_t *tsdn, hpa_shard_t *shard, hpa_central_t *central, + emap_t *emap, base_t *base, edata_cache_t *edata_cache, unsigned ind, + const hpa_shard_opts_t *opts, const sec_opts_t *sec_opts) { /* malloc_conf processing should have filtered out these cases. */ assert(hpa_supported()); bool err; @@ -118,13 +116,16 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, * operating on corrupted data. */ shard->pai.alloc = &hpa_alloc; - shard->pai.alloc_batch = &hpa_alloc_batch; shard->pai.expand = &hpa_expand; shard->pai.shrink = &hpa_shrink; shard->pai.dalloc = &hpa_dalloc; - shard->pai.dalloc_batch = &hpa_dalloc_batch; shard->pai.time_until_deferred_work = &hpa_time_until_deferred_work; + err = sec_init(tsdn, &shard->sec, base, sec_opts); + if (err) { + return true; + } + hpa_do_consistency_checks(shard); return false; @@ -151,6 +152,7 @@ hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src) { psset_stats_accum(&dst->psset_stats, &src->psset_stats); hpa_shard_nonderived_stats_accum( &dst->nonderived_stats, &src->nonderived_stats); + sec_stats_accum(&dst->secstats, &src->secstats); } void @@ -164,6 +166,8 @@ hpa_shard_stats_merge( hpa_shard_nonderived_stats_accum(&dst->nonderived_stats, &shard->stats); malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx); + + sec_stats_merge(tsdn, &shard->sec, &dst->secstats); } static bool @@ -825,37 +829,9 @@ hpa_from_pai(pai_t *self) { return (hpa_shard_t *)self; } -static size_t -hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, - edata_list_active_t *results, bool frequent_reuse, - bool *deferred_work_generated) { - assert(nallocs > 0); - assert((size & PAGE_MASK) == 0); - witness_assert_depth_to_rank( - tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - 
hpa_shard_t *shard = hpa_from_pai(self); - - /* - * frequent_use here indicates this request comes from the arena bins, - * in which case it will be split into slabs, and therefore there is no - * intrinsic slack in the allocation (the entire range of allocated size - * will be accessed). - * - * In this case bypass the slab_max_alloc limit (if still within the - * huge page size). These requests do not concern internal - * fragmentation with huge pages (again, the full size will be used). - */ - if (!(frequent_reuse && size <= HUGEPAGE) - && (size > shard->opts.slab_max_alloc)) { - return 0; - } - - size_t nsuccess = hpa_alloc_batch_psset( - tsdn, shard, size, nallocs, results, deferred_work_generated); - - witness_assert_depth_to_rank( - tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - +static void +hpa_assert_results( + tsdn_t *tsdn, hpa_shard_t *shard, edata_list_active_t *results) { /* * Guard the sanity checks with config_debug because the loop cannot be * proven non-circular by the compiler, even if everything within the @@ -876,7 +852,6 @@ hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, assert(edata_base_get(edata) != NULL); } } - return nsuccess; } static edata_t * @@ -891,16 +866,52 @@ hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, if (alignment > PAGE || zero) { return NULL; } + hpa_shard_t *shard = hpa_from_pai(self); + /* - * An alloc with alignment == PAGE and zero == false is equivalent to a - * batch alloc of 1. Just do that, so we can share code. + * frequent_use here indicates this request comes from the arena bins, + * in which case it will be split into slabs, and therefore there is no + * intrinsic slack in the allocation (the entire range of allocated size + * will be accessed). + * + * In this case bypass the slab_max_alloc limit (if still within the + * huge page size). These requests do not concern internal + * fragmentation with huge pages (again, the full size will be used). 
*/ + if (!(frequent_reuse && size <= HUGEPAGE) + && (size > shard->opts.slab_max_alloc)) { + return NULL; + } + edata_t *edata = sec_alloc(tsdn, &shard->sec, size); + if (edata != NULL) { + return edata; + } + size_t nallocs = sec_size_supported(&shard->sec, size) + ? shard->sec.opts.batch_fill_extra + 1 + : 1; edata_list_active_t results; edata_list_active_init(&results); - size_t nallocs = hpa_alloc_batch(tsdn, self, size, /* nallocs */ 1, - &results, frequent_reuse, deferred_work_generated); - assert(nallocs == 0 || nallocs == 1); - edata_t *edata = edata_list_active_first(&results); + size_t nsuccess = hpa_alloc_batch_psset( + tsdn, shard, size, nallocs, &results, deferred_work_generated); + hpa_assert_results(tsdn, shard, &results); + edata = edata_list_active_first(&results); + + if (edata != NULL) { + edata_list_active_remove(&results, edata); + assert(nsuccess > 0); + nsuccess--; + } + if (nsuccess > 0) { + assert(sec_size_supported(&shard->sec, size)); + sec_fill(tsdn, &shard->sec, size, &results, nsuccess); + /* Unlikely rollback in case of overfill */ + if (!edata_list_active_empty(&results)) { + hpa_dalloc_batch( + tsdn, self, &results, deferred_work_generated); + } + } + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); return edata; } @@ -996,10 +1007,19 @@ static void hpa_dalloc( tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) { assert(!edata_guarded_get(edata)); - /* Just a dalloc_batch of size 1; this lets us share logic. 
*/ + edata_list_active_t dalloc_list; edata_list_active_init(&dalloc_list); edata_list_active_append(&dalloc_list, edata); + + hpa_shard_t *shard = hpa_from_pai(self); + sec_dalloc(tsdn, &shard->sec, &dalloc_list); + if (edata_list_active_empty(&dalloc_list)) { + /* sec consumed the pointer */ + *deferred_work_generated = false; + return; + } + /* We may have more than one pointer to flush now */ hpa_dalloc_batch(tsdn, self, &dalloc_list, deferred_work_generated); } @@ -1063,15 +1083,32 @@ hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { return time_ns; } +static void +hpa_sec_flush_impl(tsdn_t *tsdn, hpa_shard_t *shard) { + edata_list_active_t to_flush; + edata_list_active_init(&to_flush); + + sec_flush(tsdn, &shard->sec, &to_flush); + bool deferred_work_generated; + hpa_dalloc_batch( + tsdn, (pai_t *)shard, &to_flush, &deferred_work_generated); +} + void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) { hpa_do_consistency_checks(shard); + hpa_sec_flush_impl(tsdn, shard); malloc_mutex_lock(tsdn, &shard->mtx); edata_cache_fast_disable(tsdn, &shard->ecf); malloc_mutex_unlock(tsdn, &shard->mtx); } +void +hpa_shard_flush(tsdn_t *tsdn, hpa_shard_t *shard) { + hpa_sec_flush_impl(tsdn, shard); +} + static void hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) { assert(bin_stats->npageslabs == 0); @@ -1093,6 +1130,7 @@ hpa_assert_empty(tsdn_t *tsdn, hpa_shard_t *shard, psset_t *psset) { void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { hpa_do_consistency_checks(shard); + hpa_shard_flush(tsdn, shard); /* * By the time we're here, the arena code should have dalloc'd all the * active extents, which means we should have eventually evicted @@ -1137,6 +1175,12 @@ hpa_shard_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_unlock(tsdn, &shard->mtx); } +void +hpa_shard_prefork2(tsdn_t *tsdn, hpa_shard_t *shard) { + hpa_do_consistency_checks(shard); + sec_prefork2(tsdn, &shard->sec); +} + void hpa_shard_prefork3(tsdn_t *tsdn, 
hpa_shard_t *shard) { hpa_do_consistency_checks(shard); @@ -1155,6 +1199,7 @@ void hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard) { hpa_do_consistency_checks(shard); + sec_postfork_parent(tsdn, &shard->sec); malloc_mutex_postfork_parent(tsdn, &shard->grow_mtx); malloc_mutex_postfork_parent(tsdn, &shard->mtx); } @@ -1163,6 +1208,7 @@ void hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard) { hpa_do_consistency_checks(shard); + sec_postfork_child(tsdn, &shard->sec); malloc_mutex_postfork_child(tsdn, &shard->grow_mtx); malloc_mutex_postfork_child(tsdn, &shard->mtx); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 6844da5a..5d23962d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1013,6 +1013,15 @@ malloc_conf_error( /* However, tolerate experimental features. */ return; } + const char *deprecated[] = {"hpa_sec_bytes_after_flush"}; + const size_t deprecated_cnt = (sizeof(deprecated) + / sizeof(deprecated[0])); + for (size_t i = 0; i < deprecated_cnt; ++i) { + if (strncmp(k, deprecated[i], strlen(deprecated[i])) == 0) { + /* Tolerate deprecated features. 
*/ + return; + } + } had_conf_error = true; } @@ -1685,7 +1694,6 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } CONF_CONTINUE; } - CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.nshards, "hpa_sec_nshards", 0, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); @@ -1694,13 +1702,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], USIZE_GROW_SLOW_THRESHOLD, CONF_CHECK_MIN, CONF_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_bytes, - "hpa_sec_max_bytes", PAGE, 0, CONF_CHECK_MIN, - CONF_DONT_CHECK_MAX, true); - CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.bytes_after_flush, - "hpa_sec_bytes_after_flush", PAGE, 0, + "hpa_sec_max_bytes", SEC_OPTS_MAX_BYTES_DEFAULT, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.batch_fill_extra, - "hpa_sec_batch_fill_extra", 0, HUGEPAGE_PAGES, + "hpa_sec_batch_fill_extra", 1, HUGEPAGE_PAGES, CONF_CHECK_MIN, CONF_CHECK_MAX, true); if (CONF_MATCH("slab_sizes")) { diff --git a/src/pa.c b/src/pa.c index becf69b1..a03b0c1c 100644 --- a/src/pa.c +++ b/src/pa.c @@ -67,12 +67,9 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, pa_central_t *central, bool pa_shard_enable_hpa(tsdn_t *tsdn, pa_shard_t *shard, const hpa_shard_opts_t *hpa_opts, const sec_opts_t *hpa_sec_opts) { - if (hpa_shard_init(&shard->hpa_shard, &shard->central->hpa, shard->emap, - shard->base, &shard->edata_cache, shard->ind, hpa_opts)) { - return true; - } - if (sec_init(tsdn, &shard->hpa_sec, shard->base, &shard->hpa_shard.pai, - hpa_sec_opts)) { + if (hpa_shard_init(tsdn, &shard->hpa_shard, &shard->central->hpa, + shard->emap, shard->base, &shard->edata_cache, shard->ind, + hpa_opts, hpa_sec_opts)) { return true; } shard->ever_used_hpa = true; @@ -85,7 +82,6 @@ void pa_shard_disable_hpa(tsdn_t *tsdn, pa_shard_t *shard) { atomic_store_b(&shard->use_hpa, false, ATOMIC_RELAXED); if (shard->ever_used_hpa) { - sec_disable(tsdn, &shard->hpa_sec); hpa_shard_disable(tsdn, 
&shard->hpa_shard); } } @@ -93,8 +89,13 @@ pa_shard_disable_hpa(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard) { atomic_store_zu(&shard->nactive, 0, ATOMIC_RELAXED); + pa_shard_flush(tsdn, shard); +} + +void +pa_shard_flush(tsdn_t *tsdn, pa_shard_t *shard) { if (shard->ever_used_hpa) { - sec_flush(tsdn, &shard->hpa_sec); + hpa_shard_flush(tsdn, &shard->hpa_shard); } } @@ -107,7 +108,6 @@ void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { pac_destroy(tsdn, &shard->pac); if (shard->ever_used_hpa) { - sec_flush(tsdn, &shard->hpa_sec); hpa_shard_destroy(tsdn, &shard->hpa_shard); } } @@ -115,7 +115,7 @@ pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { static pai_t * pa_get_pai(pa_shard_t *shard, edata_t *edata) { return (edata_pai_get(edata) == EXTENT_PAI_PAC ? &shard->pac.pai - : &shard->hpa_sec.pai); + : &shard->hpa_shard.pai); } edata_t * @@ -128,7 +128,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, edata_t *edata = NULL; if (!guarded && pa_shard_uses_hpa(shard)) { - edata = pai_alloc(tsdn, &shard->hpa_sec.pai, size, alignment, + edata = pai_alloc(tsdn, &shard->hpa_shard.pai, size, alignment, zero, /* guarded */ false, slab, deferred_work_generated); } /* diff --git a/src/pa_extra.c b/src/pa_extra.c index 7c2498b7..ff45674f 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -17,7 +17,7 @@ pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) { if (shard->ever_used_hpa) { - sec_prefork2(tsdn, &shard->hpa_sec); + hpa_shard_prefork2(tsdn, &shard->hpa_shard); } } @@ -54,7 +54,6 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) { malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_muzzy.mtx); if (shard->ever_used_hpa) { - sec_postfork_parent(tsdn, &shard->hpa_sec); hpa_shard_postfork_parent(tsdn, &shard->hpa_shard); } } @@ -69,7 +68,6 @@ 
pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) { malloc_mutex_postfork_child(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &shard->pac.decay_muzzy.mtx); if (shard->ever_used_hpa) { - sec_postfork_child(tsdn, &shard->hpa_sec); hpa_shard_postfork_child(tsdn, &shard->hpa_shard); } } @@ -104,8 +102,7 @@ pa_shard_basic_stats_merge( void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out, - hpa_shard_stats_t *hpa_stats_out, sec_stats_t *sec_stats_out, - size_t *resident) { + hpa_shard_stats_t *hpa_stats_out, size_t *resident) { cassert(config_stats); pa_shard_stats_out->pac_stats.retained += @@ -170,7 +167,6 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, if (shard->ever_used_hpa) { hpa_shard_stats_merge(tsdn, &shard->hpa_shard, hpa_stats_out); - sec_stats_merge(tsdn, &shard->hpa_sec, sec_stats_out); } } @@ -204,7 +200,7 @@ pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, &shard->hpa_shard.grow_mtx, arena_prof_mutex_hpa_shard_grow); - sec_mutex_stats_read(tsdn, &shard->hpa_sec, + sec_mutex_stats_read(tsdn, &shard->hpa_shard.sec, &mutex_prof_data[arena_prof_mutex_hpa_sec]); } } diff --git a/src/pac.c b/src/pac.c index 361816e9..86001139 100644 --- a/src/pac.c +++ b/src/pac.c @@ -97,11 +97,9 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, atomic_store_zu(&pac->extent_sn_next, 0, ATOMIC_RELAXED); pac->pai.alloc = &pac_alloc_impl; - pac->pai.alloc_batch = &pai_alloc_batch_default; pac->pai.expand = &pac_expand_impl; pac->pai.shrink = &pac_shrink_impl; pac->pai.dalloc = &pac_dalloc_impl; - pac->pai.dalloc_batch = &pai_dalloc_batch_default; pac->pai.time_until_deferred_work = &pac_time_until_deferred_work; return false; @@ -449,8 +447,8 @@ decay_with_process_madvise(edata_list_inactive_t *decay_extents) { size_t cur = 0, total_bytes = 0; for (edata_t *edata = 
edata_list_inactive_first(decay_extents); - edata != NULL; - edata = edata_list_inactive_next(decay_extents, edata)) { + edata != NULL; + edata = edata_list_inactive_next(decay_extents, edata)) { size_t pages_bytes = edata_size_get(edata); vec[cur].iov_base = edata_base_get(edata); vec[cur].iov_len = pages_bytes; @@ -511,7 +509,7 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, } for (edata_t *edata = edata_list_inactive_first(decay_extents); - edata != NULL; edata = edata_list_inactive_first(decay_extents)) { + edata != NULL; edata = edata_list_inactive_first(decay_extents)) { edata_list_inactive_remove(decay_extents, edata); size_t size = edata_size_get(edata); diff --git a/src/pai.c b/src/pai.c deleted file mode 100644 index 3114e658..00000000 --- a/src/pai.c +++ /dev/null @@ -1,32 +0,0 @@ -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" - -size_t -pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, - edata_list_active_t *results, bool frequent_reuse, - bool *deferred_work_generated) { - for (size_t i = 0; i < nallocs; i++) { - bool deferred_by_alloc = false; - edata_t *edata = pai_alloc(tsdn, self, size, PAGE, - /* zero */ false, /* guarded */ false, frequent_reuse, - &deferred_by_alloc); - *deferred_work_generated |= deferred_by_alloc; - if (edata == NULL) { - return i; - } - edata_list_active_append(results, edata); - } - return nallocs; -} - -void -pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, - bool *deferred_work_generated) { - edata_t *edata; - while ((edata = edata_list_active_first(list)) != NULL) { - bool deferred_by_dalloc = false; - edata_list_active_remove(list, edata); - pai_dalloc(tsdn, self, edata, &deferred_by_dalloc); - *deferred_work_generated |= deferred_by_dalloc; - } -} diff --git a/src/sec.c b/src/sec.c index c827dd5c..5f65362f 100644 --- a/src/sec.c +++ b/src/sec.c @@ -4,95 +4,56 @@ #include 
"jemalloc/internal/sec.h" #include "jemalloc/internal/jemalloc_probe.h" -static edata_t *sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool guarded, bool frequent_reuse, - bool *deferred_work_generated); -static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); -static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool *deferred_work_generated); -static void sec_dalloc( - tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated); - -static void +static bool sec_bin_init(sec_bin_t *bin) { - bin->being_batch_filled = false; bin->bytes_cur = 0; + sec_bin_stats_init(&bin->stats); edata_list_active_init(&bin->freelist); + bool err = malloc_mutex_init(&bin->mtx, "sec_bin", WITNESS_RANK_SEC_BIN, + malloc_mutex_rank_exclusive); + if (err) { + return true; + } + + return false; } bool -sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, - const sec_opts_t *opts) { +sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, const sec_opts_t *opts) { + sec->opts = *opts; + if (opts->nshards == 0) { + return false; + } assert(opts->max_alloc >= PAGE); + /* * Same as tcache, sec do not cache allocs/dallocs larger than * USIZE_GROW_SLOW_THRESHOLD because the usize above this increases * by PAGE and the number of usizes is too large. 
*/ - assert(!sz_large_size_classes_disabled() - || opts->max_alloc <= USIZE_GROW_SLOW_THRESHOLD); + assert(opts->max_alloc <= USIZE_GROW_SLOW_THRESHOLD); size_t max_alloc = PAGE_FLOOR(opts->max_alloc); pszind_t npsizes = sz_psz2ind(max_alloc) + 1; - size_t sz_shards = opts->nshards * sizeof(sec_shard_t); - size_t sz_bins = opts->nshards * (size_t)npsizes * sizeof(sec_bin_t); - size_t sz_alloc = sz_shards + sz_bins; - void *dynalloc = base_alloc(tsdn, base, sz_alloc, CACHELINE); + size_t ntotal_bins = opts->nshards * (size_t)npsizes; + size_t sz_bins = sizeof(sec_bin_t) * ntotal_bins; + void *dynalloc = base_alloc(tsdn, base, sz_bins, CACHELINE); if (dynalloc == NULL) { return true; } - sec_shard_t *shard_cur = (sec_shard_t *)dynalloc; - sec->shards = shard_cur; - sec_bin_t *bin_cur = (sec_bin_t *)&shard_cur[opts->nshards]; - /* Just for asserts, below. */ - sec_bin_t *bin_start = bin_cur; - - for (size_t i = 0; i < opts->nshards; i++) { - sec_shard_t *shard = shard_cur; - shard_cur++; - bool err = malloc_mutex_init(&shard->mtx, "sec_shard", - WITNESS_RANK_SEC_SHARD, malloc_mutex_rank_exclusive); - if (err) { + sec->bins = (sec_bin_t *)dynalloc; + for (pszind_t j = 0; j < ntotal_bins; j++) { + if (sec_bin_init(&sec->bins[j])) { return true; } - shard->enabled = true; - shard->bins = bin_cur; - for (pszind_t j = 0; j < npsizes; j++) { - sec_bin_init(&shard->bins[j]); - bin_cur++; - } - shard->bytes_cur = 0; - shard->to_flush_next = 0; } - /* - * Should have exactly matched the bin_start to the first unused byte - * after the shards. - */ - assert((void *)shard_cur == (void *)bin_start); - /* And the last bin to use up the last bytes of the allocation. */ - assert((char *)bin_cur == ((char *)dynalloc + sz_alloc)); - sec->fallback = fallback; - - sec->opts = *opts; sec->npsizes = npsizes; - /* - * Initialize these last so that an improper use of an SEC whose - * initialization failed will segfault in an easy-to-spot way. 
- */ - sec->pai.alloc = &sec_alloc; - sec->pai.alloc_batch = &pai_alloc_batch_default; - sec->pai.expand = &sec_expand; - sec->pai.shrink = &sec_shrink; - sec->pai.dalloc = &sec_dalloc; - sec->pai.dalloc_batch = &pai_dalloc_batch_default; - return false; } -static sec_shard_t * +static uint8_t sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { /* * Eventually, we should implement affinity, tracking source shard using @@ -100,7 +61,7 @@ sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { * distribute across all shards. */ if (tsdn_null(tsdn)) { - return &sec->shards[0]; + return 0; } tsd_t *tsd = tsdn_tsd(tsdn); uint8_t *idxp = tsd_sec_shardp_get(tsd); @@ -118,284 +79,252 @@ sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { assert(idx < (uint32_t)sec->opts.nshards); *idxp = (uint8_t)idx; } - return &sec->shards[*idxp]; + return *idxp; } -/* - * Perhaps surprisingly, this can be called on the alloc pathways; if we hit an - * empty cache, we'll try to fill it, which can push the shard over it's limit. - */ -static void -sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { - malloc_mutex_assert_owner(tsdn, &shard->mtx); - edata_list_active_t to_flush; - edata_list_active_init(&to_flush); - while (shard->bytes_cur > sec->opts.bytes_after_flush) { - /* Pick a victim. */ - sec_bin_t *bin = &shard->bins[shard->to_flush_next]; - - /* Update our victim-picking state. */ - shard->to_flush_next++; - if (shard->to_flush_next == sec->npsizes) { - shard->to_flush_next = 0; - } - - assert(shard->bytes_cur >= bin->bytes_cur); - if (bin->bytes_cur != 0) { - shard->bytes_cur -= bin->bytes_cur; - bin->bytes_cur = 0; - edata_list_active_concat(&to_flush, &bin->freelist); - } - /* - * Either bin->bytes_cur was 0, in which case we didn't touch - * the bin list but it should be empty anyways (or else we - * missed a bytes_cur update on a list modification), or it - * *was* 0 and we emptied it ourselves. Either way, it should - * be empty now. 
- */ - assert(edata_list_active_empty(&bin->freelist)); - } - - malloc_mutex_unlock(tsdn, &shard->mtx); - bool deferred_work_generated = false; - pai_dalloc_batch( - tsdn, sec->fallback, &to_flush, &deferred_work_generated); +static sec_bin_t * +sec_bin_pick(sec_t *sec, uint8_t shard, pszind_t pszind) { + assert(shard < sec->opts.nshards); + size_t ind = (size_t)shard * sec->npsizes + pszind; + assert(ind < sec->npsizes * sec->opts.nshards); + return &sec->bins[ind]; } static edata_t * -sec_shard_alloc_locked( - tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, sec_bin_t *bin) { - malloc_mutex_assert_owner(tsdn, &shard->mtx); - if (!shard->enabled) { - return NULL; - } +sec_bin_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_bin_t *bin, size_t size) { + malloc_mutex_assert_owner(tsdn, &bin->mtx); + edata_t *edata = edata_list_active_first(&bin->freelist); if (edata != NULL) { + assert(!edata_list_active_empty(&bin->freelist)); edata_list_active_remove(&bin->freelist, edata); - assert(edata_size_get(edata) <= bin->bytes_cur); - bin->bytes_cur -= edata_size_get(edata); - assert(edata_size_get(edata) <= shard->bytes_cur); - shard->bytes_cur -= edata_size_get(edata); + size_t sz = edata_size_get(edata); + assert(sz <= bin->bytes_cur && sz > 0); + bin->bytes_cur -= sz; + bin->stats.nhits++; } return edata; } static edata_t * -sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, - sec_bin_t *bin, size_t size, bool frequent_reuse) { - malloc_mutex_assert_not_owner(tsdn, &shard->mtx); +sec_multishard_trylock_alloc( + tsdn_t *tsdn, sec_t *sec, size_t size, pszind_t pszind) { + assert(sec->opts.nshards > 0); - edata_list_active_t result; - edata_list_active_init(&result); - bool deferred_work_generated = false; - size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size, - 1 + sec->opts.batch_fill_extra, &result, frequent_reuse, - &deferred_work_generated); - - edata_t *ret = edata_list_active_first(&result); - if (ret != NULL) { - edata_list_active_remove(&result, 
ret); + uint8_t cur_shard = sec_shard_pick(tsdn, sec); + sec_bin_t *bin; + for (size_t i = 0; i < sec->opts.nshards; ++i) { + bin = sec_bin_pick(sec, cur_shard, pszind); + if (!malloc_mutex_trylock(tsdn, &bin->mtx)) { + edata_t *edata = sec_bin_alloc_locked( + tsdn, sec, bin, size); + malloc_mutex_unlock(tsdn, &bin->mtx); + if (edata != NULL) { + JE_USDT(sec_alloc, 5, sec, bin, edata, size, + /* frequent_reuse */ 1); + return edata; + } + } + cur_shard++; + if (cur_shard == sec->opts.nshards) { + cur_shard = 0; + } } - - malloc_mutex_lock(tsdn, &shard->mtx); - bin->being_batch_filled = false; - /* - * Handle the easy case first: nothing to cache. Note that this can - * only happen in case of OOM, since sec_alloc checks the expected - * number of allocs, and doesn't bother going down the batch_fill - * pathway if there won't be anything left to cache. So to be in this - * code path, we must have asked for > 1 alloc, but only gotten 1 back. - */ - if (nalloc <= 1) { - malloc_mutex_unlock(tsdn, &shard->mtx); - return ret; + /* No bin had alloc or had the extent */ + assert(cur_shard == sec_shard_pick(tsdn, sec)); + bin = sec_bin_pick(sec, cur_shard, pszind); + malloc_mutex_lock(tsdn, &bin->mtx); + edata_t *edata = sec_bin_alloc_locked(tsdn, sec, bin, size); + if (edata == NULL) { + /* Only now we know it is a miss */ + bin->stats.nmisses++; } - - size_t new_cached_bytes = (nalloc - 1) * size; - - edata_list_active_concat(&bin->freelist, &result); - bin->bytes_cur += new_cached_bytes; - shard->bytes_cur += new_cached_bytes; - - if (shard->bytes_cur > sec->opts.max_bytes) { - sec_flush_some_and_unlock(tsdn, sec, shard); - } else { - malloc_mutex_unlock(tsdn, &shard->mtx); - } - - return ret; + malloc_mutex_unlock(tsdn, &bin->mtx); + JE_USDT(sec_alloc, 5, sec, bin, edata, size, /* frequent_reuse */ 1); + return edata; } -static edata_t * -sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, - bool guarded, bool frequent_reuse, bool 
*deferred_work_generated) { +edata_t * +sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size) { + if (!sec_size_supported(sec, size)) { + return NULL; + } assert((size & PAGE_MASK) == 0); - assert(!guarded); - - sec_t *sec = (sec_t *)self; - - if (zero || alignment > PAGE || sec->opts.nshards == 0 - || size > sec->opts.max_alloc) { - return pai_alloc(tsdn, sec->fallback, size, alignment, zero, - /* guarded */ false, frequent_reuse, - deferred_work_generated); - } pszind_t pszind = sz_psz2ind(size); assert(pszind < sec->npsizes); - sec_shard_t *shard = sec_shard_pick(tsdn, sec); - sec_bin_t *bin = &shard->bins[pszind]; - bool do_batch_fill = false; - - malloc_mutex_lock(tsdn, &shard->mtx); - edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, bin); - if (edata == NULL) { - if (!bin->being_batch_filled - && sec->opts.batch_fill_extra > 0) { - bin->being_batch_filled = true; - do_batch_fill = true; + /* + * If there's only one shard, skip the trylock optimization and + * go straight to the blocking lock. 
+ */ + if (sec->opts.nshards == 1) { + sec_bin_t *bin = sec_bin_pick(sec, /* shard */ 0, pszind); + malloc_mutex_lock(tsdn, &bin->mtx); + edata_t *edata = sec_bin_alloc_locked(tsdn, sec, bin, size); + if (edata == NULL) { + bin->stats.nmisses++; } + malloc_mutex_unlock(tsdn, &bin->mtx); + JE_USDT(sec_alloc, 5, sec, bin, edata, size, + /* frequent_reuse */ 1); + return edata; } - malloc_mutex_unlock(tsdn, &shard->mtx); - if (edata == NULL) { - if (do_batch_fill) { - edata = sec_batch_fill_and_alloc( - tsdn, sec, shard, bin, size, frequent_reuse); - } else { - edata = pai_alloc(tsdn, sec->fallback, size, alignment, - zero, /* guarded */ false, frequent_reuse, - deferred_work_generated); - } - } - JE_USDT(sec_alloc, 5, sec, shard, edata, size, frequent_reuse); - return edata; -} - -static bool -sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, - size_t new_size, bool zero, bool *deferred_work_generated) { - sec_t *sec = (sec_t *)self; - JE_USDT(sec_expand, 4, sec, edata, old_size, new_size); - return pai_expand(tsdn, sec->fallback, edata, old_size, new_size, zero, - deferred_work_generated); -} - -static bool -sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, - size_t new_size, bool *deferred_work_generated) { - sec_t *sec = (sec_t *)self; - JE_USDT(sec_shrink, 4, sec, edata, old_size, new_size); - return pai_shrink(tsdn, sec->fallback, edata, old_size, new_size, - deferred_work_generated); + return sec_multishard_trylock_alloc(tsdn, sec, size, pszind); } static void -sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { - malloc_mutex_assert_owner(tsdn, &shard->mtx); - shard->bytes_cur = 0; - edata_list_active_t to_flush; - edata_list_active_init(&to_flush); - for (pszind_t i = 0; i < sec->npsizes; i++) { - sec_bin_t *bin = &shard->bins[i]; - bin->bytes_cur = 0; - edata_list_active_concat(&to_flush, &bin->freelist); - } +sec_bin_dalloc_locked(tsdn_t *tsdn, sec_t *sec, sec_bin_t *bin, size_t size, + 
edata_list_active_t *dalloc_list) { + malloc_mutex_assert_owner(tsdn, &bin->mtx); - /* - * Ordinarily we would try to avoid doing the batch deallocation while - * holding the shard mutex, but the flush_all pathways only happen when - * we're disabling the HPA or resetting the arena, both of which are - * rare pathways. - */ - bool deferred_work_generated = false; - pai_dalloc_batch( - tsdn, sec->fallback, &to_flush, &deferred_work_generated); -} - -static void -sec_shard_dalloc_and_unlock( - tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, edata_t *edata) { - malloc_mutex_assert_owner(tsdn, &shard->mtx); - assert(shard->bytes_cur <= sec->opts.max_bytes); - size_t size = edata_size_get(edata); - pszind_t pszind = sz_psz2ind(size); - assert(pszind < sec->npsizes); - /* - * Prepending here results in LIFO allocation per bin, which seems - * reasonable. - */ - sec_bin_t *bin = &shard->bins[pszind]; - edata_list_active_prepend(&bin->freelist, edata); bin->bytes_cur += size; - shard->bytes_cur += size; - if (shard->bytes_cur > sec->opts.max_bytes) { - /* - * We've exceeded the shard limit. We make two nods in the - * direction of fragmentation avoidance: we flush everything in - * the shard, rather than one particular bin, and we hold the - * lock while flushing (in case one of the extents we flush is - * highly preferred from a fragmentation-avoidance perspective - * in the backing allocator). This has the extra advantage of - * not requiring advanced cache balancing strategies. 
- */ - sec_flush_some_and_unlock(tsdn, sec, shard); - malloc_mutex_assert_not_owner(tsdn, &shard->mtx); - } else { - malloc_mutex_unlock(tsdn, &shard->mtx); - } -} + edata_t *edata = edata_list_active_first(dalloc_list); + assert(edata != NULL); + edata_list_active_remove(dalloc_list, edata); + JE_USDT(sec_dalloc, 3, sec, bin, edata); + edata_list_active_prepend(&bin->freelist, edata); + /* Single extent can be returned to SEC */ + assert(edata_list_active_empty(dalloc_list)); -static void -sec_dalloc( - tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) { - sec_t *sec = (sec_t *)self; - if (sec->opts.nshards == 0 - || edata_size_get(edata) > sec->opts.max_alloc) { - pai_dalloc(tsdn, sec->fallback, edata, deferred_work_generated); + if (bin->bytes_cur <= sec->opts.max_bytes) { + bin->stats.ndalloc_noflush++; return; } - sec_shard_t *shard = sec_shard_pick(tsdn, sec); - JE_USDT(sec_dalloc, 3, sec, shard, edata); - malloc_mutex_lock(tsdn, &shard->mtx); - if (shard->enabled) { - sec_shard_dalloc_and_unlock(tsdn, sec, shard, edata); + bin->stats.ndalloc_flush++; + /* we want to flush 1/4 of max_bytes */ + size_t bytes_target = sec->opts.max_bytes - (sec->opts.max_bytes >> 2); + while (bin->bytes_cur > bytes_target + && !edata_list_active_empty(&bin->freelist)) { + edata_t *cur = edata_list_active_last(&bin->freelist); + size_t sz = edata_size_get(cur); + assert(sz <= bin->bytes_cur && sz > 0); + bin->bytes_cur -= sz; + edata_list_active_remove(&bin->freelist, cur); + edata_list_active_append(dalloc_list, cur); + } +} + +static void +sec_multishard_trylock_dalloc(tsdn_t *tsdn, sec_t *sec, size_t size, + pszind_t pszind, edata_list_active_t *dalloc_list) { + assert(sec->opts.nshards > 0); + + /* Try to dalloc in this threads bin first */ + uint8_t cur_shard = sec_shard_pick(tsdn, sec); + for (size_t i = 0; i < sec->opts.nshards; ++i) { + sec_bin_t *bin = sec_bin_pick(sec, cur_shard, pszind); + if (!malloc_mutex_trylock(tsdn, &bin->mtx)) { + 
sec_bin_dalloc_locked( + tsdn, sec, bin, size, dalloc_list); + malloc_mutex_unlock(tsdn, &bin->mtx); + return; + } + cur_shard++; + if (cur_shard == sec->opts.nshards) { + cur_shard = 0; + } + } + /* No bin had alloc or had the extent */ + assert(cur_shard == sec_shard_pick(tsdn, sec)); + sec_bin_t *bin = sec_bin_pick(sec, cur_shard, pszind); + malloc_mutex_lock(tsdn, &bin->mtx); + sec_bin_dalloc_locked(tsdn, sec, bin, size, dalloc_list); + malloc_mutex_unlock(tsdn, &bin->mtx); +} + +void +sec_dalloc(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *dalloc_list) { + if (!sec_is_used(sec)) { + return; + } + edata_t *edata = edata_list_active_first(dalloc_list); + size_t size = edata_size_get(edata); + if (size > sec->opts.max_alloc) { + return; + } + pszind_t pszind = sz_psz2ind(size); + assert(pszind < sec->npsizes); + + /* + * If there's only one shard, skip the trylock optimization and + * go straight to the blocking lock. + */ + if (sec->opts.nshards == 1) { + sec_bin_t *bin = sec_bin_pick(sec, /* shard */ 0, pszind); + malloc_mutex_lock(tsdn, &bin->mtx); + sec_bin_dalloc_locked(tsdn, sec, bin, size, dalloc_list); + malloc_mutex_unlock(tsdn, &bin->mtx); + return; + } + sec_multishard_trylock_dalloc(tsdn, sec, size, pszind, dalloc_list); +} + +void +sec_fill(tsdn_t *tsdn, sec_t *sec, size_t size, edata_list_active_t *result, + size_t nallocs) { + assert((size & PAGE_MASK) == 0); + assert(sec->opts.nshards != 0 && size <= sec->opts.max_alloc); + assert(nallocs > 0); + + pszind_t pszind = sz_psz2ind(size); + assert(pszind < sec->npsizes); + + sec_bin_t *bin = sec_bin_pick(sec, sec_shard_pick(tsdn, sec), pszind); + malloc_mutex_assert_not_owner(tsdn, &bin->mtx); + malloc_mutex_lock(tsdn, &bin->mtx); + size_t new_cached_bytes = nallocs * size; + if (bin->bytes_cur + new_cached_bytes <= sec->opts.max_bytes) { + assert(!edata_list_active_empty(result)); + edata_list_active_concat(&bin->freelist, result); + bin->bytes_cur += new_cached_bytes; } else { - 
malloc_mutex_unlock(tsdn, &shard->mtx); - pai_dalloc(tsdn, sec->fallback, edata, deferred_work_generated); + /* + * Unlikely case of many threads filling at the same time and + * going above max. + */ + bin->stats.noverfills++; + while (bin->bytes_cur + size <= sec->opts.max_bytes) { + edata_t *edata = edata_list_active_first(result); + if (edata == NULL) { + break; + } + edata_list_active_remove(result, edata); + assert(size == edata_size_get(edata)); + edata_list_active_append(&bin->freelist, edata); + bin->bytes_cur += size; + } } + malloc_mutex_unlock(tsdn, &bin->mtx); } void -sec_flush(tsdn_t *tsdn, sec_t *sec) { - for (size_t i = 0; i < sec->opts.nshards; i++) { - malloc_mutex_lock(tsdn, &sec->shards[i].mtx); - sec_flush_all_locked(tsdn, sec, &sec->shards[i]); - malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); +sec_flush(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *to_flush) { + if (!sec_is_used(sec)) { + return; } -} - -void -sec_disable(tsdn_t *tsdn, sec_t *sec) { - for (size_t i = 0; i < sec->opts.nshards; i++) { - malloc_mutex_lock(tsdn, &sec->shards[i].mtx); - sec->shards[i].enabled = false; - sec_flush_all_locked(tsdn, sec, &sec->shards[i]); - malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); + size_t ntotal_bins = sec->opts.nshards * sec->npsizes; + for (pszind_t i = 0; i < ntotal_bins; i++) { + sec_bin_t *bin = &sec->bins[i]; + malloc_mutex_lock(tsdn, &bin->mtx); + bin->bytes_cur = 0; + edata_list_active_concat(to_flush, &bin->freelist); + malloc_mutex_unlock(tsdn, &bin->mtx); } } void sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats) { + if (!sec_is_used(sec)) { + return; + } size_t sum = 0; - for (size_t i = 0; i < sec->opts.nshards; i++) { - /* - * We could save these lock acquisitions by making bytes_cur - * atomic, but stats collection is rare anyways and we expect - * the number and type of stats to get more interesting. 
- */ - malloc_mutex_lock(tsdn, &sec->shards[i].mtx); - sum += sec->shards[i].bytes_cur; - malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); + size_t ntotal_bins = sec->opts.nshards * sec->npsizes; + for (pszind_t i = 0; i < ntotal_bins; i++) { + sec_bin_t *bin = &sec->bins[i]; + malloc_mutex_lock(tsdn, &bin->mtx); + sum += bin->bytes_cur; + sec_bin_stats_accum(&stats->total, &bin->stats); + malloc_mutex_unlock(tsdn, &bin->mtx); } stats->bytes += sum; } @@ -403,31 +332,50 @@ sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats) { void sec_mutex_stats_read( tsdn_t *tsdn, sec_t *sec, mutex_prof_data_t *mutex_prof_data) { - for (size_t i = 0; i < sec->opts.nshards; i++) { - malloc_mutex_lock(tsdn, &sec->shards[i].mtx); - malloc_mutex_prof_accum( - tsdn, mutex_prof_data, &sec->shards[i].mtx); - malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); + if (!sec_is_used(sec)) { + return; + } + size_t ntotal_bins = sec->opts.nshards * sec->npsizes; + for (pszind_t i = 0; i < ntotal_bins; i++) { + sec_bin_t *bin = &sec->bins[i]; + malloc_mutex_lock(tsdn, &bin->mtx); + malloc_mutex_prof_accum(tsdn, mutex_prof_data, &bin->mtx); + malloc_mutex_unlock(tsdn, &bin->mtx); } } void sec_prefork2(tsdn_t *tsdn, sec_t *sec) { - for (size_t i = 0; i < sec->opts.nshards; i++) { - malloc_mutex_prefork(tsdn, &sec->shards[i].mtx); + if (!sec_is_used(sec)) { + return; + } + size_t ntotal_bins = sec->opts.nshards * sec->npsizes; + for (pszind_t i = 0; i < ntotal_bins; i++) { + sec_bin_t *bin = &sec->bins[i]; + malloc_mutex_prefork(tsdn, &bin->mtx); } } void sec_postfork_parent(tsdn_t *tsdn, sec_t *sec) { - for (size_t i = 0; i < sec->opts.nshards; i++) { - malloc_mutex_postfork_parent(tsdn, &sec->shards[i].mtx); + if (!sec_is_used(sec)) { + return; + } + size_t ntotal_bins = sec->opts.nshards * sec->npsizes; + for (pszind_t i = 0; i < ntotal_bins; i++) { + sec_bin_t *bin = &sec->bins[i]; + malloc_mutex_postfork_parent(tsdn, &bin->mtx); } } void sec_postfork_child(tsdn_t *tsdn, sec_t *sec) { - 
for (size_t i = 0; i < sec->opts.nshards; i++) { - malloc_mutex_postfork_child(tsdn, &sec->shards[i].mtx); + if (!sec_is_used(sec)) { + return; + } + size_t ntotal_bins = sec->opts.nshards * sec->npsizes; + for (pszind_t i = 0; i < ntotal_bins; i++) { + sec_bin_t *bin = &sec->bins[i]; + malloc_mutex_postfork_child(tsdn, &bin->mtx); } } diff --git a/src/stats.c b/src/stats.c index 2ccac6c9..be70a6fc 100644 --- a/src/stats.c +++ b/src/stats.c @@ -791,9 +791,35 @@ stats_arena_extents_print(emitter_t *emitter, unsigned i) { static void stats_arena_hpa_shard_sec_print(emitter_t *emitter, unsigned i) { size_t sec_bytes; + size_t sec_hits; + size_t sec_misses; + size_t sec_dalloc_flush; + size_t sec_dalloc_noflush; + size_t sec_overfills; CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t); emitter_kv(emitter, "sec_bytes", "Bytes in small extent cache", emitter_type_size, &sec_bytes); + CTL_M2_GET("stats.arenas.0.hpa_sec_hits", i, &sec_hits, size_t); + emitter_kv(emitter, "sec_hits", "Total hits in small extent cache", + emitter_type_size, &sec_hits); + CTL_M2_GET("stats.arenas.0.hpa_sec_misses", i, &sec_misses, size_t); + emitter_kv(emitter, "sec_misses", "Total misses in small extent cache", + emitter_type_size, &sec_misses); + CTL_M2_GET("stats.arenas.0.hpa_sec_dalloc_noflush", i, + &sec_dalloc_noflush, size_t); + emitter_kv(emitter, "sec_dalloc_noflush", + "Dalloc calls without flush in small extent cache", + emitter_type_size, &sec_dalloc_noflush); + CTL_M2_GET("stats.arenas.0.hpa_sec_dalloc_flush", i, &sec_dalloc_flush, + size_t); + emitter_kv(emitter, "sec_dalloc_flush", + "Dalloc calls with flush in small extent cache", emitter_type_size, + &sec_dalloc_flush); + CTL_M2_GET( + "stats.arenas.0.hpa_sec_overfills", i, &sec_overfills, size_t); + emitter_kv(emitter, "sec_overfills", + "sec_fill calls that went over max_bytes", emitter_type_size, + &sec_overfills); } static void @@ -1642,7 +1668,6 @@ stats_general_print(emitter_t *emitter) { 
OPT_WRITE_SIZE_T("hpa_sec_nshards") OPT_WRITE_SIZE_T("hpa_sec_max_alloc") OPT_WRITE_SIZE_T("hpa_sec_max_bytes") - OPT_WRITE_SIZE_T("hpa_sec_bytes_after_flush") OPT_WRITE_SIZE_T("hpa_sec_batch_fill_extra") OPT_WRITE_BOOL("huge_arena_pac_thp") OPT_WRITE_CHAR_P("metadata_thp") diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 5937601e..9c4253cd 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -113,10 +113,12 @@ create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { err = hpa_central_init(&test_data->central, test_data->base, hooks); assert_false(err, ""); - - err = hpa_shard_init(&test_data->shard, &test_data->central, + sec_opts_t sec_opts; + sec_opts.nshards = 0; + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + err = hpa_shard_init(tsdn, &test_data->shard, &test_data->central, &test_data->emap, test_data->base, &test_data->shard_edata_cache, - SHARD_IND, opts); + SHARD_IND, opts, &sec_opts); assert_false(err, ""); return (hpa_shard_t *)test_data; @@ -309,83 +311,6 @@ TEST_BEGIN(test_stress) { } TEST_END -static void -expect_contiguous(edata_t **edatas, size_t nedatas) { - for (size_t i = 0; i < nedatas; i++) { - size_t expected = (size_t)edata_base_get(edatas[0]) + i * PAGE; - expect_zu_eq(expected, (size_t)edata_base_get(edatas[i]), - "Mismatch at index %zu", i); - } -} - -TEST_BEGIN(test_alloc_dalloc_batch) { - test_skip_if(!hpa_supported()); - - hpa_shard_t *shard = create_test_data( - &hpa_hooks_default, &test_hpa_shard_opts_default); - tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - - bool deferred_work_generated = false; - - enum { NALLOCS = 8 }; - - edata_t *allocs[NALLOCS]; - /* - * Allocate a mix of ways; first half from regular alloc, second half - * from alloc_batch. 
- */ - for (size_t i = 0; i < NALLOCS / 2; i++) { - allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, - /* frequent_reuse */ false, &deferred_work_generated); - expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); - } - edata_list_active_t allocs_list; - edata_list_active_init(&allocs_list); - size_t nsuccess = pai_alloc_batch(tsdn, &shard->pai, PAGE, NALLOCS / 2, - &allocs_list, /* frequent_reuse */ false, &deferred_work_generated); - expect_zu_eq(NALLOCS / 2, nsuccess, "Unexpected oom"); - for (size_t i = NALLOCS / 2; i < NALLOCS; i++) { - allocs[i] = edata_list_active_first(&allocs_list); - edata_list_active_remove(&allocs_list, allocs[i]); - } - - /* - * Should have allocated them contiguously, despite the differing - * methods used. - */ - void *orig_base = edata_base_get(allocs[0]); - expect_contiguous(allocs, NALLOCS); - - /* - * Batch dalloc the first half, individually deallocate the second half. - */ - for (size_t i = 0; i < NALLOCS / 2; i++) { - edata_list_active_append(&allocs_list, allocs[i]); - } - pai_dalloc_batch( - tsdn, &shard->pai, &allocs_list, &deferred_work_generated); - for (size_t i = NALLOCS / 2; i < NALLOCS; i++) { - pai_dalloc( - tsdn, &shard->pai, allocs[i], &deferred_work_generated); - } - - /* Reallocate (individually), and ensure reuse and contiguity. 
*/ - for (size_t i = 0; i < NALLOCS; i++) { - allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - expect_ptr_not_null(allocs[i], "Unexpected alloc failure."); - } - void *new_base = edata_base_get(allocs[0]); - expect_ptr_eq( - orig_base, new_base, "Failed to reuse the allocated memory."); - expect_contiguous(allocs, NALLOCS); - - destroy_test_data(shard); -} -TEST_END - static uintptr_t defer_bump_ptr = HUGEPAGE * 123; static void * defer_test_map(size_t size) { @@ -1533,8 +1458,7 @@ main(void) { (void)mem_tree_iter; (void)mem_tree_reverse_iter; (void)mem_tree_destroy; - return test_no_reentrancy(test_alloc_max, test_stress, - test_alloc_dalloc_batch, test_defer_time, + return test_no_reentrancy(test_alloc_max, test_stress, test_defer_time, test_purge_no_infinite_loop, test_no_min_purge_interval, test_min_purge_interval, test_purge, test_experimental_max_purge_nhp, test_vectorized_opt_eq_zero, diff --git a/test/unit/hpa_sec_integration.c b/test/unit/hpa_sec_integration.c new file mode 100644 index 00000000..c54cdc0c --- /dev/null +++ b/test/unit/hpa_sec_integration.c @@ -0,0 +1,239 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/hpa.h" +#include "jemalloc/internal/nstime.h" + +#define SHARD_IND 111 + +#define ALLOC_MAX (HUGEPAGE) + +typedef struct test_data_s test_data_t; +struct test_data_s { + /* + * Must be the first member -- we convert back and forth between the + * test_data_t and the hpa_shard_t; + */ + hpa_shard_t shard; + hpa_central_t central; + base_t *base; + edata_cache_t shard_edata_cache; + + emap_t emap; +}; + +static hpa_shard_opts_t test_hpa_shard_opts = { + /* slab_max_alloc */ + HUGEPAGE, + /* hugification_threshold */ + 0.9 * HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(10), + /* deferral_allowed */ + true, + /* hugify_delay_ms */ + 0, + /* hugify_sync */ + false, + /* min_purge_interval_ms */ + 5, + /* 
experimental_max_purge_nhp */ + -1, + /* purge_threshold */ + PAGE, + /* min_purge_delay_ms */ + 10, + /* hugify_style */ + hpa_hugify_style_lazy}; + +static hpa_shard_t * +create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts, + const sec_opts_t *sec_opts) { + bool err; + base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND, + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); + assert_ptr_not_null(base, ""); + + test_data_t *test_data = malloc(sizeof(test_data_t)); + assert_ptr_not_null(test_data, ""); + + test_data->base = base; + + err = edata_cache_init(&test_data->shard_edata_cache, base); + assert_false(err, ""); + + err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false); + assert_false(err, ""); + + err = hpa_central_init(&test_data->central, test_data->base, hooks); + assert_false(err, ""); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + err = hpa_shard_init(tsdn, &test_data->shard, &test_data->central, + &test_data->emap, test_data->base, &test_data->shard_edata_cache, + SHARD_IND, opts, sec_opts); + assert_false(err, ""); + + return (hpa_shard_t *)test_data; +} + +static void +destroy_test_data(hpa_shard_t *shard) { + test_data_t *test_data = (test_data_t *)shard; + base_delete(TSDN_NULL, test_data->base); + free(test_data); +} + +static uintptr_t defer_bump_ptr = HUGEPAGE * 123; +static void * +defer_test_map(size_t size) { + void *result = (void *)defer_bump_ptr; + defer_bump_ptr += size; + return result; +} + +static void +defer_test_unmap(void *ptr, size_t size) { + (void)ptr; + (void)size; +} + +static size_t ndefer_purge_calls = 0; +static size_t npurge_size = 0; +static void +defer_test_purge(void *ptr, size_t size) { + (void)ptr; + npurge_size = size; + ++ndefer_purge_calls; +} + +static bool defer_vectorized_purge_called = false; +static bool +defer_vectorized_purge(void *vec, size_t vlen, size_t nbytes) { + (void)vec; + (void)nbytes; + ++ndefer_purge_calls; + defer_vectorized_purge_called = true; + return 
false; +} + +static size_t ndefer_hugify_calls = 0; +static bool +defer_test_hugify(void *ptr, size_t size, bool sync) { + ++ndefer_hugify_calls; + return false; +} + +static size_t ndefer_dehugify_calls = 0; +static void +defer_test_dehugify(void *ptr, size_t size) { + ++ndefer_dehugify_calls; +} + +static nstime_t defer_curtime; +static void +defer_test_curtime(nstime_t *r_time, bool first_reading) { + *r_time = defer_curtime; +} + +static uint64_t +defer_test_ms_since(nstime_t *past_time) { + return (nstime_ns(&defer_curtime) - nstime_ns(past_time)) / 1000 / 1000; +} + +// test that freed pages stay in SEC and hpa thinks they are active + +TEST_BEGIN(test_hpa_sec) { + test_skip_if(!hpa_supported()); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts; + + enum { NALLOCS = 8 }; + sec_opts_t sec_opts; + sec_opts.nshards = 1; + sec_opts.max_alloc = 2 * PAGE; + sec_opts.max_bytes = NALLOCS * PAGE; + sec_opts.batch_fill_extra = 4; + + hpa_shard_t *shard = create_test_data(&hooks, &opts, &sec_opts); + bool deferred_work_generated = false; + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + /* alloc 1 PAGE, confirm sec has fill_extra bytes. 
*/ + edata_t *edata1 = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, false, + false, &deferred_work_generated); + expect_ptr_not_null(edata1, "Unexpected null edata"); + hpa_shard_stats_t hpa_stats; + memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t)); + hpa_shard_stats_merge(tsdn, shard, &hpa_stats); + expect_zu_eq(hpa_stats.psset_stats.merged.nactive, + 1 + sec_opts.batch_fill_extra, ""); + expect_zu_eq(hpa_stats.secstats.bytes, PAGE * sec_opts.batch_fill_extra, + "sec should have fill extra pages"); + + /* Alloc/dealloc NALLOCS times and confirm extents are in sec. */ + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t)); + hpa_shard_stats_merge(tsdn, shard, &hpa_stats); + expect_zu_eq(hpa_stats.psset_stats.merged.nactive, 2 + NALLOCS, ""); + expect_zu_eq(hpa_stats.secstats.bytes, PAGE, "2 refills (at 0 and 4)"); + + for (int i = 0; i < NALLOCS - 1; i++) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); + } + memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t)); + hpa_shard_stats_merge(tsdn, shard, &hpa_stats); + expect_zu_eq(hpa_stats.psset_stats.merged.nactive, (2 + NALLOCS), ""); + expect_zu_eq( + hpa_stats.secstats.bytes, sec_opts.max_bytes, "sec should be full"); + + /* this one should flush 1 + 0.25 * 8 = 3 extents */ + pai_dalloc( + tsdn, &shard->pai, edatas[NALLOCS - 1], &deferred_work_generated); + memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t)); + hpa_shard_stats_merge(tsdn, shard, &hpa_stats); + expect_zu_eq(hpa_stats.psset_stats.merged.nactive, (NALLOCS - 1), ""); + expect_zu_eq(hpa_stats.psset_stats.merged.ndirty, 3, ""); + expect_zu_eq(hpa_stats.secstats.bytes, 0.75 * sec_opts.max_bytes, + "sec should be full"); + + /* Next allocation should come from SEC and not increase active */ + edata_t *edata2 
= pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, false, + false, &deferred_work_generated); + expect_ptr_not_null(edata2, "Unexpected null edata"); + memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t)); + hpa_shard_stats_merge(tsdn, shard, &hpa_stats); + expect_zu_eq(hpa_stats.psset_stats.merged.nactive, NALLOCS - 1, ""); + expect_zu_eq(hpa_stats.secstats.bytes, 0.75 * sec_opts.max_bytes - PAGE, + "sec should have max_bytes minus one page that just came from it"); + + /* We return this one and it stays in the cache */ + pai_dalloc(tsdn, &shard->pai, edata2, &deferred_work_generated); + memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t)); + hpa_shard_stats_merge(tsdn, shard, &hpa_stats); + expect_zu_eq(hpa_stats.psset_stats.merged.nactive, NALLOCS - 1, ""); + expect_zu_eq(hpa_stats.psset_stats.merged.ndirty, 3, ""); + expect_zu_eq(hpa_stats.secstats.bytes, 0.75 * sec_opts.max_bytes, ""); + + destroy_test_data(shard); +} +TEST_END + +int +main(void) { + return test_no_reentrancy(test_hpa_sec); +} diff --git a/test/unit/hpa_sec_integration.sh b/test/unit/hpa_sec_integration.sh new file mode 100644 index 00000000..22451f1d --- /dev/null +++ b/test/unit/hpa_sec_integration.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="process_madvise_max_batch:0,experimental_hpa_start_huge_if_thp_always:false" diff --git a/test/unit/hpa_thp_always.c b/test/unit/hpa_thp_always.c index 29c86cdd..6e56e663 100644 --- a/test/unit/hpa_thp_always.c +++ b/test/unit/hpa_thp_always.c @@ -65,10 +65,12 @@ create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { err = hpa_central_init(&test_data->central, test_data->base, hooks); assert_false(err, ""); - - err = hpa_shard_init(&test_data->shard, &test_data->central, + sec_opts_t sec_opts; + sec_opts.nshards = 0; + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + err = hpa_shard_init(tsdn, &test_data->shard, &test_data->central, &test_data->emap, test_data->base, &test_data->shard_edata_cache, - SHARD_IND, opts); + SHARD_IND, opts, &sec_opts); 
assert_false(err, ""); return (hpa_shard_t *)test_data; diff --git a/test/unit/hpa_vectorized_madvise.c b/test/unit/hpa_vectorized_madvise.c index e82f0ffb..2121de49 100644 --- a/test/unit/hpa_vectorized_madvise.c +++ b/test/unit/hpa_vectorized_madvise.c @@ -66,9 +66,12 @@ create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { err = hpa_central_init(&test_data->central, test_data->base, hooks); assert_false(err, ""); - err = hpa_shard_init(&test_data->shard, &test_data->central, + sec_opts_t sec_opts; + sec_opts.nshards = 0; + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + err = hpa_shard_init(tsdn, &test_data->shard, &test_data->central, &test_data->emap, test_data->base, &test_data->shard_edata_cache, - SHARD_IND, opts); + SHARD_IND, opts, &sec_opts); assert_false(err, ""); return (hpa_shard_t *)test_data; diff --git a/test/unit/hpa_vectorized_madvise_large_batch.c b/test/unit/hpa_vectorized_madvise_large_batch.c index d542f72a..e92988de 100644 --- a/test/unit/hpa_vectorized_madvise_large_batch.c +++ b/test/unit/hpa_vectorized_madvise_large_batch.c @@ -66,10 +66,12 @@ create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { err = hpa_central_init(&test_data->central, test_data->base, hooks); assert_false(err, ""); - - err = hpa_shard_init(&test_data->shard, &test_data->central, + sec_opts_t sec_opts; + sec_opts.nshards = 0; + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + err = hpa_shard_init(tsdn, &test_data->shard, &test_data->central, &test_data->emap, test_data->base, &test_data->shard_edata_cache, - SHARD_IND, opts); + SHARD_IND, opts, &sec_opts); assert_false(err, ""); return (hpa_shard_t *)test_data; diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index f409f687..4c11e485 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -313,7 +313,6 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(size_t, hpa_sec_nshards, always); TEST_MALLCTL_OPT(size_t, hpa_sec_max_alloc, always); TEST_MALLCTL_OPT(size_t, hpa_sec_max_bytes, always); - 
TEST_MALLCTL_OPT(size_t, hpa_sec_bytes_after_flush, always); TEST_MALLCTL_OPT(size_t, hpa_sec_batch_fill_extra, always); TEST_MALLCTL_OPT(ssize_t, experimental_hpa_max_purge_nhp, always); TEST_MALLCTL_OPT(size_t, hpa_purge_threshold, always); diff --git a/test/unit/sec.c b/test/unit/sec.c index d57c66ec..2a6a00ce 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -2,618 +2,493 @@ #include "jemalloc/internal/sec.h" -typedef struct pai_test_allocator_s pai_test_allocator_t; -struct pai_test_allocator_s { - pai_t pai; - bool alloc_fail; - size_t alloc_count; - size_t alloc_batch_count; - size_t dalloc_count; - size_t dalloc_batch_count; +typedef struct test_data_s test_data_t; +struct test_data_s { /* - * We use a simple bump allocator as the implementation. This isn't - * *really* correct, since we may allow expansion into a subsequent - * allocation, but it's not like the SEC is really examining the - * pointers it gets back; this is mostly just helpful for debugging. + * Must be the first member -- we convert back and forth between the + * test_data_t and the sec_t; */ - uintptr_t next_ptr; - size_t expand_count; - bool expand_return_value; - size_t shrink_count; - bool shrink_return_value; + sec_t sec; + base_t *base; }; static void -test_sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t max_alloc, - size_t max_bytes) { - sec_opts_t opts; - opts.nshards = 1; - opts.max_alloc = max_alloc; - opts.max_bytes = max_bytes; - /* - * Just choose reasonable defaults for these; most tests don't care so - * long as they're something reasonable. - */ - opts.bytes_after_flush = max_bytes / 2; - opts.batch_fill_extra = 4; - - /* - * We end up leaking this base, but that's fine; this test is - * short-running, and SECs are arena-scoped in reality. 
- */ - base_t *base = base_new(TSDN_NULL, /* ind */ 123, +test_data_init(tsdn_t *tsdn, test_data_t *tdata, const sec_opts_t *opts) { + tdata->base = base_new(TSDN_NULL, /* ind */ 123, &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); - bool err = sec_init(TSDN_NULL, sec, base, fallback, &opts); + bool err = sec_init(tsdn, &tdata->sec, tdata->base, opts); assert_false(err, "Unexpected initialization failure"); - assert_u_ge(sec->npsizes, 0, "Zero size classes allowed for caching"); -} - -static inline edata_t * -pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool guarded, bool frequent_reuse, - bool *deferred_work_generated) { - assert(!guarded); - pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - if (ta->alloc_fail) { - return NULL; + if (tdata->sec.opts.nshards > 0) { + assert_u_ge(tdata->sec.npsizes, 0, + "Zero size classes allowed for caching"); } - edata_t *edata = malloc(sizeof(edata_t)); - assert_ptr_not_null(edata, ""); - ta->next_ptr += alignment - 1; - edata_init(edata, /* arena_ind */ 0, - (void *)(ta->next_ptr & ~(alignment - 1)), size, - /* slab */ false, - /* szind */ 0, /* sn */ 1, extent_state_active, /* zero */ zero, - /* comitted */ true, /* ranged */ false, EXTENT_NOT_HEAD); - ta->next_ptr += size; - ta->alloc_count++; - return edata; -} - -static inline size_t -pai_test_allocator_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results, bool frequent_reuse, - bool *deferred_work_generated) { - pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - if (ta->alloc_fail) { - return 0; - } - for (size_t i = 0; i < nallocs; i++) { - edata_t *edata = malloc(sizeof(edata_t)); - assert_ptr_not_null(edata, ""); - edata_init(edata, /* arena_ind */ 0, (void *)ta->next_ptr, size, - /* slab */ false, /* szind */ 0, /* sn */ 1, - extent_state_active, /* zero */ false, /* comitted */ true, - /* ranged */ false, EXTENT_NOT_HEAD); - ta->next_ptr += 
size; - ta->alloc_batch_count++; - edata_list_active_append(results, edata); - } - return nallocs; -} - -static bool -pai_test_allocator_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero, - bool *deferred_work_generated) { - pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - ta->expand_count++; - return ta->expand_return_value; -} - -static bool -pai_test_allocator_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool *deferred_work_generated) { - pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - ta->shrink_count++; - return ta->shrink_return_value; } static void -pai_test_allocator_dalloc( - tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) { - pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - ta->dalloc_count++; - free(edata); +destroy_test_data(tsdn_t *tsdn, test_data_t *tdata) { + /* There is no destroy sec to delete the bins ?! */ + base_delete(tsdn, tdata->base); } -static void -pai_test_allocator_dalloc_batch(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list, bool *deferred_work_generated) { - pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - - edata_t *edata; - while ((edata = edata_list_active_first(list)) != NULL) { - edata_list_active_remove(list, edata); - ta->dalloc_batch_count++; - free(edata); - } -} - -static inline void -pai_test_allocator_init(pai_test_allocator_t *ta) { - ta->alloc_fail = false; - ta->alloc_count = 0; - ta->alloc_batch_count = 0; - ta->dalloc_count = 0; - ta->dalloc_batch_count = 0; - /* Just don't start the edata at 0. 
*/ - ta->next_ptr = 10 * PAGE; - ta->expand_count = 0; - ta->expand_return_value = false; - ta->shrink_count = 0; - ta->shrink_return_value = false; - ta->pai.alloc = &pai_test_allocator_alloc; - ta->pai.alloc_batch = &pai_test_allocator_alloc_batch; - ta->pai.expand = &pai_test_allocator_expand; - ta->pai.shrink = &pai_test_allocator_shrink; - ta->pai.dalloc = &pai_test_allocator_dalloc; - ta->pai.dalloc_batch = &pai_test_allocator_dalloc_batch; -} - -TEST_BEGIN(test_reuse) { - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; - /* - * We can't use the "real" tsd, since we malloc within the test - * allocator hooks; we'd get lock inversion crashes. Eventually, we - * should have a way to mock tsds, but for now just don't do any - * lock-order checking. - */ - tsdn_t *tsdn = TSDN_NULL; - /* - * 11 allocs apiece of 1-PAGE and 2-PAGE objects means that we should be - * able to get to 33 pages in the cache before triggering a flush. We - * set the flush liimt to twice this amount, to avoid accidentally - * triggering a flush caused by the batch-allocation down the cache fill - * pathway disrupting ordering. 
- */ - enum { NALLOCS = 11 }; - edata_t *one_page[NALLOCS]; - edata_t *two_page[NALLOCS]; - bool deferred_work_generated = false; - test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 2 * PAGE, - /* max_bytes */ 2 * (NALLOCS * PAGE + NALLOCS * 2 * PAGE)); - for (int i = 0; i < NALLOCS; i++) { - one_page[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - expect_ptr_not_null(one_page[i], "Unexpected alloc failure"); - two_page[i] = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - expect_ptr_not_null(one_page[i], "Unexpected alloc failure"); - } - expect_zu_eq(0, ta.alloc_count, "Should be using batch allocs"); - size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; - expect_zu_le( - 2 * NALLOCS, max_allocs, "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); - /* - * Free in a different order than we allocated, to make sure free-list - * separation works correctly. - */ - for (int i = NALLOCS - 1; i >= 0; i--) { - pai_dalloc( - tsdn, &sec.pai, one_page[i], &deferred_work_generated); - } - for (int i = NALLOCS - 1; i >= 0; i--) { - pai_dalloc( - tsdn, &sec.pai, two_page[i], &deferred_work_generated); - } - expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, - "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); - /* - * Check that the n'th most recent deallocated extent is returned for - * the n'th alloc request of a given size. 
- */ - for (int i = 0; i < NALLOCS; i++) { - edata_t *alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - edata_t *alloc2 = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - expect_ptr_eq(one_page[i], alloc1, "Got unexpected allocation"); - expect_ptr_eq(two_page[i], alloc2, "Got unexpected allocation"); - } - expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, - "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); -} -TEST_END - -TEST_BEGIN(test_auto_flush) { - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; - /* See the note above -- we can't use the real tsd. */ - tsdn_t *tsdn = TSDN_NULL; - /* - * 10-allocs apiece of 1-PAGE and 2-PAGE objects means that we should be - * able to get to 30 pages in the cache before triggering a flush. The - * choice of NALLOCS here is chosen to match the batch allocation - * default (4 extra + 1 == 5; so 10 allocations leaves the cache exactly - * empty, even in the presence of batch allocation on fill). - * Eventually, once our allocation batching strategies become smarter, - * this should change. 
- */ - enum { NALLOCS = 10 }; - edata_t *extra_alloc; - edata_t *allocs[NALLOCS]; - bool deferred_work_generated = false; - test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, - /* max_bytes */ NALLOCS * PAGE); - for (int i = 0; i < NALLOCS; i++) { - allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); - } - extra_alloc = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, - /* guarded */ false, /* frequent_reuse */ false, - &deferred_work_generated); - expect_ptr_not_null(extra_alloc, "Unexpected alloc failure"); - size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; - expect_zu_le( - NALLOCS + 1, max_allocs, "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); - /* Free until the SEC is full, but should not have flushed yet. */ - for (int i = 0; i < NALLOCS; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated); - } - expect_zu_le( - NALLOCS + 1, max_allocs, "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); - /* - * Free the extra allocation; this should trigger a flush. The internal - * flushing logic is allowed to get complicated; for now, we rely on our - * whitebox knowledge of the fact that the SEC flushes bins in their - * entirety when it decides to do so, and it has only one bin active - * right now. 
- */ - pai_dalloc(tsdn, &sec.pai, extra_alloc, &deferred_work_generated); - expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, - "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, - "Incorrect number of (non-batch) deallocations"); - expect_zu_eq(NALLOCS + 1, ta.dalloc_batch_count, - "Incorrect number of batch deallocations"); -} -TEST_END - -/* - * A disable and a flush are *almost* equivalent; the only difference is what - * happens afterwards; disabling disallows all future caching as well. - */ -static void -do_disable_flush_test(bool is_disable) { - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; - /* See the note above -- we can't use the real tsd. */ - tsdn_t *tsdn = TSDN_NULL; - - enum { NALLOCS = 11 }; - edata_t *allocs[NALLOCS]; - bool deferred_work_generated = false; - test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, - /* max_bytes */ NALLOCS * PAGE); - for (int i = 0; i < NALLOCS; i++) { - allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); - } - /* Free all but the last aloc. 
*/ - for (int i = 0; i < NALLOCS - 1; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated); - } - size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; - - expect_zu_le(NALLOCS, max_allocs, "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); - - if (is_disable) { - sec_disable(tsdn, &sec); - } else { - sec_flush(tsdn, &sec); - } - - expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, - "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, - "Incorrect number of (non-batch) deallocations"); - expect_zu_le(NALLOCS - 1, ta.dalloc_batch_count, - "Incorrect number of batch deallocations"); - size_t old_dalloc_batch_count = ta.dalloc_batch_count; - - /* - * If we free into a disabled SEC, it should forward to the fallback. - * Otherwise, the SEC should accept the allocation. - */ - pai_dalloc( - tsdn, &sec.pai, allocs[NALLOCS - 1], &deferred_work_generated); - - expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, - "Incorrect number of allocations"); - expect_zu_eq(is_disable ? 1 : 0, ta.dalloc_count, - "Incorrect number of (non-batch) deallocations"); - expect_zu_eq(old_dalloc_batch_count, ta.dalloc_batch_count, - "Incorrect number of batch deallocations"); -} - -TEST_BEGIN(test_disable) { - do_disable_flush_test(/* is_disable */ true); -} -TEST_END - -TEST_BEGIN(test_flush) { - do_disable_flush_test(/* is_disable */ false); -} -TEST_END - -TEST_BEGIN(test_max_alloc_respected) { - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; - /* See the note above -- we can't use the real tsd. 
*/ - tsdn_t *tsdn = TSDN_NULL; - - size_t max_alloc = 2 * PAGE; - size_t attempted_alloc = 3 * PAGE; - - bool deferred_work_generated = false; - - test_sec_init(&sec, &ta.pai, /* nshards */ 1, max_alloc, - /* max_bytes */ 1000 * PAGE); - - for (size_t i = 0; i < 100; i++) { - expect_zu_eq( - i, ta.alloc_count, "Incorrect number of allocations"); - expect_zu_eq( - i, ta.dalloc_count, "Incorrect number of deallocations"); - edata_t *edata = pai_alloc(tsdn, &sec.pai, attempted_alloc, - PAGE, /* zero */ false, /* guarded */ false, - /* frequent_reuse */ false, &deferred_work_generated); - expect_ptr_not_null(edata, "Unexpected alloc failure"); - expect_zu_eq( - i + 1, ta.alloc_count, "Incorrect number of allocations"); - expect_zu_eq( - i, ta.dalloc_count, "Incorrect number of deallocations"); - pai_dalloc(tsdn, &sec.pai, edata, &deferred_work_generated); - } -} -TEST_END - -TEST_BEGIN(test_expand_shrink_delegate) { - /* - * Expand and shrink shouldn't affect sec state; they should just - * delegate to the fallback PAI. - */ - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; - /* See the note above -- we can't use the real tsd. 
*/ - tsdn_t *tsdn = TSDN_NULL; - - bool deferred_work_generated = false; - - test_sec_init(&sec, &ta.pai, /* nshards */ 1, - /* max_alloc */ USIZE_GROW_SLOW_THRESHOLD, - /* max_bytes */ 1000 * PAGE); - edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ false, - &deferred_work_generated); - expect_ptr_not_null(edata, "Unexpected alloc failure"); - - bool err = pai_expand(tsdn, &sec.pai, edata, PAGE, 4 * PAGE, - /* zero */ false, &deferred_work_generated); - expect_false(err, "Unexpected expand failure"); - expect_zu_eq(1, ta.expand_count, ""); - ta.expand_return_value = true; - err = pai_expand(tsdn, &sec.pai, edata, 4 * PAGE, 3 * PAGE, - /* zero */ false, &deferred_work_generated); - expect_true(err, "Unexpected expand success"); - expect_zu_eq(2, ta.expand_count, ""); - - err = pai_shrink(tsdn, &sec.pai, edata, 4 * PAGE, 2 * PAGE, - &deferred_work_generated); - expect_false(err, "Unexpected shrink failure"); - expect_zu_eq(1, ta.shrink_count, ""); - ta.shrink_return_value = true; - err = pai_shrink( - tsdn, &sec.pai, edata, 2 * PAGE, PAGE, &deferred_work_generated); - expect_true(err, "Unexpected shrink success"); - expect_zu_eq(2, ta.shrink_count, ""); -} -TEST_END - -TEST_BEGIN(test_nshards_0) { - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; - /* See the note above -- we can't use the real tsd. 
*/ - tsdn_t *tsdn = TSDN_NULL; - base_t *base = base_new(TSDN_NULL, /* ind */ 123, - &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); - - sec_opts_t opts = SEC_OPTS_DEFAULT; +TEST_BEGIN(test_max_nshards_option_zero) { + test_data_t tdata; + sec_opts_t opts; opts.nshards = 0; - sec_init(TSDN_NULL, &sec, base, &ta.pai, &opts); + opts.max_alloc = PAGE; + opts.max_bytes = 512 * PAGE; - bool deferred_work_generated = false; - edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ false, - &deferred_work_generated); - pai_dalloc(tsdn, &sec.pai, edata, &deferred_work_generated); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); - /* Both operations should have gone directly to the fallback. */ - expect_zu_eq(1, ta.alloc_count, ""); - expect_zu_eq(1, ta.dalloc_count, ""); + edata_t *edata = sec_alloc(tsdn, &tdata.sec, PAGE); + expect_ptr_null(edata, "SEC should be disabled when nshards==0"); + destroy_test_data(tsdn, &tdata); } TEST_END +TEST_BEGIN(test_max_alloc_option_too_small) { + test_data_t tdata; + sec_opts_t opts; + opts.nshards = 1; + opts.max_alloc = 2 * PAGE; + opts.max_bytes = 512 * PAGE; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); + + edata_t *edata = sec_alloc(tsdn, &tdata.sec, 3 * PAGE); + expect_ptr_null(edata, "max_alloc is 2*PAGE, should not alloc 3*PAGE"); + destroy_test_data(tsdn, &tdata); +} +TEST_END + +TEST_BEGIN(test_sec_fill) { + test_data_t tdata; + sec_opts_t opts; + opts.nshards = 1; + opts.max_alloc = 2 * PAGE; + opts.max_bytes = 4 * PAGE; + opts.batch_fill_extra = 2; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); + + /* Fill the cache with two extents */ + sec_stats_t stats = {0}; + edata_list_active_t allocs; + edata_list_active_init(&allocs); + edata_t edata1, edata2; + edata_size_set(&edata1, PAGE); + edata_size_set(&edata2, PAGE); + edata_list_active_append(&allocs, 
&edata1); + edata_list_active_append(&allocs, &edata2); + sec_fill(tsdn, &tdata.sec, PAGE, &allocs, 2); + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(stats.bytes, 2 * PAGE, "SEC should have what we filled"); + expect_true(edata_list_active_empty(&allocs), + "extents should be consumed by sec"); + + /* Try to overfill and confirm that max_bytes is respected. */ + stats.bytes = 0; + edata_t edata5, edata4, edata3; + edata_size_set(&edata3, PAGE); + edata_size_set(&edata4, PAGE); + edata_size_set(&edata5, PAGE); + edata_list_active_append(&allocs, &edata3); + edata_list_active_append(&allocs, &edata4); + edata_list_active_append(&allocs, &edata5); + sec_fill(tsdn, &tdata.sec, PAGE, &allocs, 3); + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq( + stats.bytes, opts.max_bytes, "SEC can't have more than max_bytes"); + expect_false(edata_list_active_empty(&allocs), "Not all should fit"); + expect_zu_eq(stats.total.noverfills, 1, "Expected one overfill"); + destroy_test_data(tsdn, &tdata); +} +TEST_END + +TEST_BEGIN(test_sec_alloc) { + test_data_t tdata; + sec_opts_t opts; + opts.nshards = 1; + opts.max_alloc = 2 * PAGE; + opts.max_bytes = 4 * PAGE; + opts.batch_fill_extra = 1; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); + + /* Alloc from empty cache returns NULL */ + edata_t *edata = sec_alloc(tsdn, &tdata.sec, PAGE); + expect_ptr_null(edata, "SEC is empty"); + + /* Place two extents into the sec */ + edata_list_active_t allocs; + edata_list_active_init(&allocs); + edata_t edata1, edata2; + edata_size_set(&edata1, PAGE); + edata_list_active_append(&allocs, &edata1); + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_true(edata_list_active_empty(&allocs), ""); + edata_size_set(&edata2, PAGE); + edata_list_active_append(&allocs, &edata2); + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_true(edata_list_active_empty(&allocs), ""); + + sec_stats_t stats = {0}; + sec_stats_merge(tsdn, &tdata.sec, &stats); + 
expect_zu_eq(stats.bytes, 2 * PAGE, + "After fill bytes should reflect what is in the cache"); + stats.bytes = 0; + + /* Most recently cached extent should be used on alloc */ + edata = sec_alloc(tsdn, &tdata.sec, PAGE); + expect_ptr_eq(edata, &edata2, "edata2 is most recently used"); + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(stats.bytes, PAGE, "One more item left in the cache"); + stats.bytes = 0; + + /* Alloc can still get extents from cache */ + edata = sec_alloc(tsdn, &tdata.sec, PAGE); + expect_ptr_eq(edata, &edata1, "SEC is not empty"); + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(stats.bytes, 0, "No more items after last one is popped"); + + /* And cache is empty again */ + edata = sec_alloc(tsdn, &tdata.sec, PAGE); + expect_ptr_null(edata, "SEC is empty"); + destroy_test_data(tsdn, &tdata); +} +TEST_END + +TEST_BEGIN(test_sec_dalloc) { + test_data_t tdata; + sec_opts_t opts; + opts.nshards = 1; + opts.max_alloc = PAGE; + opts.max_bytes = 2 * PAGE; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); + + /* Return one extent into the cache */ + edata_list_active_t allocs; + edata_list_active_init(&allocs); + edata_t edata1; + edata_size_set(&edata1, PAGE); + edata_list_active_append(&allocs, &edata1); + + /* SEC is empty, we return one pointer to it */ + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_true( + edata_list_active_empty(&allocs), "extents should be consumed"); + + /* Return one more extent, so that we are at the limit */ + edata_t edata2; + edata_size_set(&edata2, PAGE); + edata_list_active_append(&allocs, &edata2); + /* Sec can take one more as well and we will be exactly at max_bytes */ + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_true( + edata_list_active_empty(&allocs), "extents should be consumed"); + + sec_stats_t stats = {0}; + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(stats.bytes, opts.max_bytes, "Size should match deallocs"); + stats.bytes = 0; + + /* + * 
We are at max_bytes. Now, we dalloc one more pointer and we go above + * the limit. This will force flush to 3/4 of max_bytes and given that + * we have max of 2 pages, we will have to flush two. We will not flush + * the one given in the input as it is the most recently used. + */ + edata_t edata3; + edata_size_set(&edata3, PAGE); + edata_list_active_append(&allocs, &edata3); + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_false( + edata_list_active_empty(&allocs), "extents should NOT be consumed"); + expect_ptr_ne( + edata_list_active_first(&allocs), &edata3, "edata3 is MRU"); + expect_ptr_ne( + edata_list_active_last(&allocs), &edata3, "edata3 is MRU"); + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(PAGE, stats.bytes, "Should have flushed"); + destroy_test_data(tsdn, &tdata); +} +TEST_END + +TEST_BEGIN(test_max_bytes_too_low) { + test_data_t tdata; + sec_opts_t opts; + opts.nshards = 1; + opts.max_alloc = 4 * PAGE; + opts.max_bytes = 2 * PAGE; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); + + /* Return one extent into the cache. 
Item is too big */ + edata_list_active_t allocs; + edata_list_active_init(&allocs); + edata_t edata1; + edata_size_set(&edata1, 3 * PAGE); + edata_list_active_append(&allocs, &edata1); + + /* SEC is empty, we return one pointer to it */ + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_false( + edata_list_active_empty(&allocs), "extents should not be consumed"); + destroy_test_data(tsdn, &tdata); +} +TEST_END + +TEST_BEGIN(test_sec_flush) { + test_data_t tdata; + sec_opts_t opts; + opts.nshards = 1; + opts.max_alloc = 4 * PAGE; + opts.max_bytes = 1024 * PAGE; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); + + /* We put in 10 one-page extents, and 10 four-page extents */ + edata_list_active_t allocs1; + edata_list_active_t allocs4; + edata_list_active_init(&allocs1); + edata_list_active_init(&allocs4); + enum { NALLOCS = 10 }; + edata_t edata1[NALLOCS]; + edata_t edata4[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edata_size_set(&edata1[i], PAGE); + edata_size_set(&edata4[i], 4 * PAGE); + + edata_list_active_append(&allocs1, &edata1[i]); + sec_dalloc(tsdn, &tdata.sec, &allocs1); + edata_list_active_append(&allocs4, &edata4[i]); + sec_dalloc(tsdn, &tdata.sec, &allocs4); + } + + sec_stats_t stats = {0}; + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq( + stats.bytes, 10 * 5 * PAGE, "SEC should have what we filled"); + stats.bytes = 0; + + expect_true(edata_list_active_empty(&allocs1), ""); + sec_flush(tsdn, &tdata.sec, &allocs1); + expect_false(edata_list_active_empty(&allocs1), ""); + + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(stats.bytes, 0, "SEC should be empty"); + stats.bytes = 0; + destroy_test_data(tsdn, &tdata); +} +TEST_END + +TEST_BEGIN(test_sec_stats) { + test_data_t tdata; + sec_opts_t opts; + opts.nshards = 1; + opts.max_alloc = PAGE; + opts.max_bytes = 2 * PAGE; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); + + edata_list_active_t allocs; + 
edata_list_active_init(&allocs); + edata_t edata1; + edata_size_set(&edata1, PAGE); + edata_list_active_append(&allocs, &edata1); + + /* SEC is empty alloc fails. nmisses==1 */ + edata_t *edata = sec_alloc(tsdn, &tdata.sec, PAGE); + expect_ptr_null(edata, "SEC should be empty"); + + /* SEC is empty, we return one pointer to it. ndalloc_noflush=1 */ + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_true( + edata_list_active_empty(&allocs), "extents should be consumed"); + + edata_t edata2; + edata_size_set(&edata2, PAGE); + edata_list_active_append(&allocs, &edata2); + /* Sec can take one more, so ndalloc_noflush=2 */ + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_true( + edata_list_active_empty(&allocs), "extents should be consumed"); + + sec_stats_t stats; + memset(&stats, 0, sizeof(sec_stats_t)); + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(stats.bytes, opts.max_bytes, "Size should match deallocs"); + expect_zu_eq(stats.total.ndalloc_noflush, 2, ""); + expect_zu_eq(stats.total.nmisses, 1, ""); + + memset(&stats, 0, sizeof(sec_stats_t)); + + /* + * We are at max_bytes. Now, we dalloc one more pointer and we go above + * the limit. This will force flush, so ndalloc_flush = 1. 
+ */ + edata_t edata3; + edata_size_set(&edata3, PAGE); + edata_list_active_append(&allocs, &edata3); + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_false( + edata_list_active_empty(&allocs), "extents should NOT be consumed"); + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(PAGE, stats.bytes, "Should have flushed"); + expect_zu_eq(stats.total.ndalloc_flush, 1, ""); + memset(&stats, 0, sizeof(sec_stats_t)); + destroy_test_data(tsdn, &tdata); +} +TEST_END + +#define NOPS_PER_THREAD 100 +#define NPREFILL 32 + static void -expect_stats_pages(tsdn_t *tsdn, sec_t *sec, size_t npages) { - sec_stats_t stats; +edata_init_test(edata_t *edata) { + memset(edata, 0, sizeof(*edata)); +} + +typedef struct { + sec_t *sec; + uint8_t preferred_shard; + size_t nallocs; + size_t nallocs_fail; + size_t ndallocs; + size_t ndallocs_fail; + edata_list_active_t fill_list; + size_t fill_list_sz; + edata_t *edata[NOPS_PER_THREAD]; +} trylock_test_arg_t; + +static void * +thd_trylock_test(void *varg) { + trylock_test_arg_t *arg = (trylock_test_arg_t *)varg; + tsd_t *tsd = tsd_fetch(); + tsdn_t *tsdn = tsd_tsdn(tsd); + + /* Set the preferred shard for this thread */ + uint8_t *shard_idx = tsd_sec_shardp_get(tsd); + *shard_idx = arg->preferred_shard; + + /* Fill the shard with some extents */ + sec_fill(tsdn, arg->sec, PAGE, &arg->fill_list, arg->fill_list_sz); + expect_true(edata_list_active_empty(&arg->fill_list), ""); + + for (unsigned i = 0; i < NOPS_PER_THREAD; i++) { + /* Try to allocate from SEC */ + arg->edata[i] = sec_alloc(tsdn, arg->sec, PAGE); + if (arg->edata[i] != NULL) { + expect_zu_eq(edata_size_get(arg->edata[i]), PAGE, ""); + } + } + + for (unsigned i = 0; i < NOPS_PER_THREAD; i++) { + if (arg->edata[i] != NULL) { + edata_list_active_t list; + edata_list_active_init(&list); + arg->nallocs++; + edata_list_active_append(&list, arg->edata[i]); + expect_zu_eq(edata_size_get(arg->edata[i]), PAGE, ""); + sec_dalloc(tsdn, arg->sec, &list); + if 
(edata_list_active_empty(&list)) { + arg->ndallocs++; + } else { + arg->ndallocs_fail++; + } + } else { + arg->nallocs_fail++; + } + } + + return NULL; +} + +TEST_BEGIN(test_sec_multishard) { + test_data_t tdata; + sec_opts_t opts; + enum { NSHARDS = 2 }; + enum { NTHREADS = NSHARDS * 16 }; + opts.nshards = NSHARDS; + opts.max_alloc = 2 * PAGE; + opts.max_bytes = 64 * NTHREADS * PAGE; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); + + /* Create threads with different preferred shards */ + thd_t thds[NTHREADS]; + trylock_test_arg_t args[NTHREADS]; + + edata_t all_edatas[NPREFILL * NTHREADS]; + + for (unsigned i = 0; i < NTHREADS; i++) { + edata_list_active_init(&args[i].fill_list); + for (unsigned j = 0; j < NPREFILL; ++j) { + size_t ind = i * NPREFILL + j; + edata_init_test(&all_edatas[ind]); + edata_size_set(&all_edatas[ind], PAGE); + edata_list_active_append( + &args[i].fill_list, &all_edatas[ind]); + } + args[i].fill_list_sz = NPREFILL; + args[i].sec = &tdata.sec; + args[i].preferred_shard = i % opts.nshards; + args[i].nallocs = 0; + args[i].nallocs_fail = 0; + args[i].ndallocs = 0; + args[i].ndallocs_fail = 0; + memset( + &args[i].edata[0], 0, NOPS_PER_THREAD * sizeof(edata_t *)); + thd_create(&thds[i], thd_trylock_test, &args[i]); + } + + for (unsigned i = 0; i < NTHREADS; i++) { + thd_join(thds[i], NULL); + } + + /* Wait for all threads to complete */ + size_t total_allocs = 0; + size_t total_dallocs = 0; + size_t total_allocs_fail = 0; + for (unsigned i = 0; i < NTHREADS; i++) { + total_allocs += args[i].nallocs; + total_dallocs += args[i].ndallocs; + total_allocs_fail += args[i].nallocs_fail; + } + + /* We must have at least some hits */ + expect_zu_gt(total_allocs, 0, ""); /* - * Check that the stats merging accumulates rather than overwrites by - * putting some (made up) data there to begin with. + * We must have at least some successful dallocs by design (max_bytes is + * big enough). 
*/ - stats.bytes = 123; - sec_stats_merge(tsdn, sec, &stats); - assert_zu_le(npages * PAGE + 123, stats.bytes, ""); -} + expect_zu_gt(total_dallocs, 0, ""); -TEST_BEGIN(test_stats_simple) { - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; + /* Get final stats to verify that hits and misses are accurate */ + sec_stats_t stats = {0}; + memset(&stats, 0, sizeof(sec_stats_t)); + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(stats.total.nhits, total_allocs, ""); + expect_zu_eq(stats.total.nmisses, total_allocs_fail, ""); - /* See the note above -- we can't use the real tsd. */ - tsdn_t *tsdn = TSDN_NULL; - - enum { - NITERS = 100, - FLUSH_PAGES = 20, - }; - - bool deferred_work_generated = false; - - test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, - /* max_bytes */ FLUSH_PAGES * PAGE); - - edata_t *allocs[FLUSH_PAGES]; - for (size_t i = 0; i < FLUSH_PAGES; i++) { - allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - expect_stats_pages(tsdn, &sec, 0); - } - - /* Increase and decrease, without flushing. */ - for (size_t i = 0; i < NITERS; i++) { - for (size_t j = 0; j < FLUSH_PAGES / 2; j++) { - pai_dalloc(tsdn, &sec.pai, allocs[j], - &deferred_work_generated); - expect_stats_pages(tsdn, &sec, j + 1); - } - for (size_t j = 0; j < FLUSH_PAGES / 2; j++) { - allocs[j] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, - /* frequent_reuse */ false, - &deferred_work_generated); - expect_stats_pages(tsdn, &sec, FLUSH_PAGES / 2 - j - 1); - } - } -} -TEST_END - -TEST_BEGIN(test_stats_auto_flush) { - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; - - /* See the note above -- we can't use the real tsd. 
*/ - tsdn_t *tsdn = TSDN_NULL; - - enum { - FLUSH_PAGES = 10, - }; - - test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, - /* max_bytes */ FLUSH_PAGES * PAGE); - - edata_t *extra_alloc0; - edata_t *extra_alloc1; - edata_t *allocs[2 * FLUSH_PAGES]; - - bool deferred_work_generated = false; - - extra_alloc0 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, - /* guarded */ false, /* frequent_reuse */ false, - &deferred_work_generated); - extra_alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, - /* guarded */ false, /* frequent_reuse */ false, - &deferred_work_generated); - - for (size_t i = 0; i < 2 * FLUSH_PAGES; i++) { - allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - } - - for (size_t i = 0; i < FLUSH_PAGES; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated); - } - pai_dalloc(tsdn, &sec.pai, extra_alloc0, &deferred_work_generated); - - /* Flush the remaining pages; stats should still work. */ - for (size_t i = 0; i < FLUSH_PAGES; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES + i], - &deferred_work_generated); - } - - pai_dalloc(tsdn, &sec.pai, extra_alloc1, &deferred_work_generated); - - expect_stats_pages(tsdn, &sec, - ta.alloc_count + ta.alloc_batch_count - ta.dalloc_count - - ta.dalloc_batch_count); -} -TEST_END - -TEST_BEGIN(test_stats_manual_flush) { - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; - - /* See the note above -- we can't use the real tsd. 
*/ - tsdn_t *tsdn = TSDN_NULL; - - enum { - FLUSH_PAGES = 10, - }; - - test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, - /* max_bytes */ FLUSH_PAGES * PAGE); - - bool deferred_work_generated = false; - edata_t *allocs[FLUSH_PAGES]; - for (size_t i = 0; i < FLUSH_PAGES; i++) { - allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - expect_stats_pages(tsdn, &sec, 0); - } - - /* Dalloc the first half of the allocations. */ - for (size_t i = 0; i < FLUSH_PAGES / 2; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated); - expect_stats_pages(tsdn, &sec, i + 1); - } - - sec_flush(tsdn, &sec); - expect_stats_pages(tsdn, &sec, 0); - - /* Flush the remaining pages. */ - for (size_t i = 0; i < FLUSH_PAGES / 2; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES / 2 + i], - &deferred_work_generated); - expect_stats_pages(tsdn, &sec, i + 1); - } - sec_disable(tsdn, &sec); - expect_stats_pages(tsdn, &sec, 0); + destroy_test_data(tsdn, &tdata); } TEST_END int main(void) { - return test(test_reuse, test_auto_flush, test_disable, test_flush, - test_max_alloc_respected, test_expand_shrink_delegate, - test_nshards_0, test_stats_simple, test_stats_auto_flush, - test_stats_manual_flush); + return test(test_max_nshards_option_zero, + test_max_alloc_option_too_small, test_sec_fill, test_sec_alloc, + test_sec_dalloc, test_max_bytes_too_low, test_sec_flush, + test_sec_stats, test_sec_multishard); }