diff --git a/Makefile.in b/Makefile.in index 435fc34d..5799e6f5 100644 --- a/Makefile.in +++ b/Makefile.in @@ -242,6 +242,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/hpa_vectorized_madvise_large_batch.c \ $(srcroot)test/unit/hpa_background_thread.c \ $(srcroot)test/unit/hpdata.c \ + $(srcroot)test/unit/extent_alloc_flags.c \ $(srcroot)test/unit/huge.c \ $(srcroot)test/unit/inspect.c \ $(srcroot)test/unit/junk.c \ diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 82035fe3..e7a8221c 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -63,6 +63,7 @@ typedef struct ctl_stats_s { size_t resident; size_t mapped; size_t retained; + size_t pinned; background_thread_stats_t background_thread; mutex_prof_data_t mutex_prof_data[mutex_prof_num_global_mutexes]; diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 06b6c545..13917a9b 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -110,8 +110,10 @@ struct edata_s { * i: szind * f: nfree * s: bin_shard + * h: is_head + * n: pinned * - * 00000000 ... 0000ssss ssffffff ffffiiii iiiitttg zpcbaaaa aaaaaaaa + * 00000000 ... 0nhsssss ssffffff ffffiiii iiiitttg zpcbaaaa aaaaaaaa * * arena_ind: Arena from which this extent came, or all 1 bits if * unassociated. @@ -145,6 +147,12 @@ struct edata_s { * nfree: Number of free regions in slab. * * bin_shard: the shard of the bin from which this extent came. + * + * is_head: see comments in ehooks_default_merge_impl(). + * + * pinned: true if the alloc hook signaled non-reclaimable backing + * (via bit 0 of the returned pointer). Pinned extents + * are routed to ecache_pinned, separate from dirty/decay. 
*/ uint64_t e_bits; #define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) \ @@ -210,6 +218,16 @@ struct edata_s { #define EDATA_BITS_IS_HEAD_MASK \ MASK(EDATA_BITS_IS_HEAD_WIDTH, EDATA_BITS_IS_HEAD_SHIFT) +#define EDATA_BITS_PINNED_WIDTH 1 +#define EDATA_BITS_PINNED_SHIFT \ + (EDATA_BITS_IS_HEAD_WIDTH + EDATA_BITS_IS_HEAD_SHIFT) +#define EDATA_BITS_PINNED_MASK \ + MASK(EDATA_BITS_PINNED_WIDTH, EDATA_BITS_PINNED_SHIFT) + +#if (EDATA_BITS_PINNED_SHIFT + EDATA_BITS_PINNED_WIDTH > 64) +#error "edata_t e_bits overflow" +#endif + /* Pointer to the extent that this structure is responsible for. */ void *e_addr; @@ -538,6 +556,29 @@ edata_ps_set(edata_t *edata, hpdata_t *ps) { edata->e_ps = ps; } +static inline bool +edata_pinned_get(const edata_t *edata) { + return (bool)((edata->e_bits & EDATA_BITS_PINNED_MASK) + >> EDATA_BITS_PINNED_SHIFT); +} + +static inline void +edata_pinned_set(edata_t *edata, bool pinned) { + edata->e_bits = (edata->e_bits & ~EDATA_BITS_PINNED_MASK) + | ((uint64_t)pinned << EDATA_BITS_PINNED_SHIFT); +} + +static inline void +edata_hook_flags_init(edata_t *edata, unsigned alloc_flags) { + edata_pinned_set(edata, + (alloc_flags & EXTENT_ALLOC_FLAG_PINNED) != 0); +} + +static inline unsigned +edata_alloc_flags_get(const edata_t *edata) { + return edata_pinned_get(edata) ? EXTENT_ALLOC_FLAG_PINNED : 0; +} + static inline void edata_szind_set(edata_t *edata, szind_t szind) { assert(szind <= SC_NSIZES); /* SC_NSIZES means "invalid". */ @@ -686,6 +727,7 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, edata_committed_set(edata, committed); edata_pai_set(edata, pai); edata_is_head_set(edata, is_head == EXTENT_IS_HEAD); + edata_hook_flags_init(edata, 0); if (config_prof) { edata_prof_tctx_set(edata, NULL); } @@ -711,6 +753,7 @@ edata_binit( * wasting a state bit to encode this fact. 
*/ edata_pai_set(edata, EXTENT_PAI_PAC); + edata_hook_flags_init(edata, 0); } static inline int diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index c65e189a..a7c93e4f 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -4,6 +4,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/extent_mmap.h" +#include "jemalloc/internal/pages.h" #include "jemalloc/internal/tsd.h" #include "jemalloc/internal/tsd_types.h" @@ -189,9 +190,13 @@ ehooks_debug_zero_check(void *addr, size_t size) { } } +/* + * Allocate via extent hooks, stripping EXTENT_ALLOC_FLAG_* bits from the + * returned pointer into *alloc_flags (see jemalloc_typedefs.h.in). + */ static inline void * ehooks_alloc(tsdn_t *tsdn, ehooks_t *ehooks, void *new_addr, size_t size, - size_t alignment, bool *zero, bool *commit) { + size_t alignment, bool *zero, bool *commit, unsigned *alloc_flags) { bool orig_zero = *zero; void *ret; extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); @@ -204,6 +209,18 @@ ehooks_alloc(tsdn_t *tsdn, ehooks_t *ehooks, void *new_addr, size_t size, alignment, zero, commit, ehooks_ind_get(ehooks)); ehooks_post_reentrancy(tsdn); } + /* Strip alloc flag bits from low bits of the returned pointer. 
*/ +#if LG_PAGE < 8 +# error "Extent alloc flags require PAGE >= 256 (LG_PAGE >= 8)" +#endif + if (ret != NULL) { + *alloc_flags = (unsigned)((uintptr_t)ret + & EXTENT_ALLOC_FLAG_MASK); + ret = (void *)((uintptr_t)ret & ~(uintptr_t) + EXTENT_ALLOC_FLAG_MASK); + } else { + *alloc_flags = 0; + } assert(new_addr == NULL || ret == NULL || new_addr == ret); assert(!orig_zero || *zero); if (*zero && ret != NULL) { diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 88692356..f123d1b9 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -211,6 +211,7 @@ extent_assert_can_coalesce(const edata_t *inner, const edata_t *outer) { assert(edata_state_get(inner) == extent_state_active); assert(edata_state_get(outer) == extent_state_merging); assert(!edata_guarded_get(inner) && !edata_guarded_get(outer)); + assert(edata_pinned_get(inner) == edata_pinned_get(outer)); assert(edata_base_get(inner) == edata_past_get(outer) || edata_base_get(outer) == edata_past_get(inner)); } diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index a9f81cb7..964c64d0 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -120,6 +120,11 @@ extent_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, */ return false; } + /* Do not merge pinned and non-pinned extents. 
*/ + if (edata_pinned_get(edata) + != edata_pinned_get(neighbor)) { + return false; + } } else { if (neighbor_state == extent_state_active) { return false; diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index 572200f3..37f6a377 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -30,6 +30,7 @@ typedef enum { OP(extents_dirty) \ OP(extents_muzzy) \ OP(extents_retained) \ + OP(extents_pinned) \ OP(decay_dirty) \ OP(decay_muzzy) \ OP(base) \ diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index a19c8b35..cf1e6517 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -51,6 +51,8 @@ struct pac_estats_s { size_t muzzy_bytes; size_t nretained; size_t retained_bytes; + size_t npinned; + size_t pinned_bytes; }; typedef struct pac_stats_s pac_stats_t; @@ -64,6 +66,7 @@ struct pac_stats_s { * but they are excluded from the mapped statistic (above). */ size_t retained; /* Derived. */ + size_t pinned; /* Derived. */ /* * Number of bytes currently mapped, excluding retained memory (and any @@ -85,6 +88,9 @@ struct pac_s { * pointer). The handle to the allocation interface. */ pai_t pai; + /* True once pinned memory has been seen; co-located with ecache_dirty + * for cache-line locality on the alloc fast path. */ + atomic_b_t has_pinned; /* * Collections of extents that were previously allocated. These are * used when allocating extents, in an attempt to re-use address space. 
@@ -94,6 +100,7 @@ struct pac_s { ecache_t ecache_dirty; ecache_t ecache_muzzy; ecache_t ecache_retained; + ecache_t ecache_pinned; base_t *base; emap_t *emap; diff --git a/include/jemalloc/jemalloc_typedefs.h.in b/include/jemalloc/jemalloc_typedefs.h.in index 793ee365..68c6f2ca 100644 --- a/include/jemalloc/jemalloc_typedefs.h.in +++ b/include/jemalloc/jemalloc_typedefs.h.in @@ -4,6 +4,17 @@ extern "C" { typedef struct extent_hooks_s extent_hooks_t; +/* + * Extent alloc flags. Custom alloc hooks may OR these into the returned + * pointer; jemalloc strips the low bits before use. Safe because all + * return values are page-aligned (PAGE >= 256). + */ +#define EXTENT_ALLOC_FLAG_PINNED 0x1U /* Non-reclaimable (e.g. HugeTLB). */ +#define EXTENT_ALLOC_FLAG_MASK 0xFFU /* Bits 1-7 reserved. */ +#if EXTENT_ALLOC_FLAG_MASK >= 256 +# error "EXTENT_ALLOC_FLAG_MASK must be < PAGE (256)" +#endif + /* * void * * extent_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, diff --git a/src/arena.c b/src/arena.c index d7c8cd1f..412da14a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -796,6 +796,8 @@ arena_prepare_base_deletion(tsd_t *tsd, base_t *base_to_destroy) { tsd, &pac->ecache_muzzy.mtx, delayed_mtx, &n_delayed); arena_prepare_base_deletion_sync( tsd, &pac->ecache_retained.mtx, delayed_mtx, &n_delayed); + arena_prepare_base_deletion_sync( + tsd, &pac->ecache_pinned.mtx, delayed_mtx, &n_delayed); } arena_prepare_base_deletion_sync_finish(tsd, delayed_mtx, n_delayed); } diff --git a/src/base.c b/src/base.c index ef7f0dd4..8f84d72a 100644 --- a/src/base.c +++ b/src/base.c @@ -52,8 +52,11 @@ base_map(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, size_t size) { if (ehooks_are_default(ehooks)) { addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit); } else { + /* alloc_flags intentionally ignored for base/metadata. 
*/ + unsigned alloc_flags; addr = ehooks_alloc( - tsdn, ehooks, NULL, size, alignment, &zero, &commit); + tsdn, ehooks, NULL, size, alignment, &zero, &commit, + &alloc_flags); } return addr; diff --git a/src/ctl.c b/src/ctl.c index 0b72086c..e77e48e2 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -257,9 +257,11 @@ INDEX_PROTO(stats_arenas_i_lextents_j) CTL_PROTO(stats_arenas_i_extents_j_ndirty) CTL_PROTO(stats_arenas_i_extents_j_nmuzzy) CTL_PROTO(stats_arenas_i_extents_j_nretained) +CTL_PROTO(stats_arenas_i_extents_j_npinned) CTL_PROTO(stats_arenas_i_extents_j_dirty_bytes) CTL_PROTO(stats_arenas_i_extents_j_muzzy_bytes) CTL_PROTO(stats_arenas_i_extents_j_retained_bytes) +CTL_PROTO(stats_arenas_i_extents_j_pinned_bytes) INDEX_PROTO(stats_arenas_i_extents_j) /* Merged set of stats for HPA shard. */ @@ -320,6 +322,7 @@ CTL_PROTO(stats_arenas_i_pdirty) CTL_PROTO(stats_arenas_i_pmuzzy) CTL_PROTO(stats_arenas_i_mapped) CTL_PROTO(stats_arenas_i_retained) +CTL_PROTO(stats_arenas_i_pinned) CTL_PROTO(stats_arenas_i_extent_avail) CTL_PROTO(stats_arenas_i_dirty_npurge) CTL_PROTO(stats_arenas_i_dirty_nmadvise) @@ -355,6 +358,7 @@ CTL_PROTO(stats_metadata_thp) CTL_PROTO(stats_resident) CTL_PROTO(stats_mapped) CTL_PROTO(stats_retained) +CTL_PROTO(stats_pinned) CTL_PROTO(stats_zero_reallocs) CTL_PROTO(approximate_stats_active) CTL_PROTO(experimental_hooks_install) @@ -697,9 +701,11 @@ static const ctl_named_node_t stats_arenas_i_extents_j_node[] = { {NAME("ndirty"), CTL(stats_arenas_i_extents_j_ndirty)}, {NAME("nmuzzy"), CTL(stats_arenas_i_extents_j_nmuzzy)}, {NAME("nretained"), CTL(stats_arenas_i_extents_j_nretained)}, + {NAME("npinned"), CTL(stats_arenas_i_extents_j_npinned)}, {NAME("dirty_bytes"), CTL(stats_arenas_i_extents_j_dirty_bytes)}, {NAME("muzzy_bytes"), CTL(stats_arenas_i_extents_j_muzzy_bytes)}, - {NAME("retained_bytes"), CTL(stats_arenas_i_extents_j_retained_bytes)}}; + {NAME("retained_bytes"), CTL(stats_arenas_i_extents_j_retained_bytes)}, + {NAME("pinned_bytes"), 
CTL(stats_arenas_i_extents_j_pinned_bytes)}}; static const ctl_named_node_t super_stats_arenas_i_extents_j_node[] = { {NAME(""), CHILD(named, stats_arenas_i_extents_j)}}; @@ -807,6 +813,7 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("pmuzzy"), CTL(stats_arenas_i_pmuzzy)}, {NAME("mapped"), CTL(stats_arenas_i_mapped)}, {NAME("retained"), CTL(stats_arenas_i_retained)}, + {NAME("pinned"), CTL(stats_arenas_i_pinned)}, {NAME("extent_avail"), CTL(stats_arenas_i_extent_avail)}, {NAME("dirty_npurge"), CTL(stats_arenas_i_dirty_npurge)}, {NAME("dirty_nmadvise"), CTL(stats_arenas_i_dirty_nmadvise)}, @@ -872,6 +879,7 @@ static const ctl_named_node_t stats_node[] = { {NAME("resident"), CTL(stats_resident)}, {NAME("mapped"), CTL(stats_mapped)}, {NAME("retained"), CTL(stats_retained)}, + {NAME("pinned"), CTL(stats_pinned)}, {NAME("background_thread"), CHILD(named, stats_background_thread)}, {NAME("mutexes"), CHILD(named, stats_mutexes)}, {NAME("arenas"), CHILD(indexed, stats_arenas)}, @@ -1111,6 +1119,8 @@ ctl_arena_stats_sdmerge( sdstats->astats.mapped += astats->astats.mapped; sdstats->astats.pa_shard_stats.pac_stats.retained += astats->astats.pa_shard_stats.pac_stats.retained; + sdstats->astats.pa_shard_stats.pac_stats.pinned += + astats->astats.pa_shard_stats.pac_stats.pinned; sdstats->astats.pa_shard_stats.edata_avail += astats->astats.pa_shard_stats.edata_avail; } @@ -1247,12 +1257,16 @@ ctl_arena_stats_sdmerge( sdstats->estats[i].nmuzzy += astats->estats[i].nmuzzy; sdstats->estats[i].nretained += astats->estats[i].nretained; + sdstats->estats[i].npinned += + astats->estats[i].npinned; sdstats->estats[i].dirty_bytes += astats->estats[i].dirty_bytes; sdstats->estats[i].muzzy_bytes += astats->estats[i].muzzy_bytes; sdstats->estats[i].retained_bytes += astats->estats[i].retained_bytes; + sdstats->estats[i].pinned_bytes += + astats->estats[i].pinned_bytes; } /* Merge HPA stats. 
*/ @@ -1367,6 +1381,8 @@ ctl_refresh(tsdn_t *tsdn) { ctl_stats->mapped = ctl_sarena->astats->astats.mapped; ctl_stats->retained = ctl_sarena->astats->astats.pa_shard_stats .pac_stats.retained; + ctl_stats->pinned = ctl_sarena->astats->astats.pa_shard_stats + .pac_stats.pinned; ctl_background_thread_stats_read(tsdn); @@ -3721,6 +3737,7 @@ CTL_RO_CGEN(config_stats, stats_metadata_thp, ctl_stats->metadata_thp, size_t) CTL_RO_CGEN(config_stats, stats_resident, ctl_stats->resident, size_t) CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats->mapped, size_t) CTL_RO_CGEN(config_stats, stats_retained, ctl_stats->retained, size_t) +CTL_RO_CGEN(config_stats, stats_pinned, ctl_stats->pinned, size_t) CTL_RO_CGEN(config_stats, stats_background_thread_num_threads, ctl_stats->background_thread.num_threads, size_t) @@ -3786,6 +3803,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, arenas_i(mib[2])->astats->astats.mapped, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_retained, arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.retained, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_pinned, + arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.pinned, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_extent_avail, arenas_i(mib[2])->astats->astats.pa_shard_stats.edata_avail, size_t) @@ -3958,6 +3977,7 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, MUTEX_PROF_RESET(arena->pa_shard.pac.ecache_dirty.mtx); MUTEX_PROF_RESET(arena->pa_shard.pac.ecache_muzzy.mtx); MUTEX_PROF_RESET(arena->pa_shard.pac.ecache_retained.mtx); + MUTEX_PROF_RESET(arena->pa_shard.pac.ecache_pinned.mtx); MUTEX_PROF_RESET(arena->pa_shard.pac.decay_dirty.mtx); MUTEX_PROF_RESET(arena->pa_shard.pac.decay_muzzy.mtx); MUTEX_PROF_RESET(arena->tcache_ql_mtx); @@ -4034,12 +4054,16 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_nmuzzy, arenas_i(mib[2])->astats->estats[mib[4]].nmuzzy, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_nretained, 
arenas_i(mib[2])->astats->estats[mib[4]].nretained, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_npinned, + arenas_i(mib[2])->astats->estats[mib[4]].npinned, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_dirty_bytes, arenas_i(mib[2])->astats->estats[mib[4]].dirty_bytes, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_muzzy_bytes, arenas_i(mib[2])->astats->estats[mib[4]].muzzy_bytes, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_retained_bytes, arenas_i(mib[2])->astats->estats[mib[4]].retained_bytes, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_pinned_bytes, + arenas_i(mib[2])->astats->estats[mib[4]].pinned_bytes, size_t); static const ctl_named_node_t * stats_arenas_i_extents_j_index( diff --git a/src/eset.c b/src/eset.c index 4a427d78..c73623d6 100644 --- a/src/eset.c +++ b/src/eset.c @@ -98,7 +98,10 @@ eset_insert(eset_t *eset, edata_t *edata) { eset_stats_add(eset, pind, size); } - edata_list_inactive_append(&eset->lru, edata); + /* Pinned extents skip LRU; they are never evicted. 
*/ + if (!edata_pinned_get(edata)) { + edata_list_inactive_append(&eset->lru, edata); + } size_t npages = size >> LG_PAGE; /* * All modifications to npages hold the mutex (as asserted above), so we @@ -143,7 +146,9 @@ eset_remove(eset_t *eset, edata_t *edata) { edata_heap_first(&eset->bins[pind].heap)); } } - edata_list_inactive_remove(&eset->lru, edata); + if (!edata_pinned_get(edata)) { + edata_list_inactive_remove(&eset->lru, edata); + } size_t npages = size >> LG_PAGE; /* * As in eset_insert, we hold eset->mtx and so don't need atomic diff --git a/src/extent.c b/src/extent.c index 118c8785..222c5050 100644 --- a/src/extent.c +++ b/src/extent.c @@ -70,6 +70,7 @@ extent_may_force_decay(pac_t *pac) { static bool extent_try_delayed_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { + malloc_mutex_assert_owner(tsdn, &ecache->mtx); emap_update_edata_state(tsdn, pac->emap, edata, extent_state_active); bool coalesced; @@ -244,9 +245,9 @@ extents_abandon_vm(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, } /* * Leak extent after making sure its pages have already been purged, so - * that this is only a virtual memory leak. + * that this is only a virtual memory leak, except when it is pinned. 
*/ - if (ecache->state == extent_state_dirty) { + if (ecache->state == extent_state_dirty && !edata_pinned_get(edata)) { if (extent_purge_lazy_impl( tsdn, ehooks, edata, 0, sz, growing_retained)) { extent_purge_forced_impl(tsdn, ehooks, edata, 0, @@ -734,9 +735,10 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, bool zeroed = false; bool committed = false; + unsigned alloc_flags; void *ptr = ehooks_alloc( - tsdn, ehooks, NULL, alloc_size, PAGE, &zeroed, &committed); - + tsdn, ehooks, NULL, alloc_size, PAGE, &zeroed, &committed, + &alloc_flags); if (ptr == NULL) { edata_cache_put(tsdn, pac->edata_cache, edata); goto label_err; @@ -746,6 +748,11 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, edata_init(edata, ind, ptr, alloc_size, false, SC_NSIZES, extent_sn_next(pac), extent_state_active, zeroed, committed, EXTENT_PAI_PAC, EXTENT_IS_HEAD); + edata_hook_flags_init(edata, alloc_flags); + if (alloc_flags & EXTENT_ALLOC_FLAG_PINNED) { + atomic_store_b(&pac->has_pinned, true, + ATOMIC_RELAXED); + } if (extent_register_no_gdump_add(tsdn, pac, edata)) { edata_cache_put(tsdn, pac->edata_cache, edata); @@ -767,12 +774,22 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, if (result == extent_split_interior_ok) { if (lead != NULL) { - extent_record( - tsdn, pac, ehooks, &pac->ecache_retained, lead); + if (edata_pinned_get(lead)) { + ecache_dalloc(tsdn, pac, ehooks, + &pac->ecache_pinned, lead); + } else { + extent_record(tsdn, pac, ehooks, + &pac->ecache_retained, lead); + } } if (trail != NULL) { - extent_record( - tsdn, pac, ehooks, &pac->ecache_retained, trail); + if (edata_pinned_get(trail)) { + ecache_dalloc(tsdn, pac, ehooks, + &pac->ecache_pinned, trail); + } else { + extent_record(tsdn, pac, ehooks, + &pac->ecache_retained, trail); + } } } else { /* @@ -784,8 +801,13 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, if (config_prof) { 
extent_gdump_add(tsdn, to_salvage); } - extent_record(tsdn, pac, ehooks, &pac->ecache_retained, - to_salvage); + if (edata_pinned_get(to_salvage)) { + ecache_dalloc(tsdn, pac, ehooks, + &pac->ecache_pinned, to_salvage); + } else { + extent_record(tsdn, pac, ehooks, + &pac->ecache_retained, to_salvage); + } } if (to_leak != NULL) { extent_deregister_no_gdump_sub(tsdn, pac, to_leak); @@ -796,10 +818,12 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, } if (*commit && !edata_committed_get(edata)) { + /* Pinned memory must be committed by the hook. */ + assert(!edata_pinned_get(edata)); if (extent_commit_impl( tsdn, ehooks, edata, 0, edata_size_get(edata), true)) { - extent_record( - tsdn, pac, ehooks, &pac->ecache_retained, edata); + extent_record(tsdn, pac, ehooks, + &pac->ecache_retained, edata); goto label_err; } /* A successful commit should return zeroed memory. */ @@ -1019,9 +1043,9 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, bool coalesced_unused; edata = extent_try_coalesce( tsdn, pac, ehooks, ecache, edata, &coalesced_unused); - } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { - assert(ecache == &pac->ecache_dirty); - /* Always coalesce large extents eagerly. */ + } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS + && ecache == &pac->ecache_dirty) { + /* Dirty ecache always coalesces large extents eagerly. */ /** * Maximum size limit (max_size) for large extents waiting to be coalesced * in dirty ecache. 
@@ -1119,8 +1143,10 @@ extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void *new_addr, return NULL; } size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); + unsigned alloc_flags; void *addr = ehooks_alloc( - tsdn, ehooks, new_addr, size, palignment, &zero, commit); + tsdn, ehooks, new_addr, size, palignment, &zero, commit, + &alloc_flags); if (addr == NULL) { edata_cache_put(tsdn, pac->edata_cache, edata); return NULL; @@ -1129,6 +1155,11 @@ extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void *new_addr, /* slab */ false, SC_NSIZES, extent_sn_next(pac), extent_state_active, zero, *commit, EXTENT_PAI_PAC, opt_retain ? EXTENT_IS_HEAD : EXTENT_NOT_HEAD); + edata_hook_flags_init(edata, alloc_flags); + if (alloc_flags & EXTENT_ALLOC_FLAG_PINNED) { + atomic_store_b(&pac->has_pinned, true, + ATOMIC_RELAXED); + } /* * Retained memory is not counted towards gdump. Only if an extent is * allocated as a separate mapping, i.e. growing_retained is false, then @@ -1328,6 +1359,7 @@ extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata, /* slab */ false, SC_NSIZES, edata_sn_get(edata), edata_state_get(edata), edata_zeroed_get(edata), edata_committed_get(edata), EXTENT_PAI_PAC, EXTENT_NOT_HEAD); + edata_hook_flags_init(trail, edata_alloc_flags_get(edata)); emap_prepare_t prepare; bool err = emap_split_prepare( tsdn, pac->emap, &prepare, edata, size_a, trail, size_b); @@ -1412,6 +1444,9 @@ extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, : edata_sn_get(b)); edata_zeroed_set(a, edata_zeroed_get(a) && edata_zeroed_get(b)); + assert(edata_pinned_get(a) == edata_pinned_get(b)); + edata_pinned_set(a, edata_pinned_get(a) || edata_pinned_get(b)); + emap_merge_commit(tsdn, pac->emap, &prepare, a, b); edata_cache_put(tsdn, pac->edata_cache, b); diff --git a/src/pa_extra.c b/src/pa_extra.c index ff45674f..2673d9cf 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -34,6 +34,7 @@ pa_shard_prefork4(tsdn_t 
*tsdn, pa_shard_t *shard) { ecache_prefork(tsdn, &shard->pac.ecache_dirty); ecache_prefork(tsdn, &shard->pac.ecache_muzzy); ecache_prefork(tsdn, &shard->pac.ecache_retained); + ecache_prefork(tsdn, &shard->pac.ecache_pinned); if (shard->ever_used_hpa) { hpa_shard_prefork4(tsdn, &shard->hpa_shard); } @@ -50,6 +51,7 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) { ecache_postfork_parent(tsdn, &shard->pac.ecache_dirty); ecache_postfork_parent(tsdn, &shard->pac.ecache_muzzy); ecache_postfork_parent(tsdn, &shard->pac.ecache_retained); + ecache_postfork_parent(tsdn, &shard->pac.ecache_pinned); malloc_mutex_postfork_parent(tsdn, &shard->pac.grow_mtx); malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_muzzy.mtx); @@ -64,6 +66,7 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) { ecache_postfork_child(tsdn, &shard->pac.ecache_dirty); ecache_postfork_child(tsdn, &shard->pac.ecache_muzzy); ecache_postfork_child(tsdn, &shard->pac.ecache_retained); + ecache_postfork_child(tsdn, &shard->pac.ecache_pinned); malloc_mutex_postfork_child(tsdn, &shard->pac.grow_mtx); malloc_mutex_postfork_child(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &shard->pac.decay_muzzy.mtx); @@ -107,12 +110,15 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_stats_out->pac_stats.retained += ecache_npages_get(&shard->pac.ecache_retained) << LG_PAGE; + pa_shard_stats_out->pac_stats.pinned += + ecache_npages_get(&shard->pac.ecache_pinned) << LG_PAGE; pa_shard_stats_out->edata_avail += atomic_load_zu( &shard->edata_cache.count, ATOMIC_RELAXED); size_t resident_pgs = 0; resident_pgs += pa_shard_nactive(shard); resident_pgs += pa_shard_ndirty(shard); + resident_pgs += ecache_npages_get(&shard->pac.ecache_pinned); *resident += (resident_pgs << LG_PAGE); /* Dirty decay stats */ @@ -147,22 +153,27 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, 
atomic_load_zu(&shard->pac.stats->abandoned_vm, ATOMIC_RELAXED)); for (pszind_t i = 0; i < SC_NPSIZES; i++) { - size_t dirty, muzzy, retained, dirty_bytes, muzzy_bytes, - retained_bytes; + size_t dirty, muzzy, retained, pinned, dirty_bytes, + muzzy_bytes, retained_bytes, pinned_bytes; dirty = ecache_nextents_get(&shard->pac.ecache_dirty, i); muzzy = ecache_nextents_get(&shard->pac.ecache_muzzy, i); retained = ecache_nextents_get(&shard->pac.ecache_retained, i); + pinned = ecache_nextents_get(&shard->pac.ecache_pinned, i); dirty_bytes = ecache_nbytes_get(&shard->pac.ecache_dirty, i); muzzy_bytes = ecache_nbytes_get(&shard->pac.ecache_muzzy, i); retained_bytes = ecache_nbytes_get( &shard->pac.ecache_retained, i); + pinned_bytes = ecache_nbytes_get( + &shard->pac.ecache_pinned, i); estats_out[i].ndirty = dirty; estats_out[i].nmuzzy = muzzy; estats_out[i].nretained = retained; + estats_out[i].npinned = pinned; estats_out[i].dirty_bytes = dirty_bytes; estats_out[i].muzzy_bytes = muzzy_bytes; estats_out[i].retained_bytes = retained_bytes; + estats_out[i].pinned_bytes = pinned_bytes; } if (shard->ever_used_hpa) { @@ -189,6 +200,8 @@ pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard, &shard->pac.ecache_muzzy.mtx, arena_prof_mutex_extents_muzzy); pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, &shard->pac.ecache_retained.mtx, arena_prof_mutex_extents_retained); + pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, + &shard->pac.ecache_pinned.mtx, arena_prof_mutex_extents_pinned); pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, &shard->pac.decay_dirty.mtx, arena_prof_mutex_decay_dirty); pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, diff --git a/src/pac.c b/src/pac.c index ed0f77c2..86e6d86a 100644 --- a/src/pac.c +++ b/src/pac.c @@ -72,6 +72,13 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, /* delay_coalesce */ false)) { return true; } + /* Pinned (non-reclaimable) extents: no decay, delayed coalesce. 
+ * State is extent_state_dirty; the edata pinned flag distinguishes. */ + if (ecache_init(tsdn, &pac->ecache_pinned, extent_state_dirty, ind, + /* delay_coalesce */ true)) { + return true; + } + atomic_store_b(&pac->has_pinned, false, ATOMIC_RELAXED); exp_grow_init(&pac->exp_grow); if (malloc_mutex_init(&pac->grow_mtx, "extent_grow", WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { @@ -110,6 +117,17 @@ pac_may_have_muzzy(pac_t *pac) { return pac_decay_ms_get(pac, extent_state_muzzy) != 0; } +/* Route edata to ecache_pinned or ecache_dirty based on pinned flag. */ +static inline void +pac_ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, + edata_t *edata) { + if (edata_pinned_get(edata)) { + ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_pinned, edata); + } else { + ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_dirty, edata); + } +} + static size_t pac_alloc_retained_batched_size(size_t size) { if (size > SC_LARGE_MAXCLASS) { @@ -133,8 +151,17 @@ pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, assert(!guarded || alignment <= PAGE); size_t newly_mapped_size = 0; - edata_t *edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_dirty, - NULL, size, alignment, zero, guarded); + edata_t *edata = NULL; + + if (atomic_load_b(&pac->has_pinned, ATOMIC_RELAXED)) { + edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_pinned, + NULL, size, alignment, zero, guarded); + } + + if (edata == NULL) { + edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_dirty, + NULL, size, alignment, zero, guarded); + } if (edata == NULL && pac_may_have_muzzy(pac)) { edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_muzzy, @@ -180,12 +207,17 @@ pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, edata, size, batched_size - size, /* holding_core_locks */ false); if (trail == NULL) { - ecache_dalloc(tsdn, pac, ehooks, - &pac->ecache_retained, edata); + if (edata_pinned_get(edata)) { + ecache_dalloc(tsdn, pac, ehooks, + 
&pac->ecache_pinned, edata); + } else { + ecache_dalloc(tsdn, pac, ehooks, + &pac->ecache_retained, edata); + } edata = NULL; } else { - ecache_dalloc(tsdn, pac, ehooks, - &pac->ecache_dirty, trail); + pac_ecache_dalloc(tsdn, pac, ehooks, + trail); } } @@ -277,11 +309,22 @@ pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, if (ehooks_merge_will_fail(ehooks)) { return true; } - edata_t *trail = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_dirty, - edata, expand_amount, PAGE, zero, /* guarded*/ false); - if (trail == NULL) { - trail = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_muzzy, - edata, expand_amount, PAGE, zero, /* guarded*/ false); + edata_t *trail = NULL; + if (edata_pinned_get(edata)) { + if (atomic_load_b(&pac->has_pinned, + ATOMIC_RELAXED)) { + trail = ecache_alloc(tsdn, pac, ehooks, + &pac->ecache_pinned, edata, expand_amount, + PAGE, zero, /* guarded */ false); + } + } else { + trail = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_dirty, + edata, expand_amount, PAGE, zero, /* guarded */ false); + if (trail == NULL) { + trail = ecache_alloc(tsdn, pac, ehooks, + &pac->ecache_muzzy, edata, expand_amount, + PAGE, zero, /* guarded */ false); + } } if (trail == NULL) { trail = ecache_alloc_grow(tsdn, pac, ehooks, @@ -293,7 +336,14 @@ pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, return true; } if (extent_merge_wrapper(tsdn, pac, ehooks, edata, trail)) { - extent_dalloc_wrapper(tsdn, pac, ehooks, trail); + if (edata_pinned_get(trail)) { + atomic_store_b(&pac->has_pinned, true, + ATOMIC_RELAXED); + ecache_dalloc(tsdn, pac, ehooks, + &pac->ecache_pinned, trail); + } else { + extent_dalloc_wrapper(tsdn, pac, ehooks, trail); + } return true; } if (config_stats && mapped_add > 0) { @@ -320,8 +370,10 @@ pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, if (trail == NULL) { return true; } - ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_dirty, trail); - *deferred_work_generated = true; 
+ /* Sample pinned first: trail may be merged or reused once cached. */ + bool trail_pinned = edata_pinned_get(trail); + pac_ecache_dalloc(tsdn, pac, ehooks, trail); + *deferred_work_generated |= !trail_pinned; return false; } @@ -352,9 +404,10 @@ pac_dalloc_impl( } } - ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_dirty, edata); - /* Purging of deallocated pages is deferred */ - *deferred_work_generated = true; + /* Sample pinned first: edata may be merged or reused once cached. */ + bool pinned = edata_pinned_get(edata); + pac_ecache_dalloc(tsdn, pac, ehooks, edata); + *deferred_work_generated |= !pinned; } static inline uint64_t @@ -721,7 +774,31 @@ pac_destroy(tsdn_t *tsdn, pac_t *pac) { * dss-based extents for later reuse. */ ehooks_t *ehooks = pac_ehooks_get(pac); - edata_t *edata; + edata_t *edata; + if (atomic_load_b(&pac->has_pinned, ATOMIC_RELAXED)) { + /* Drain pinned extents via heap (no LRU). */ + edata_list_inactive_t pinned_list; + edata_list_inactive_init(&pinned_list); + malloc_mutex_lock(tsdn, &pac->ecache_pinned.mtx); + while (eset_npages_get(&pac->ecache_pinned.eset) > 0) { + edata = eset_fit(&pac->ecache_pinned.eset, + PAGE, PAGE, /* exact_only */ false, SC_PTR_BITS); + if (edata == NULL) { + break; + } + eset_remove(&pac->ecache_pinned.eset, edata); + emap_update_edata_state(tsdn, pac->emap, edata, + extent_state_active); + edata_list_inactive_append(&pinned_list, edata); + } + malloc_mutex_unlock(tsdn, &pac->ecache_pinned.mtx); + while ((edata = edata_list_inactive_first(&pinned_list)) + != NULL) { + edata_list_inactive_remove(&pinned_list, edata); + extent_destroy_wrapper(tsdn, pac, ehooks, edata); + } + } + assert(ecache_npages_get(&pac->ecache_pinned) == 0); while ( (edata = ecache_evict(tsdn, pac, ehooks, &pac->ecache_retained, 0)) != NULL) { diff --git a/src/stats.c b/src/stats.c index 82458fec..33198636 100644 --- a/src/stats.c +++ b/src/stats.c @@ -712,6 +712,8 @@ stats_arena_extents_print(emitter_t *emitter, unsigned i) { COL_HDR(row, muzzy, NULL, right, 13, size) COL_HDR(row, nretained, NULL, right, 13, size) COL_HDR(row, retained, NULL, right, 13, size) + COL_HDR(row, npinned, NULL, right, 13, size)
+ COL_HDR(row, pinned, NULL, right, 13, size) COL_HDR(row, ntotal, NULL, right, 13, size) COL_HDR(row, total, NULL, right, 13, size) @@ -728,22 +730,27 @@ stats_arena_extents_print(emitter_t *emitter, unsigned i) { in_gap = false; for (j = 0; j < SC_NPSIZES; j++) { - size_t ndirty, nmuzzy, nretained, total, dirty_bytes, - muzzy_bytes, retained_bytes, total_bytes; + size_t ndirty, nmuzzy, nretained, npinned, total, + dirty_bytes, muzzy_bytes, retained_bytes, pinned_bytes, + total_bytes; stats_arenas_mib[4] = j; CTL_LEAF(stats_arenas_mib, 5, "ndirty", &ndirty, size_t); CTL_LEAF(stats_arenas_mib, 5, "nmuzzy", &nmuzzy, size_t); CTL_LEAF(stats_arenas_mib, 5, "nretained", &nretained, size_t); + CTL_LEAF(stats_arenas_mib, 5, "npinned", &npinned, size_t); CTL_LEAF( stats_arenas_mib, 5, "dirty_bytes", &dirty_bytes, size_t); CTL_LEAF( stats_arenas_mib, 5, "muzzy_bytes", &muzzy_bytes, size_t); CTL_LEAF(stats_arenas_mib, 5, "retained_bytes", &retained_bytes, size_t); + CTL_LEAF(stats_arenas_mib, 5, "pinned_bytes", &pinned_bytes, + size_t); - total = ndirty + nmuzzy + nretained; - total_bytes = dirty_bytes + muzzy_bytes + retained_bytes; + total = ndirty + nmuzzy + nretained + npinned; + total_bytes = dirty_bytes + muzzy_bytes + retained_bytes + + pinned_bytes; in_gap_prev = in_gap; in_gap = (total == 0); @@ -758,6 +765,8 @@ stats_arena_extents_print(emitter_t *emitter, unsigned i) { emitter_json_kv(emitter, "nmuzzy", emitter_type_size, &nmuzzy); emitter_json_kv( emitter, "nretained", emitter_type_size, &nretained); + emitter_json_kv( + emitter, "npinned", emitter_type_size, &npinned); emitter_json_kv( emitter, "dirty_bytes", emitter_type_size, &dirty_bytes); @@ -765,6 +774,8 @@ stats_arena_extents_print(emitter_t *emitter, unsigned i) { emitter, "muzzy_bytes", emitter_type_size, &muzzy_bytes); emitter_json_kv(emitter, "retained_bytes", emitter_type_size, &retained_bytes); + emitter_json_kv(emitter, "pinned_bytes", emitter_type_size, + &pinned_bytes); 
emitter_json_object_end(emitter); col_size.size_val = sz_pind2sz(j); @@ -775,6 +786,8 @@ stats_arena_extents_print(emitter_t *emitter, unsigned i) { col_muzzy.size_val = muzzy_bytes; col_nretained.size_val = nretained; col_retained.size_val = retained_bytes; + col_npinned.size_val = npinned; + col_pinned.size_val = pinned_bytes; col_ntotal.size_val = total; col_total.size_val = total_bytes; @@ -1166,7 +1179,7 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, unsigned nthreads; const char *dss; ssize_t dirty_decay_ms, muzzy_decay_ms; - size_t page, pactive, pdirty, pmuzzy, mapped, retained; + size_t page, pactive, pdirty, pmuzzy, mapped, retained, pinned; size_t base, internal, resident, metadata_edata, metadata_rtree, metadata_thp, extent_avail; uint64_t dirty_npurge, dirty_nmadvise, dirty_purged; @@ -1467,6 +1480,7 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, GET_AND_EMIT_MEM_STAT(mapped) GET_AND_EMIT_MEM_STAT(retained) + GET_AND_EMIT_MEM_STAT(pinned) GET_AND_EMIT_MEM_STAT(base) GET_AND_EMIT_MEM_STAT(internal) GET_AND_EMIT_MEM_STAT(metadata_edata) @@ -1872,7 +1886,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, * the transition to the emitter code. 
*/ size_t allocated, active, metadata, metadata_edata, metadata_rtree, - metadata_thp, resident, mapped, retained; + metadata_thp, resident, mapped, retained, pinned; size_t num_background_threads; size_t zero_reallocs; uint64_t background_thread_num_runs, background_thread_run_interval; @@ -1886,6 +1900,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, CTL_GET("stats.resident", &resident, size_t); CTL_GET("stats.mapped", &mapped, size_t); CTL_GET("stats.retained", &retained, size_t); + CTL_GET("stats.pinned", &pinned, size_t); CTL_GET("stats.zero_reallocs", &zero_reallocs, size_t); @@ -1916,15 +1931,16 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, emitter_json_kv(emitter, "resident", emitter_type_size, &resident); emitter_json_kv(emitter, "mapped", emitter_type_size, &mapped); emitter_json_kv(emitter, "retained", emitter_type_size, &retained); + emitter_json_kv(emitter, "pinned", emitter_type_size, &pinned); emitter_json_kv( emitter, "zero_reallocs", emitter_type_size, &zero_reallocs); emitter_table_printf(emitter, "Allocated: %zu, active: %zu, " "metadata: %zu (n_thp %zu, edata %zu, rtree %zu), resident: %zu, " - "mapped: %zu, retained: %zu\n", + "mapped: %zu, retained: %zu, pinned: %zu\n", allocated, active, metadata, metadata_thp, metadata_edata, - metadata_rtree, resident, mapped, retained); + metadata_rtree, resident, mapped, retained, pinned); /* Strange behaviors */ emitter_table_printf(emitter, diff --git a/test/unit/extent_alloc_flags.c b/test/unit/extent_alloc_flags.c new file mode 100644 index 00000000..05880411 --- /dev/null +++ b/test/unit/extent_alloc_flags.c @@ -0,0 +1,173 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_pinned_accessors) { + edata_t edata; + memset(&edata, 0, sizeof(edata)); + + edata_arena_ind_set(&edata, 42); + edata_slab_set(&edata, true); + edata_committed_set(&edata, true); + + /* Default: not pinned. 
*/ + edata_hook_flags_init(&edata, 0); + expect_false(edata_pinned_get(&edata), "pinned should be false"); + expect_u_eq(0, edata_alloc_flags_get(&edata), + "alloc_flags should be 0"); + expect_u_eq(42, edata_arena_ind_get(&edata), + "arena_ind corrupted"); + expect_true(edata_slab_get(&edata), "slab corrupted"); + + /* Set pinned. */ + edata_hook_flags_init(&edata, EXTENT_ALLOC_FLAG_PINNED); + expect_true(edata_pinned_get(&edata), "pinned should be true"); + expect_u_eq(EXTENT_ALLOC_FLAG_PINNED, edata_alloc_flags_get(&edata), + "alloc_flags round-trip failed"); + expect_u_eq(42, edata_arena_ind_get(&edata), + "arena_ind corrupted"); + + /* Split inheritance: trail gets lead's flags. */ + edata_t trail; + memset(&trail, 0, sizeof(edata_t)); + edata_hook_flags_init(&trail, edata_alloc_flags_get(&edata)); + expect_true(edata_pinned_get(&trail), + "trail should inherit pinned from lead"); +} +TEST_END + +TEST_BEGIN(test_dirty_accounting) { + unsigned arena_ind; + size_t sz = sizeof(arena_ind); + + expect_d_eq(0, mallctl("arenas.create", &arena_ind, &sz, NULL, 0), + "arena creation failed"); + + void *ptrs[16]; + for (unsigned i = 0; i < 16; i++) { + ptrs[i] = mallocx(PAGE, MALLOCX_ARENA(arena_ind)); + expect_ptr_not_null(ptrs[i], "alloc %u failed", i); + } + for (unsigned i = 0; i < 16; i++) { + dallocx(ptrs[i], MALLOCX_ARENA(arena_ind)); + } + + /* Default hooks: alloc_flags=0, so ecache_pinned must be empty. */ + tsd_t *tsd = tsd_fetch(); + tsdn_t *tsdn = tsd_tsdn(tsd); + arena_t *arena = arena_get(tsdn, arena_ind, false); + expect_ptr_not_null(arena, "arena_get failed"); + + pac_t *pac = &arena->pa_shard.pac; + expect_zu_eq(0, ecache_npages_get(&pac->ecache_pinned), + "ecache_pinned should be empty with default hooks"); +} +TEST_END + +/* + * Custom alloc hook that sets EXTENT_ALLOC_FLAG_PINNED. + * Passthrough to default hooks via ehooks_default_extent_hooks. 
+ */ +static void * +pinned_extent_alloc(extent_hooks_t *extent_hooks, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit, + unsigned arena_ind) { + void *ret = ehooks_default_extent_hooks.alloc( + (extent_hooks_t *)&ehooks_default_extent_hooks, + new_addr, size, alignment, zero, commit, arena_ind); + if (ret == NULL) { + return NULL; + } + return (void *)((uintptr_t)ret | EXTENT_ALLOC_FLAG_PINNED); +} + +static extent_hooks_t pinned_hooks = { + pinned_extent_alloc, + NULL, /* dalloc — force retain */ + NULL, /* destroy */ + NULL, /* commit */ + NULL, /* decommit */ + NULL, /* purge_lazy */ + NULL, /* purge_forced */ + NULL, /* split */ + NULL /* merge */ +}; + +TEST_BEGIN(test_pinned_stats) { + unsigned arena_ind; + size_t sz = sizeof(arena_ind); + extent_hooks_t *hooks_ptr = &pinned_hooks; + + /* Create arena with pinned hooks. */ + expect_d_eq(0, mallctl("arenas.create", &arena_ind, &sz, + &hooks_ptr, sizeof(hooks_ptr)), + "arena creation failed"); + + /* Allocate and free to populate ecache_pinned. */ + void *p = mallocx(PAGE * 4, MALLOCX_ARENA(arena_ind) + | MALLOCX_TCACHE_NONE); + expect_ptr_not_null(p, "alloc failed"); + dallocx(p, MALLOCX_TCACHE_NONE); + + /* Refresh stats. */ + uint64_t epoch = 1; + sz = sizeof(epoch); + expect_d_eq(0, mallctl("epoch", &epoch, &sz, &epoch, sizeof(epoch)), + "epoch failed"); + + /* Read total pinned stat. */ + char buf[128]; + size_t pinned_total; + sz = sizeof(pinned_total); + snprintf(buf, sizeof(buf), "stats.arenas.%u.pinned", arena_ind); + expect_d_eq(0, mallctl(buf, &pinned_total, &sz, NULL, 0), + "stats.arenas..pinned read failed"); + expect_zu_gt(pinned_total, 0, + "pinned total should be > 0 after free to pinned arena"); + + /* Destroy the arena. 
*/ + snprintf(buf, sizeof(buf), "arena.%u.destroy", arena_ind); + expect_d_eq(0, mallctl(buf, NULL, NULL, NULL, 0), + "arena destroy failed"); +} +TEST_END + +TEST_BEGIN(test_pinned_hook_arena_destroy) { + unsigned arena_ind; + size_t sz = sizeof(arena_ind); + extent_hooks_t *hooks_ptr = &pinned_hooks; + + /* Create arena with pinned hooks. */ + expect_d_eq(0, mallctl("arenas.create", &arena_ind, &sz, + &hooks_ptr, sizeof(hooks_ptr)), + "arena creation failed"); + + /* Allocate, shrink, and free through the pinned arena. */ + void *ptrs[8]; + for (unsigned i = 0; i < 8; i++) { + ptrs[i] = mallocx(PAGE * 4, MALLOCX_ARENA(arena_ind) + | MALLOCX_TCACHE_NONE); + expect_ptr_not_null(ptrs[i], "alloc %u failed", i); + /* Shrink it to test pac_shrink_impl. */ + ptrs[i] = rallocx(ptrs[i], PAGE * 2, MALLOCX_ARENA(arena_ind) + | MALLOCX_TCACHE_NONE); + expect_ptr_not_null(ptrs[i], "shrink %u failed", i); + } + for (unsigned i = 0; i < 8; i++) { + dallocx(ptrs[i], MALLOCX_TCACHE_NONE); + } + + /* Destroy the arena — must not crash or assert. */ + char buf[64]; + snprintf(buf, sizeof(buf), "arena.%u.destroy", arena_ind); + expect_d_eq(0, mallctl(buf, NULL, NULL, NULL, 0), + "arena destroy failed"); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_pinned_accessors, + test_dirty_accounting, + test_pinned_stats, + test_pinned_hook_arena_destroy); +}