mirror of https://github.com/jemalloc/jemalloc.git
Introduce pinned extents to contain unpurgeable pages
Some pages (e.g., hugetlb pages) cannot be purged and should be prioritized for reuse. A custom extent_alloc hook signals this by OR'ing EXTENT_ALLOC_FLAG_PINNED into the low bits of the returned pointer; jemalloc strips the flag bits and caches pinned extents in a dedicated ecache_pinned, separate from the dirty/muzzy decay pipeline. Pinned extents are not coalesced eagerly, except for those larger than SC_LARGE_MINCLASS. A prefer-small policy reuses the smallest pinned extent that fits, to avoid unnecessary splitting and fragmentation.
parent 7638093c73
commit be2de8ccd8

22 changed files with 977 additions and 86 deletions
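For illustration, a minimal user-side sketch of how a pinned-aware hook could be wired up: a HugeTLB-backed extent_alloc that tags its result with EXTENT_ALLOC_FLAG_PINNED, installed through the standard "arena.<i>.extent_hooks" mallctl. The hook body mirrors the example added to extent_hooks.h below; install_hugetlb_hooks and its details are illustrative, not part of this change, and whether the remaining hook members may be left NULL (versus forwarding to the previously installed defaults) depends on the jemalloc version.

#define _GNU_SOURCE
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <jemalloc/jemalloc.h>

/* Allocate HugeTLB-backed memory and tag it as pinned so jemalloc caches it
 * for reuse instead of trying to purge it. */
static void *
hugetlb_alloc(extent_hooks_t *hooks, void *new_addr, size_t size,
    size_t alignment, bool *zero, bool *commit, unsigned arena_ind) {
    (void)hooks; (void)arena_ind;
    if (new_addr != NULL || alignment > (2u << 20)) {
        /* Fixed-address or over-aligned requests are not handled here. */
        return NULL;
    }
    void *addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
        MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
    if (addr == MAP_FAILED) {
        /* Give up; a production hook would fall back to regular pages. */
        return NULL;
    }
    *zero = true;       /* Fresh anonymous mappings are zero-filled. */
    *commit = true;     /* Required when returning the pinned flag. */
    /* OR the flag into the page-aligned address; jemalloc strips it. */
    return (void *)((uintptr_t)addr | EXTENT_ALLOC_FLAG_PINNED);
}

/* Only the alloc member is overridden in this sketch. */
static extent_hooks_t hugetlb_hooks = { .alloc = hugetlb_alloc };

/* Hypothetical helper: install the hooks on one arena. */
static int
install_hugetlb_hooks(unsigned arena_ind) {
    extent_hooks_t *new_hooks = &hugetlb_hooks;
    char cmd[64];
    snprintf(cmd, sizeof(cmd), "arena.%u.extent_hooks", arena_ind);
    return mallctl(cmd, NULL, NULL, &new_hooks, sizeof(new_hooks));
}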
@@ -63,6 +63,7 @@ typedef struct ctl_stats_s {
     size_t resident;
     size_t mapped;
     size_t retained;
+    size_t pinned;
 
     background_thread_stats_t background_thread;
     mutex_prof_data_t mutex_prof_data[mutex_prof_num_global_mutexes];
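The existing ctl_stats_s members are surfaced as top-level "stats.*" mallctls; presumably the new counter is exposed the same way. A hedged sketch of reading it, assuming the name "stats.pinned" (inferred from the struct member; the actual mallctl path is not shown in this excerpt):

#include <stdint.h>
#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void) {
    uint64_t epoch = 1;
    size_t sz = sizeof(epoch);
    /* Refresh jemalloc's cached statistics snapshot before reading. */
    mallctl("epoch", &epoch, &sz, &epoch, sizeof(epoch));

    size_t resident, retained, pinned;
    sz = sizeof(size_t);
    mallctl("stats.resident", &resident, &sz, NULL, 0);
    mallctl("stats.retained", &retained, &sz, NULL, 0);
    /* "stats.pinned" is an assumed name based on this diff. */
    if (mallctl("stats.pinned", &pinned, &sz, NULL, 0) == 0) {
        printf("resident=%zu retained=%zu pinned=%zu\n",
            resident, retained, pinned);
    }
    return 0;
}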
@@ -34,9 +34,10 @@ enum extent_state_e {
     extent_state_dirty = 1,
     extent_state_muzzy = 2,
     extent_state_retained = 3,
-    extent_state_transition = 4, /* States below are intermediate. */
-    extent_state_merging = 5,
-    extent_state_max = 5 /* Sanity checking only. */
+    extent_state_pinned = 4,
+    extent_state_transition = 5, /* States below are intermediate. */
+    extent_state_merging = 6,
+    extent_state_max = 6 /* Sanity checking only. */
 };
 typedef enum extent_state_e extent_state_t;
 
@@ -110,8 +111,10 @@ struct edata_s {
      * i: szind
      * f: nfree
      * s: bin_shard
+     * h: is_head
+     * n: pinned
      *
-     * 00000000 ... 0000ssss ssffffff ffffiiii iiiitttg zpcbaaaa aaaaaaaa
+     * 00000000 ... 0nhsssss ssffffff ffffiiii iiiitttg zpcbaaaa aaaaaaaa
      *
      * arena_ind: Arena from which this extent came, or all 1 bits if
      *   unassociated.
@@ -145,6 +148,10 @@ struct edata_s {
      * nfree: Number of free regions in slab.
      *
      * bin_shard: the shard of the bin from which this extent came.
+     *
+     * is_head: see comments in ehooks_default_merge_impl().
+     *
+     * pinned: true if the alloc hook signaled non-reclaimable backing.
      */
     uint64_t e_bits;
 #define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) \
@@ -210,6 +217,16 @@ struct edata_s {
 #define EDATA_BITS_IS_HEAD_MASK \
     MASK(EDATA_BITS_IS_HEAD_WIDTH, EDATA_BITS_IS_HEAD_SHIFT)
 
+#define EDATA_BITS_PINNED_WIDTH 1
+#define EDATA_BITS_PINNED_SHIFT \
+    (EDATA_BITS_IS_HEAD_WIDTH + EDATA_BITS_IS_HEAD_SHIFT)
+#define EDATA_BITS_PINNED_MASK \
+    MASK(EDATA_BITS_PINNED_WIDTH, EDATA_BITS_PINNED_SHIFT)
+
+#if (EDATA_BITS_PINNED_SHIFT + EDATA_BITS_PINNED_WIDTH > 64)
+#error "edata_t e_bits overflow"
+#endif
+
     /* Pointer to the extent that this structure is responsible for. */
     void *e_addr;
 
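A quick standalone check of the bit-packing above, assuming MASK expands to the usual (((uint64_t)1 << width) - 1) << shift form used for the other e_bits fields (the macro body is outside this excerpt) and using made-up shift values: stacking PINNED directly above IS_HEAD keeps the two masks disjoint.

#include <assert.h>
#include <stdint.h>

#define MASK(width, shift) (((((uint64_t)1) << (width)) - 1) << (shift))

int
main(void) {
    /* Hypothetical stand-in values; the real shifts live in edata.h. */
    const unsigned is_head_width = 1, is_head_shift = 48;
    const unsigned pinned_width = 1;
    const unsigned pinned_shift = is_head_width + is_head_shift;

    /* The pinned bit occupies the slot just above is_head... */
    assert(MASK(pinned_width, pinned_shift) ==
        MASK(is_head_width, is_head_shift) << 1);
    /* ...so the two fields never overlap. */
    assert((MASK(pinned_width, pinned_shift)
        & MASK(is_head_width, is_head_shift)) == 0);
    return 0;
}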
@@ -538,6 +555,29 @@ edata_ps_set(edata_t *edata, hpdata_t *ps) {
     edata->e_ps = ps;
 }
 
+static inline bool
+edata_pinned_get(const edata_t *edata) {
+    return (bool)((edata->e_bits & EDATA_BITS_PINNED_MASK)
+        >> EDATA_BITS_PINNED_SHIFT);
+}
+
+static inline void
+edata_pinned_set(edata_t *edata, bool pinned) {
+    edata->e_bits = (edata->e_bits & ~EDATA_BITS_PINNED_MASK)
+        | ((uint64_t)pinned << EDATA_BITS_PINNED_SHIFT);
+}
+
+static inline void
+edata_hook_flags_init(edata_t *edata, unsigned alloc_flags) {
+    edata_pinned_set(edata,
+        (alloc_flags & EXTENT_ALLOC_FLAG_PINNED) != 0);
+}
+
+static inline unsigned
+edata_alloc_flags_get(const edata_t *edata) {
+    return edata_pinned_get(edata) ? EXTENT_ALLOC_FLAG_PINNED : 0;
+}
+
 static inline void
 edata_szind_set(edata_t *edata, szind_t szind) {
     assert(szind <= SC_NSIZES); /* SC_NSIZES means "invalid". */
@@ -686,6 +726,7 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size,
     edata_committed_set(edata, committed);
     edata_pai_set(edata, pai);
     edata_is_head_set(edata, is_head == EXTENT_IS_HEAD);
+    edata_hook_flags_init(edata, 0);
     if (config_prof) {
         edata_prof_tctx_set(edata, NULL);
     }
@@ -711,6 +752,7 @@ edata_binit(
      * wasting a state bit to encode this fact.
      */
     edata_pai_set(edata, EXTENT_PAI_PAC);
+    edata_hook_flags_init(edata, 0);
 }
 
 static inline int
@@ -191,7 +191,7 @@ ehooks_debug_zero_check(void *addr, size_t size) {
 
 static inline void *
 ehooks_alloc(tsdn_t *tsdn, ehooks_t *ehooks, void *new_addr, size_t size,
-    size_t alignment, bool *zero, bool *commit) {
+    size_t alignment, bool *zero, bool *commit, unsigned *alloc_flags) {
     bool orig_zero = *zero;
     void *ret;
     extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks);
@@ -204,6 +204,18 @@ ehooks_alloc(tsdn_t *tsdn, ehooks_t *ehooks, void *new_addr, size_t size,
             alignment, zero, commit, ehooks_ind_get(ehooks));
         ehooks_post_reentrancy(tsdn);
     }
+#if LG_PAGE < 8
+# error "Extent alloc flags require page size of at least 256"
+#endif
+    if (ret != NULL) {
+        *alloc_flags = (unsigned)((uintptr_t)ret
+            & EXTENT_ALLOC_FLAG_MASK);
+        ret = (void *)((byte_t *)ret - *alloc_flags);
+        /* Pinned hooks must also set *commit; pinned bypasses commit/decommit. */
+        assert(!(*alloc_flags & EXTENT_ALLOC_FLAG_PINNED) || *commit);
+    } else {
+        *alloc_flags = 0;
+    }
     assert(new_addr == NULL || ret == NULL || new_addr == ret);
     assert(!orig_zero || *zero);
     if (*zero && ret != NULL) {
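The strip-and-subtract step above relies on the hook's return value being page-aligned, so the low bits carry only flags. A tiny standalone round-trip of the same arithmetic (the address and flag values here are illustrative; the real code does the subtraction through jemalloc's internal byte_t pointer type):

#include <assert.h>
#include <stdint.h>

#define EXTENT_ALLOC_FLAG_PINNED 0x1U
#define EXTENT_ALLOC_FLAG_MASK 0xFFU

int
main(void) {
    uintptr_t page = 0x200000;  /* Some page-aligned address. */
    uintptr_t tagged = page | EXTENT_ALLOC_FLAG_PINNED;

    unsigned flags = (unsigned)(tagged & EXTENT_ALLOC_FLAG_MASK);
    uintptr_t addr = tagged - flags;

    assert(flags == EXTENT_ALLOC_FLAG_PINNED);
    assert(addr == page);   /* The original pointer is recovered exactly. */
    return 0;
}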
@@ -211,6 +211,7 @@ extent_assert_can_coalesce(const edata_t *inner, const edata_t *outer) {
     assert(edata_state_get(inner) == extent_state_active);
     assert(edata_state_get(outer) == extent_state_merging);
     assert(!edata_guarded_get(inner) && !edata_guarded_get(outer));
+    assert(edata_pinned_get(inner) == edata_pinned_get(outer));
     assert(edata_base_get(inner) == edata_past_get(outer)
         || edata_base_get(outer) == edata_past_get(inner));
 }
@@ -73,6 +73,6 @@ void eset_remove(eset_t *eset, edata_t *edata);
  * null if no such item could be found.
  */
 edata_t *eset_fit(eset_t *eset, size_t esize, size_t alignment, bool exact_only,
-    unsigned lg_max_fit);
+    unsigned lg_max_fit, bool prefer_small);
 
 #endif /* JEMALLOC_INTERNAL_ESET_H */
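eset_fit's internals are not part of this excerpt; as a rough illustration of what the new prefer_small flag asks for, here is a toy selection over a flat list of cached extent sizes (not the real data structure, which is organized by size class): among candidates that can satisfy the request, pick the smallest so that less splitting is needed.

#include <assert.h>
#include <stddef.h>

/* Among cached sizes that can hold esize, return the smallest (0 = no fit). */
static size_t
prefer_small_fit(const size_t *free_sizes, size_t n, size_t esize) {
    size_t best = 0;
    for (size_t i = 0; i < n; i++) {
        if (free_sizes[i] < esize) {
            continue;   /* Too small to satisfy the request. */
        }
        if (best == 0 || free_sizes[i] < best) {
            best = free_sizes[i];
        }
    }
    return best;
}

int
main(void) {
    /* Cached pinned extents of 2 MiB, 8 MiB, and 4 MiB (illustrative). */
    const size_t cached[] = {2 << 20, 8 << 20, 4 << 20};
    /* A 3 MiB request reuses the 4 MiB extent rather than the 8 MiB one. */
    assert(prefer_small_fit(cached, 3, 3 << 20) == (size_t)(4 << 20));
    return 0;
}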
@@ -120,6 +120,10 @@ extent_can_acquire_neighbor(const edata_t *edata, rtree_contents_t contents,
             */
            return false;
        }
+       /* Do not merge pinned and non-pinned extents. */
+       if (edata_pinned_get(edata) != edata_pinned_get(neighbor)) {
+           return false;
+       }
    } else {
        if (neighbor_state == extent_state_active) {
            return false;
@@ -30,6 +30,7 @@ typedef enum {
     OP(extents_dirty) \
     OP(extents_muzzy) \
     OP(extents_retained) \
+    OP(extents_pinned) \
     OP(decay_dirty) \
     OP(decay_muzzy) \
     OP(base) \
@@ -51,6 +51,8 @@ struct pac_estats_s {
     size_t muzzy_bytes;
     size_t nretained;
     size_t retained_bytes;
+    size_t npinned;
+    size_t pinned_bytes;
 };
 
 typedef struct pac_stats_s pac_stats_t;
@@ -61,9 +63,14 @@ struct pac_stats_s {
     /*
      * Number of unused virtual memory bytes currently retained. Retained
      * bytes are technically mapped (though always decommitted or purged),
-     * but they are excluded from the mapped statistic (above).
+     * but they are excluded from pac_mapped.
      */
     size_t retained; /* Derived. */
+    /*
+     * Number of bytes in pinned (non-reclaimable) extents currently
+     * cached. Unlike retained, pinned bytes count toward pac_mapped.
+     */
+    size_t pinned; /* Derived. */
 
     /*
      * Number of bytes currently mapped, excluding retained memory (and any
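To make the accounting concrete (illustrative numbers, not taken from this change): under the definitions above, an arena with 8 MiB of live mappings, 2 MiB of cached pinned extents, and 4 MiB of retained address space would report pac_mapped = 10 MiB (live plus pinned), pinned = 2 MiB, and retained = 4 MiB, since retained bytes stay out of pac_mapped while pinned bytes remain in it.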
@@ -85,6 +92,8 @@ struct pac_s {
      * pointer). The handle to the allocation interface.
      */
     pai_t pai;
+    /* True once pinned memory has been seen. */
+    atomic_b_t has_pinned;
     /*
      * Collections of extents that were previously allocated. These are
      * used when allocating extents, in an attempt to re-use address space.
@@ -94,6 +103,7 @@ struct pac_s {
     ecache_t ecache_dirty;
     ecache_t ecache_muzzy;
     ecache_t ecache_retained;
+    ecache_t ecache_pinned;
 
     base_t *base;
     emap_t *emap;
@@ -160,6 +170,21 @@ pac_mapped(const pac_t *pac) {
     return atomic_load_zu(&pac->stats->pac_mapped, ATOMIC_RELAXED);
 }
 
+void extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
+    ecache_t *ecache, edata_t *edata);
+
+static inline void
+pac_record_grown(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
+    edata_t *edata) {
+    bool pinned = edata_pinned_get(edata);
+    if (pinned && config_stats) {
+        atomic_fetch_add_zu(&pac->stats->pac_mapped,
+            edata_size_get(edata), ATOMIC_RELAXED);
+    }
+    extent_record(tsdn, pac, ehooks,
+        pinned ? &pac->ecache_pinned : &pac->ecache_retained, edata);
+}
+
 static inline ehooks_t *
 pac_ehooks_get(const pac_t *pac) {
     return base_ehooks_get(pac->base);
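The call sites are not shown in this excerpt; presumably grow-path code that previously recorded new extents directly into ecache_retained via extent_record now goes through pac_record_grown, so pinned extents are routed to ecache_pinned and continue to count toward pac_mapped while non-pinned extents keep the old behavior.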
@@ -4,6 +4,36 @@ extern "C" {
 
 typedef struct extent_hooks_s extent_hooks_t;
 
+/*
+ * Extent alloc flags. A custom extent_alloc hook may OR these into the
+ * returned pointer; jemalloc strips the low bits before use. Safe because
+ * returned addresses are at least page-aligned (PAGE >= 256).
+ *
+ * EXTENT_ALLOC_FLAG_PINNED: backing memory is non-reclaimable.
+ * Pinned extents are excluded from decay/purging and cached separately for
+ * preferential reuse. A hook returning this flag must also set *commit to
+ * true: pinned memory bypasses jemalloc's commit/decommit machinery.
+ *
+ * The pinned attribute is per-extent: a single hook may return pinned and
+ * non-pinned extents in different calls. Pinned and non-pinned extents are
+ * never merged together (the merge would change the reclamation policy of
+ * one half), so pinned-ness is set at allocation and inherited through
+ * splits, but never changes after that.
+ *
+ * Example (HugeTLB alloc hook):
+ *     void *my_alloc(extent_hooks_t *h, void *new_addr, size_t size,
+ *         size_t alignment, bool *zero, bool *commit, unsigned arena_ind) {
+ *             void *addr = mmap(NULL, size, PROT_READ|PROT_WRITE,
+ *                 MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0);
+ *             if (addr == MAP_FAILED) return NULL;
+ *             *zero = true;
+ *             *commit = true;
+ *             return (void *)((uintptr_t)addr | EXTENT_ALLOC_FLAG_PINNED);
+ *     }
+ */
+#define EXTENT_ALLOC_FLAG_PINNED 0x1U
+#define EXTENT_ALLOC_FLAG_MASK 0xFFU
+
 /*
  * void *
  * extent_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size,