mirror of
https://github.com/jemalloc/jemalloc.git
synced 2026-04-14 14:41:42 +03:00
Cache extra extents in the dirty pool from ecache_alloc_grow
This commit is contained in:
parent
c067a55c79
commit
6035d4a8d3
3 changed files with 96 additions and 12 deletions
|
|
@ -311,6 +311,17 @@ sz_size2index_usize_fastpath(size_t size, szind_t *ind, size_t *usize) {
|
|||
}
|
||||
}
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE size_t
|
||||
sz_s2u_compute_using_delta(size_t size) {
|
||||
size_t x = lg_floor((size<<1)-1);
|
||||
size_t lg_delta = (x < SC_LG_NGROUP + LG_QUANTUM + 1)
|
||||
? LG_QUANTUM : x - SC_LG_NGROUP - 1;
|
||||
size_t delta = ZU(1) << lg_delta;
|
||||
size_t delta_mask = delta - 1;
|
||||
size_t usize = (size + delta_mask) & ~delta_mask;
|
||||
return usize;
|
||||
}
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE size_t
|
||||
sz_s2u_compute(size_t size) {
|
||||
if (unlikely(size > SC_LARGE_MAXCLASS)) {
|
||||
|
|
@ -329,13 +340,7 @@ sz_s2u_compute(size_t size) {
|
|||
}
|
||||
#endif
|
||||
if (size <= SC_SMALL_MAXCLASS || !sz_limit_usize_gap_enabled()) {
|
||||
size_t x = lg_floor((size<<1)-1);
|
||||
size_t lg_delta = (x < SC_LG_NGROUP + LG_QUANTUM + 1)
|
||||
? LG_QUANTUM : x - SC_LG_NGROUP - 1;
|
||||
size_t delta = ZU(1) << lg_delta;
|
||||
size_t delta_mask = delta - 1;
|
||||
size_t usize = (size + delta_mask) & ~delta_mask;
|
||||
return usize;
|
||||
return sz_s2u_compute_using_delta(size);
|
||||
} else {
|
||||
/*
|
||||
* With sz_limit_usize_gap_enabled() == true, usize of a large
|
||||
|
|
|
|||
80
src/pac.c
80
src/pac.c
|
|
@ -112,10 +112,27 @@ pac_may_have_muzzy(pac_t *pac) {
|
|||
return pac_decay_ms_get(pac, extent_state_muzzy) != 0;
|
||||
}
|
||||
|
||||
size_t pac_alloc_retained_batched_size(size_t size) {
|
||||
if (size > SC_LARGE_MAXCLASS) {
|
||||
/*
|
||||
* A valid input with usize SC_LARGE_MAXCLASS could still
|
||||
* reach here because of sz_large_pad. Such a request is valid
|
||||
* but we should not further increase it. Thus, directly
|
||||
* return size for such cases.
|
||||
*/
|
||||
return size;
|
||||
}
|
||||
size_t batched_size = sz_s2u_compute_using_delta(size);
|
||||
size_t next_hugepage_size = HUGEPAGE_CEILING(size);
|
||||
return batched_size > next_hugepage_size? next_hugepage_size:
|
||||
batched_size;
|
||||
}
|
||||
|
||||
static edata_t *
|
||||
pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size,
|
||||
size_t alignment, bool zero, bool guarded) {
|
||||
assert(!guarded || alignment <= PAGE);
|
||||
size_t newly_mapped_size = 0;
|
||||
|
||||
edata_t *edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_dirty,
|
||||
NULL, size, alignment, zero, guarded);
|
||||
|
|
@ -124,14 +141,69 @@ pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size,
|
|||
edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_muzzy,
|
||||
NULL, size, alignment, zero, guarded);
|
||||
}
|
||||
|
||||
/*
|
||||
* We batch-allocate a larger extent when limit_usize_gap is enabled
|
||||
* because the reuse of extents in the dirty pool is worse without size
|
||||
* classes for large allocs. For instance, when limit_usize_gap is not
|
||||
* enabled, 1.1MB, 1.15MB, and 1.2MB allocs will all be ceiled to
|
||||
* 1.25MB and can reuse the same buffer if they are alloc & dalloc
|
||||
* sequentially. However, with limit_usize_gap enabled, they cannot
|
||||
* reuse the same buffer and their sequential allocs & dallocs will
|
||||
* result in three different extents. Thus, we cache extra mergeable
|
||||
* extents in the dirty pool to improve the reuse. We skip this
|
||||
* optimization if both maps_coalesce and opt_retain are disabled
|
||||
* because VM is not cheap enough to be used aggressively and extents
|
||||
* cannot be merged at will (only extents from the same VirtualAlloc
|
||||
* can be merged). Note that it could still be risky to cache more
|
||||
* extents when either maps_coalesce or opt_retain is enabled. Yet
|
||||
* doing so is still beneficial in improving the reuse of extents
|
||||
* with some limits. This choice should be reevaluated if
|
||||
* pac_alloc_retained_batched_size is changed to be more aggressive.
|
||||
*/
|
||||
if (sz_limit_usize_gap_enabled() && edata == NULL &&
|
||||
(maps_coalesce || opt_retain)) {
|
||||
size_t batched_size = pac_alloc_retained_batched_size(size);
|
||||
/*
|
||||
* Note that ecache_alloc_grow will try to retrieve virtual
|
||||
* memory from both retained pool and directly from OS through
|
||||
* extent_alloc_wrapper if the retained pool has no qualified
|
||||
* extents. This is also why the overcaching still works even
|
||||
* with opt_retain off.
|
||||
*/
|
||||
edata = ecache_alloc_grow(tsdn, pac, ehooks,
|
||||
&pac->ecache_retained, NULL, batched_size,
|
||||
alignment, zero, guarded);
|
||||
|
||||
if (edata != NULL && batched_size > size) {
|
||||
edata_t *trail = extent_split_wrapper(tsdn, pac,
|
||||
ehooks, edata, size, batched_size - size,
|
||||
/* holding_core_locks */ false);
|
||||
if (trail == NULL) {
|
||||
ecache_dalloc(tsdn, pac, ehooks,
|
||||
&pac->ecache_retained, edata);
|
||||
edata = NULL;
|
||||
} else {
|
||||
ecache_dalloc(tsdn, pac, ehooks,
|
||||
&pac->ecache_dirty, trail);
|
||||
}
|
||||
}
|
||||
|
||||
if (edata != NULL) {
|
||||
newly_mapped_size = batched_size;
|
||||
}
|
||||
}
|
||||
|
||||
if (edata == NULL) {
|
||||
edata = ecache_alloc_grow(tsdn, pac, ehooks,
|
||||
&pac->ecache_retained, NULL, size, alignment, zero,
|
||||
guarded);
|
||||
if (config_stats && edata != NULL) {
|
||||
atomic_fetch_add_zu(&pac->stats->pac_mapped, size,
|
||||
ATOMIC_RELAXED);
|
||||
}
|
||||
newly_mapped_size = size;
|
||||
}
|
||||
|
||||
if (config_stats && newly_mapped_size != 0) {
|
||||
atomic_fetch_add_zu(&pac->stats->pac_mapped,
|
||||
newly_mapped_size, ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
return edata;
|
||||
|
|
|
|||
|
|
@ -410,7 +410,14 @@ TEST_BEGIN(test_decay_never) {
|
|||
/* Verify that each deallocation generates additional dirty pages. */
|
||||
size_t pdirty_prev = get_arena_pdirty(arena_ind);
|
||||
size_t pmuzzy_prev = get_arena_pmuzzy(arena_ind);
|
||||
expect_zu_eq(pdirty_prev, 0, "Unexpected dirty pages");
|
||||
/*
|
||||
* With limit_usize_gap enabled, some more extents
|
||||
* are cached in the dirty pool, making the assumption below
|
||||
* not true.
|
||||
*/
|
||||
if (!sz_limit_usize_gap_enabled()) {
|
||||
expect_zu_eq(pdirty_prev, 0, "Unexpected dirty pages");
|
||||
}
|
||||
expect_zu_eq(pmuzzy_prev, 0, "Unexpected muzzy pages");
|
||||
for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) {
|
||||
dallocx(ptrs[i], flags);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue