mirror of https://github.com/jemalloc/jemalloc.git
synced 2026-05-16 18:06:22 +03:00
edata_cache: Allow unbounded fast caching.
The edata_cache_small had a fill/flush heuristic. In retrospect, this was a premature optimization; more testing indicates that an unbounded cache is effectively fine here, and moreover we spend a nontrivial amount of time doing unnecessary filling/flushing.

As the HPA takes on a larger and larger fraction of all allocations, any theoretical differences in allocation patterns should shrink. The HPA is more efficient with its metadata in general, so it still comes out ahead on metadata usage anyway.
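To make the change concrete, here is a minimal sketch of the put path before and after, reduced to its cache-management skeleton. Everything in it (cache_t, CACHE_MAX, flush_all, bounded_put, unbounded_put) is an illustrative stand-in, not the actual jemalloc API; the real change is in the src/edata_cache.c diff below.

#include <stddef.h>

/* Illustrative stand-in for jemalloc's edata_t on an intrusive list. */
typedef struct edata_s edata_t;
struct edata_s {
        edata_t *next;
};

/* Illustrative stand-in for the per-shard metadata cache. */
typedef struct {
        edata_t *head; /* LIFO list of cached edata_t */
        size_t count;  /* only needed by the old, bounded scheme */
} cache_t;

#define CACHE_MAX 16 /* stand-in for the old flush threshold */

/* Stand-in for the old flush: in jemalloc this takes the fallback's
 * mutex and hands every cached edata_t back; reduced to a reset here. */
static void
flush_all(cache_t *c) {
        c->head = NULL;
        c->count = 0;
}

/* Before: every put was counted, and crossing the threshold paid for
 * a full, mutex-protected flush back to the fallback cache. */
static void
bounded_put(cache_t *c, edata_t *e) {
        e->next = c->head;
        c->head = e;
        c->count++;
        if (c->count > CACHE_MAX) {
                flush_all(c);
        }
}

/* After: a put is just a list prepend; no count, no threshold, and no
 * trips to the fallback's mutex on this path. */
static void
unbounded_put(cache_t *c, edata_t *e) {
        e->next = c->head;
        c->head = e;
}

The real diff below mirrors this shape: ecs->count and the EDATA_CACHE_SMALL_MAX flush disappear from the put path (filling from the fallback on an empty get remains), and the type is renamed from edata_cache_small_t to edata_cache_fast_t.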
This commit is contained in:
parent d93eef2f40
commit 92a1e38f52
5 changed files with 99 additions and 151 deletions
src/edata_cache.c

@@ -56,39 +56,34 @@ edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache) {
 }
 
 void
-edata_cache_small_init(edata_cache_small_t *ecs, edata_cache_t *fallback) {
+edata_cache_fast_init(edata_cache_fast_t *ecs, edata_cache_t *fallback) {
 	edata_list_inactive_init(&ecs->list);
-	ecs->count = 0;
 	ecs->fallback = fallback;
 	ecs->disabled = false;
 }
 
 static void
-edata_cache_small_try_fill_from_fallback(tsdn_t *tsdn,
-    edata_cache_small_t *ecs) {
-	assert(ecs->count == 0);
+edata_cache_fast_try_fill_from_fallback(tsdn_t *tsdn,
+    edata_cache_fast_t *ecs) {
 	edata_t *edata;
 	malloc_mutex_lock(tsdn, &ecs->fallback->mtx);
-	while (ecs->count < EDATA_CACHE_SMALL_FILL) {
-		edata = edata_avail_first(&ecs->fallback->avail);
+	for (int i = 0; i < EDATA_CACHE_FAST_FILL; i++) {
+		edata = edata_avail_remove_first(&ecs->fallback->avail);
 		if (edata == NULL) {
 			break;
 		}
-		edata_avail_remove(&ecs->fallback->avail, edata);
 		edata_list_inactive_append(&ecs->list, edata);
-		ecs->count++;
 		atomic_load_sub_store_zu(&ecs->fallback->count, 1);
 	}
 	malloc_mutex_unlock(tsdn, &ecs->fallback->mtx);
 }
 
 edata_t *
-edata_cache_small_get(tsdn_t *tsdn, edata_cache_small_t *ecs) {
+edata_cache_fast_get(tsdn_t *tsdn, edata_cache_fast_t *ecs) {
 	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
 	    WITNESS_RANK_EDATA_CACHE, 0);
 
 	if (ecs->disabled) {
-		assert(ecs->count == 0);
 		assert(edata_list_inactive_first(&ecs->list) == NULL);
 		return edata_cache_get(tsdn, ecs->fallback);
 	}
@@ -96,15 +91,13 @@ edata_cache_small_get(tsdn_t *tsdn, edata_cache_small_t *ecs) {
 	edata_t *edata = edata_list_inactive_first(&ecs->list);
 	if (edata != NULL) {
 		edata_list_inactive_remove(&ecs->list, edata);
-		ecs->count--;
 		return edata;
 	}
 	/* Slow path; requires synchronization. */
-	edata_cache_small_try_fill_from_fallback(tsdn, ecs);
+	edata_cache_fast_try_fill_from_fallback(tsdn, ecs);
 	edata = edata_list_inactive_first(&ecs->list);
 	if (edata != NULL) {
 		edata_list_inactive_remove(&ecs->list, edata);
-		ecs->count--;
 	} else {
 		/*
 		 * Slowest path (fallback was also empty); allocate something
@@ -116,7 +109,7 @@ edata_cache_small_get(tsdn_t *tsdn, edata_cache_small_t *ecs) {
 }
 
 static void
-edata_cache_small_flush_all(tsdn_t *tsdn, edata_cache_small_t *ecs) {
+edata_cache_fast_flush_all(tsdn_t *tsdn, edata_cache_fast_t *ecs) {
 	/*
 	 * You could imagine smarter cache management policies (like
 	 * only flushing down to some threshold in anticipation of
@@ -132,19 +125,16 @@ edata_cache_small_flush_all(tsdn_t *tsdn, edata_cache_small_t *ecs) {
 		edata_avail_insert(&ecs->fallback->avail, edata);
 		nflushed++;
 	}
-	atomic_load_add_store_zu(&ecs->fallback->count, ecs->count);
+	atomic_load_add_store_zu(&ecs->fallback->count, nflushed);
 	malloc_mutex_unlock(tsdn, &ecs->fallback->mtx);
-	assert(nflushed == ecs->count);
-	ecs->count = 0;
 }
 
 void
-edata_cache_small_put(tsdn_t *tsdn, edata_cache_small_t *ecs, edata_t *edata) {
+edata_cache_fast_put(tsdn_t *tsdn, edata_cache_fast_t *ecs, edata_t *edata) {
 	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
 	    WITNESS_RANK_EDATA_CACHE, 0);
 
 	if (ecs->disabled) {
-		assert(ecs->count == 0);
 		assert(edata_list_inactive_first(&ecs->list) == NULL);
 		edata_cache_put(tsdn, ecs->fallback, edata);
 		return;
@@ -155,15 +145,10 @@ edata_cache_small_put(tsdn_t *tsdn, edata_cache_small_t *ecs, edata_t *edata) {
 	 * cache locality.
 	 */
 	edata_list_inactive_prepend(&ecs->list, edata);
-	ecs->count++;
-	if (ecs->count > EDATA_CACHE_SMALL_MAX) {
-		assert(ecs->count == EDATA_CACHE_SMALL_MAX + 1);
-		edata_cache_small_flush_all(tsdn, ecs);
-	}
 }
 
 void
-edata_cache_small_disable(tsdn_t *tsdn, edata_cache_small_t *ecs) {
-	edata_cache_small_flush_all(tsdn, ecs);
+edata_cache_fast_disable(tsdn_t *tsdn, edata_cache_fast_t *ecs) {
+	edata_cache_fast_flush_all(tsdn, ecs);
 	ecs->disabled = true;
 }
src/hpa.c (12 changes)

@@ -187,7 +187,7 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap,
 	assert(edata_cache != NULL);
 	shard->central = central;
 	shard->base = base;
-	edata_cache_small_init(&shard->ecs, edata_cache);
+	edata_cache_fast_init(&shard->ecf, edata_cache);
 	psset_init(&shard->psset);
 	shard->age_counter = 0;
 	shard->ind = ind;
@@ -537,7 +537,7 @@ static edata_t *
 hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
     bool *oom) {
 	bool err;
-	edata_t *edata = edata_cache_small_get(tsdn, &shard->ecs);
+	edata_t *edata = edata_cache_fast_get(tsdn, &shard->ecf);
 	if (edata == NULL) {
 		*oom = true;
 		return NULL;
@@ -545,7 +545,7 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
 
 	hpdata_t *ps = psset_pick_alloc(&shard->psset, size);
 	if (ps == NULL) {
-		edata_cache_small_put(tsdn, &shard->ecs, edata);
+		edata_cache_fast_put(tsdn, &shard->ecf, edata);
 		return NULL;
 	}
 
@@ -592,7 +592,7 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
 	 * tweaked the stats, but our tweaks weren't really accurate).
 	 */
 	psset_update_end(&shard->psset, ps);
-	edata_cache_small_put(tsdn, &shard->ecs, edata);
+	edata_cache_fast_put(tsdn, &shard->ecf, edata);
 	*oom = true;
 	return NULL;
 }
@@ -805,7 +805,7 @@ hpa_dalloc_locked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) {
 	assert(ps != NULL);
 	void *unreserve_addr = edata_addr_get(edata);
 	size_t unreserve_size = edata_size_get(edata);
-	edata_cache_small_put(tsdn, &shard->ecs, edata);
+	edata_cache_fast_put(tsdn, &shard->ecf, edata);
 
 	psset_update_begin(&shard->psset, ps);
 	hpdata_unreserve(ps, unreserve_addr, unreserve_size);
@@ -844,7 +844,7 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) {
 void
 hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) {
 	malloc_mutex_lock(tsdn, &shard->mtx);
-	edata_cache_small_disable(tsdn, &shard->ecs);
+	edata_cache_fast_disable(tsdn, &shard->ecf);
 	malloc_mutex_unlock(tsdn, &shard->mtx);
 }
 