edata_cache: Allow unbounded fast caching.

The edata_cache_small had a fill/flush heuristic.  In retrospect, this was a
premature optimization; more testing indicates that an unbounded cache is
effectively fine here, and moreover we spend a nontrivial amount of time doing
unnecessary filling/flushing.

As the HPA takes on a larger and larger fraction of all allocations, any
theoretical differences in allocation patterns should shrink.  The HPA is more
efficient with its metadata in general, so it still comes out ahead on metadata
usage anyway.
Author:    David Goldblatt
Committer: David Goldblatt
Date:      2021-07-23 15:29:43 -07:00
Parent:    d93eef2f40
Commit:    92a1e38f52

5 changed files with 99 additions and 151 deletions
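For intuition, here is a minimal, self-contained sketch of the behavior after this change (not jemalloc's actual code: fast_cache_t, fallback_t, node_t, and FILL_BATCH are hypothetical stand-ins, and the fallback's mutex is elided). get() still batch-fills from the shared fallback when the local list runs dry, but put() now simply prepends to the local list, with no count bookkeeping and no flush threshold:

#include <stddef.h>

/* Hypothetical simplified types standing in for edata_t lists. */
typedef struct node_s node_t;
struct node_s {
	node_t *next;
};

typedef struct {
	node_t *head;		/* shared free list; its mutex is elided here */
} fallback_t;

typedef struct {
	node_t *head;		/* local list; unbounded after this change */
	fallback_t *fallback;
} fast_cache_t;

#define FILL_BATCH 8		/* stand-in for EDATA_CACHE_FAST_FILL */

static node_t *
fast_cache_get(fast_cache_t *fc) {
	if (fc->head == NULL) {
		/* Slow path: pull a batch from the shared fallback. */
		for (int i = 0; i < FILL_BATCH && fc->fallback->head != NULL;
		    i++) {
			node_t *n = fc->fallback->head;
			fc->fallback->head = n->next;
			n->next = fc->head;
			fc->head = n;
		}
	}
	node_t *n = fc->head;
	if (n != NULL) {
		fc->head = n->next;
	}
	return n;	/* NULL: caller falls back to a fresh allocation */
}

static void
fast_cache_put(fast_cache_t *fc, node_t *n) {
	/*
	 * Unconditional local caching: the old code also counted entries and
	 * flushed everything back to the fallback once a threshold was hit.
	 */
	n->next = fc->head;
	fc->head = n;
}

The count bookkeeping and threshold flush that this replaces appear as the removed lines in edata_cache_small_put() in the diff below.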

src/edata_cache.c

@@ -56,39 +56,34 @@ edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache) {
 }
 
 void
-edata_cache_small_init(edata_cache_small_t *ecs, edata_cache_t *fallback) {
+edata_cache_fast_init(edata_cache_fast_t *ecs, edata_cache_t *fallback) {
 	edata_list_inactive_init(&ecs->list);
-	ecs->count = 0;
 	ecs->fallback = fallback;
 	ecs->disabled = false;
 }
 
 static void
-edata_cache_small_try_fill_from_fallback(tsdn_t *tsdn,
-    edata_cache_small_t *ecs) {
-	assert(ecs->count == 0);
+edata_cache_fast_try_fill_from_fallback(tsdn_t *tsdn,
+    edata_cache_fast_t *ecs) {
 	edata_t *edata;
 	malloc_mutex_lock(tsdn, &ecs->fallback->mtx);
-	while (ecs->count < EDATA_CACHE_SMALL_FILL) {
-		edata = edata_avail_first(&ecs->fallback->avail);
+	for (int i = 0; i < EDATA_CACHE_FAST_FILL; i++) {
+		edata = edata_avail_remove_first(&ecs->fallback->avail);
 		if (edata == NULL) {
 			break;
 		}
-		edata_avail_remove(&ecs->fallback->avail, edata);
 		edata_list_inactive_append(&ecs->list, edata);
-		ecs->count++;
 		atomic_load_sub_store_zu(&ecs->fallback->count, 1);
 	}
 	malloc_mutex_unlock(tsdn, &ecs->fallback->mtx);
 }
 
 edata_t *
-edata_cache_small_get(tsdn_t *tsdn, edata_cache_small_t *ecs) {
+edata_cache_fast_get(tsdn_t *tsdn, edata_cache_fast_t *ecs) {
 	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
 	    WITNESS_RANK_EDATA_CACHE, 0);
 
 	if (ecs->disabled) {
-		assert(ecs->count == 0);
 		assert(edata_list_inactive_first(&ecs->list) == NULL);
 		return edata_cache_get(tsdn, ecs->fallback);
 	}
@@ -96,15 +91,13 @@ edata_cache_small_get(tsdn_t *tsdn, edata_cache_small_t *ecs) {
 	edata_t *edata = edata_list_inactive_first(&ecs->list);
 	if (edata != NULL) {
 		edata_list_inactive_remove(&ecs->list, edata);
-		ecs->count--;
 		return edata;
 	}
 	/* Slow path; requires synchronization. */
-	edata_cache_small_try_fill_from_fallback(tsdn, ecs);
+	edata_cache_fast_try_fill_from_fallback(tsdn, ecs);
 	edata = edata_list_inactive_first(&ecs->list);
 	if (edata != NULL) {
 		edata_list_inactive_remove(&ecs->list, edata);
-		ecs->count--;
 	} else {
 		/*
 		 * Slowest path (fallback was also empty); allocate something
@@ -116,7 +109,7 @@ edata_cache_small_get(tsdn_t *tsdn, edata_cache_small_t *ecs) {
 }
 
 static void
-edata_cache_small_flush_all(tsdn_t *tsdn, edata_cache_small_t *ecs) {
+edata_cache_fast_flush_all(tsdn_t *tsdn, edata_cache_fast_t *ecs) {
 	/*
 	 * You could imagine smarter cache management policies (like
 	 * only flushing down to some threshold in anticipation of
@@ -132,19 +125,16 @@ edata_cache_small_flush_all(tsdn_t *tsdn, edata_cache_small_t *ecs) {
 		edata_avail_insert(&ecs->fallback->avail, edata);
 		nflushed++;
 	}
-	atomic_load_add_store_zu(&ecs->fallback->count, ecs->count);
+	atomic_load_add_store_zu(&ecs->fallback->count, nflushed);
 	malloc_mutex_unlock(tsdn, &ecs->fallback->mtx);
-	assert(nflushed == ecs->count);
-	ecs->count = 0;
 }
 
 void
-edata_cache_small_put(tsdn_t *tsdn, edata_cache_small_t *ecs, edata_t *edata) {
+edata_cache_fast_put(tsdn_t *tsdn, edata_cache_fast_t *ecs, edata_t *edata) {
 	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
 	    WITNESS_RANK_EDATA_CACHE, 0);
 
 	if (ecs->disabled) {
-		assert(ecs->count == 0);
 		assert(edata_list_inactive_first(&ecs->list) == NULL);
 		edata_cache_put(tsdn, ecs->fallback, edata);
 		return;
@@ -155,15 +145,10 @@ edata_cache_small_put(tsdn_t *tsdn, edata_cache_small_t *ecs, edata_t *edata) {
 	 * cache locality.
 	 */
 	edata_list_inactive_prepend(&ecs->list, edata);
-	ecs->count++;
-	if (ecs->count > EDATA_CACHE_SMALL_MAX) {
-		assert(ecs->count == EDATA_CACHE_SMALL_MAX + 1);
-		edata_cache_small_flush_all(tsdn, ecs);
-	}
 }
 
 void
-edata_cache_small_disable(tsdn_t *tsdn, edata_cache_small_t *ecs) {
-	edata_cache_small_flush_all(tsdn, ecs);
+edata_cache_fast_disable(tsdn_t *tsdn, edata_cache_fast_t *ecs) {
+	edata_cache_fast_flush_all(tsdn, ecs);
 	ecs->disabled = true;
 }

src/hpa.c

@@ -187,7 +187,7 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap,
 	assert(edata_cache != NULL);
 	shard->central = central;
 	shard->base = base;
-	edata_cache_small_init(&shard->ecs, edata_cache);
+	edata_cache_fast_init(&shard->ecf, edata_cache);
 	psset_init(&shard->psset);
 	shard->age_counter = 0;
 	shard->ind = ind;
@@ -537,7 +537,7 @@ static edata_t *
 hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
     bool *oom) {
 	bool err;
-	edata_t *edata = edata_cache_small_get(tsdn, &shard->ecs);
+	edata_t *edata = edata_cache_fast_get(tsdn, &shard->ecf);
 	if (edata == NULL) {
 		*oom = true;
 		return NULL;
@@ -545,7 +545,7 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
 
 	hpdata_t *ps = psset_pick_alloc(&shard->psset, size);
 	if (ps == NULL) {
-		edata_cache_small_put(tsdn, &shard->ecs, edata);
+		edata_cache_fast_put(tsdn, &shard->ecf, edata);
 		return NULL;
 	}
@@ -592,7 +592,7 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
 		 * tweaked the stats, but our tweaks weren't really accurate).
 		 */
 		psset_update_end(&shard->psset, ps);
-		edata_cache_small_put(tsdn, &shard->ecs, edata);
+		edata_cache_fast_put(tsdn, &shard->ecf, edata);
 		*oom = true;
 		return NULL;
 	}
@@ -805,7 +805,7 @@ hpa_dalloc_locked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) {
 	assert(ps != NULL);
 	void *unreserve_addr = edata_addr_get(edata);
 	size_t unreserve_size = edata_size_get(edata);
-	edata_cache_small_put(tsdn, &shard->ecs, edata);
+	edata_cache_fast_put(tsdn, &shard->ecf, edata);
 
 	psset_update_begin(&shard->psset, ps);
 	hpdata_unreserve(ps, unreserve_addr, unreserve_size);
@@ -844,7 +844,7 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) {
 void
 hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) {
 	malloc_mutex_lock(tsdn, &shard->mtx);
-	edata_cache_small_disable(tsdn, &shard->ecs);
+	edata_cache_fast_disable(tsdn, &shard->ecf);
 	malloc_mutex_unlock(tsdn, &shard->mtx);
 }