From 03a604711113c9d883242291ca11b77c83ba4c75 Mon Sep 17 00:00:00 2001
From: David Goldblatt <davidgoldblatt@fb.com>
Date: Thu, 29 Oct 2020 05:11:16 -0700
Subject: [PATCH] Edata cache small: rewrite.

In previous designs, this was intended to be a sort of cache that couldn't fail.
In the current design, we want to use it just as a contention-reduction
mechanism.  Rewrite it with that goal in mind.
---
 include/jemalloc/internal/edata_cache.h |  39 ++---
 src/edata_cache.c                       | 134 ++++++++++-----
 test/unit/edata_cache.c                 | 206 +++++++++++++++++++++---
 3 files changed, 302 insertions(+), 77 deletions(-)

diff --git a/include/jemalloc/internal/edata_cache.h b/include/jemalloc/internal/edata_cache.h
index 02685c87..f7d0c319 100644
--- a/include/jemalloc/internal/edata_cache.h
+++ b/include/jemalloc/internal/edata_cache.h
@@ -3,6 +3,16 @@
 
 #include "jemalloc/internal/base.h"
 
+/*
+ * Public for tests.  When the small cache is empty and we go to the fallback,
+ * we grab up to EDATA_CACHE_SMALL_FILL items (fewer only if the fallback is
+ * exhausted).  When a put pushes us past EDATA_CACHE_SMALL_MAX, we flush
+ * everything.  This caps the memory lost per cache at EDATA_CACHE_SMALL_MAX *
+ * sizeof(edata_t): 2k on architectures where an edata_t is 128 bytes.
+ */
+#define EDATA_CACHE_SMALL_MAX 16
+#define EDATA_CACHE_SMALL_FILL 8
+
 /*
  * A cache of edata_t structures allocated via base_alloc_edata (as opposed to
  * the underlying extents they describe).  The contents of returned edata_t
@@ -25,32 +35,23 @@ void edata_cache_prefork(tsdn_t *tsdn, edata_cache_t *edata_cache);
 void edata_cache_postfork_parent(tsdn_t *tsdn, edata_cache_t *edata_cache);
 void edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache);
 
+/*
+ * An edata_cache_small is like an edata_cache, but it relies on external
+ * synchronization and avoids first-fit strategies.
+ */
+
 typedef struct edata_cache_small_s edata_cache_small_t;
 struct edata_cache_small_s {
 	edata_list_inactive_t list;
 	size_t count;
 	edata_cache_t *fallback;
+	bool disabled;
 };
 
-/*
- * An edata_cache_small is like an edata_cache, but it relies on external
- * synchronization and avoids first-fit strategies.  You can call "prepare" to
- * acquire at least num edata_t objects, and then "finish" to flush all
- * excess ones back to their fallback edata_cache_t.  Once they have been
- * acquired, they can be allocated without failing (and in fact, this is
- * required -- it's not permitted to attempt to get an edata_t without first
- * preparing for it).
- */
-
 void edata_cache_small_init(edata_cache_small_t *ecs, edata_cache_t *fallback);
-
-/* Returns whether or not an error occurred. */
-bool edata_cache_small_prepare(tsdn_t *tsdn, edata_cache_small_t *ecs,
-    size_t num);
-edata_t *edata_cache_small_get(edata_cache_small_t *ecs);
-
-void edata_cache_small_put(edata_cache_small_t *ecs, edata_t *edata);
-void edata_cache_small_finish(tsdn_t *tsdn, edata_cache_small_t *ecs,
-    size_t num);
+edata_t *edata_cache_small_get(tsdn_t *tsdn, edata_cache_small_t *ecs);
+void edata_cache_small_put(tsdn_t *tsdn, edata_cache_small_t *ecs,
+    edata_t *edata);
+void edata_cache_small_disable(tsdn_t *tsdn, edata_cache_small_t *ecs);
 
 #endif /* JEMALLOC_INTERNAL_EDATA_CACHE_H */
diff --git a/src/edata_cache.c b/src/edata_cache.c
index d899ce80..ecfce414 100644
--- a/src/edata_cache.c
+++ b/src/edata_cache.c
@@ -27,8 +27,7 @@ edata_cache_get(tsdn_t *tsdn, edata_cache_t *edata_cache) {
 		return base_alloc_edata(tsdn, edata_cache->base);
 	}
 	edata_avail_remove(&edata_cache->avail, edata);
-	size_t count = atomic_load_zu(&edata_cache->count, ATOMIC_RELAXED);
-	atomic_store_zu(&edata_cache->count, count - 1, ATOMIC_RELAXED);
+	atomic_load_sub_store_zu(&edata_cache->count, 1);
 	malloc_mutex_unlock(tsdn, &edata_cache->mtx);
 	return edata;
 }
@@ -37,8 +36,7 @@ void
 edata_cache_put(tsdn_t *tsdn, edata_cache_t *edata_cache, edata_t *edata) {
 	malloc_mutex_lock(tsdn, &edata_cache->mtx);
 	edata_avail_insert(&edata_cache->avail, edata);
-	size_t count = atomic_load_zu(&edata_cache->count, ATOMIC_RELAXED);
-	atomic_store_zu(&edata_cache->count, count + 1, ATOMIC_RELAXED);
+	atomic_load_add_store_zu(&edata_cache->count, 1);
 	malloc_mutex_unlock(tsdn, &edata_cache->mtx);
 }
 
@@ -62,48 +60,110 @@ edata_cache_small_init(edata_cache_small_t *ecs, edata_cache_t *fallback) {
 	edata_list_inactive_init(&ecs->list);
 	ecs->count = 0;
 	ecs->fallback = fallback;
+	ecs->disabled = false;
+}
+
+static void
+edata_cache_small_try_fill_from_fallback(tsdn_t *tsdn,
+    edata_cache_small_t *ecs) {
+	assert(ecs->count == 0);
+	edata_t *edata;
+	malloc_mutex_lock(tsdn, &ecs->fallback->mtx);
+	while (ecs->count < EDATA_CACHE_SMALL_FILL) {
+		edata = edata_avail_first(&ecs->fallback->avail);
+		if (edata == NULL) {
+			break;
+		}
+		edata_avail_remove(&ecs->fallback->avail, edata);
+		edata_list_inactive_append(&ecs->list, edata);
+		ecs->count++;
+		atomic_load_sub_store_zu(&ecs->fallback->count, 1);
+	}
+	malloc_mutex_unlock(tsdn, &ecs->fallback->mtx);
 }
 
 edata_t *
-edata_cache_small_get(edata_cache_small_t *ecs) {
-	assert(ecs->count > 0);
+edata_cache_small_get(tsdn_t *tsdn, edata_cache_small_t *ecs) {
+	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+	    WITNESS_RANK_EDATA_CACHE, 0);
+
+	if (ecs->disabled) {
+		assert(ecs->count == 0);
+		assert(edata_list_inactive_first(&ecs->list) == NULL);
+		return edata_cache_get(tsdn, ecs->fallback);
+	}
+
 	edata_t *edata = edata_list_inactive_first(&ecs->list);
-	assert(edata != NULL);
-	edata_list_inactive_remove(&ecs->list, edata);
-	ecs->count--;
+	if (edata != NULL) {
+		edata_list_inactive_remove(&ecs->list, edata);
+		ecs->count--;
+		return edata;
+	}
+	/* Slow path; requires synchronization. */
+	edata_cache_small_try_fill_from_fallback(tsdn, ecs);
+	edata = edata_list_inactive_first(&ecs->list);
+	if (edata != NULL) {
+		edata_list_inactive_remove(&ecs->list, edata);
+		ecs->count--;
+	} else {
+		/*
+		 * Slowest path (fallback was also empty); allocate something
+		 * new.
+		 */
+		edata = base_alloc_edata(tsdn, ecs->fallback->base);
+	}
 	return edata;
 }
 
+static void
+edata_cache_small_flush_all(tsdn_t *tsdn, edata_cache_small_t *ecs) {
+	/*
+	 * You could imagine smarter cache management policies (like
+	 * only flushing down to some threshold in anticipation of
+	 * future get requests).  But just flushing everything provides
+	 * a good opportunity to defrag too, and lets us share code between the
+	 * flush and disable pathways.
+	 */
+	edata_t *edata;
+	size_t nflushed = 0;
+	malloc_mutex_lock(tsdn, &ecs->fallback->mtx);
+	while ((edata = edata_list_inactive_first(&ecs->list)) != NULL) {
+		edata_list_inactive_remove(&ecs->list, edata);
+		edata_avail_insert(&ecs->fallback->avail, edata);
+		nflushed++;
+	}
+	atomic_load_add_store_zu(&ecs->fallback->count, ecs->count);
+	malloc_mutex_unlock(tsdn, &ecs->fallback->mtx);
+	assert(nflushed == ecs->count);
+	ecs->count = 0;
+}
+
 void
-edata_cache_small_put(edata_cache_small_t *ecs, edata_t *edata) {
-	assert(edata != NULL);
-	edata_list_inactive_append(&ecs->list, edata);
-	ecs->count++;
-}
+edata_cache_small_put(tsdn_t *tsdn, edata_cache_small_t *ecs, edata_t *edata) {
+	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+	    WITNESS_RANK_EDATA_CACHE, 0);
 
-bool edata_cache_small_prepare(tsdn_t *tsdn, edata_cache_small_t *ecs,
-    size_t num) {
-	while (ecs->count < num) {
-		/*
-		 * Obviously, we can be smarter here and batch the locking that
-		 * happens inside of edata_cache_get.  But for now, something
-		 * quick-and-dirty is fine.
-		 */
-		edata_t *edata = edata_cache_get(tsdn, ecs->fallback);
-		if (edata == NULL) {
-			return true;
-		}
-		ql_elm_new(edata, ql_link_inactive);
-		edata_cache_small_put(ecs, edata);
-	}
-	return false;
-}
-
-void edata_cache_small_finish(tsdn_t *tsdn, edata_cache_small_t *ecs,
-    size_t num) {
-	while (ecs->count > num) {
-		/* Same deal here -- we should be batching. */
-		edata_t *edata = edata_cache_small_get(ecs);
+	if (ecs->disabled) {
+		assert(ecs->count == 0);
+		assert(edata_list_inactive_first(&ecs->list) == NULL);
 		edata_cache_put(tsdn, ecs->fallback, edata);
+		return;
+	}
+
+	/*
+	 * Prepend rather than append: LIFO ordering hands back the most
+	 * recently put edata_t first, in the hope of better cache locality.
+	 */
+	edata_list_inactive_prepend(&ecs->list, edata);
+	ecs->count++;
+	if (ecs->count > EDATA_CACHE_SMALL_MAX) {
+		assert(ecs->count == EDATA_CACHE_SMALL_MAX + 1);
+		edata_cache_small_flush_all(tsdn, ecs);
 	}
 }
+
+void
+edata_cache_small_disable(tsdn_t *tsdn, edata_cache_small_t *ecs) {
+	edata_cache_small_flush_all(tsdn, ecs);
+	ecs->disabled = true;
+}
diff --git a/test/unit/edata_cache.c b/test/unit/edata_cache.c
index 22c9dcb8..9a5d14b0 100644
--- a/test/unit/edata_cache.c
+++ b/test/unit/edata_cache.c
@@ -47,37 +47,198 @@ TEST_BEGIN(test_edata_cache) {
 }
 TEST_END
 
-TEST_BEGIN(test_edata_cache_small) {
+TEST_BEGIN(test_edata_cache_small_simple) {
 	edata_cache_t ec;
 	edata_cache_small_t ecs;
 
 	test_edata_cache_init(&ec);
 	edata_cache_small_init(&ecs, &ec);
 
-	bool err = edata_cache_small_prepare(TSDN_NULL, &ecs, 2);
-	assert_false(err, "");
-	assert_zu_eq(ecs.count, 2, "");
-	assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, "");
+	edata_t *ed1 = edata_cache_small_get(TSDN_NULL, &ecs);
+	expect_ptr_not_null(ed1, "");
+	expect_zu_eq(ecs.count, 0, "");
+	expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, "");
 
-	edata_t *ed1 = edata_cache_small_get(&ecs);
-	assert_zu_eq(ecs.count, 1, "");
-	assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, "");
+	edata_t *ed2 = edata_cache_small_get(TSDN_NULL, &ecs);
+	expect_ptr_not_null(ed2, "");
+	expect_zu_eq(ecs.count, 0, "");
+	expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, "");
 
-	edata_t *ed2 = edata_cache_small_get(&ecs);
-	assert_zu_eq(ecs.count, 0, "");
-	assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, "");
+	edata_cache_small_put(TSDN_NULL, &ecs, ed1);
+	expect_zu_eq(ecs.count, 1, "");
+	expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, "");
 
-	edata_cache_small_put(&ecs, ed1);
-	assert_zu_eq(ecs.count, 1, "");
-	assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, "");
+	edata_cache_small_put(TSDN_NULL, &ecs, ed2);
+	expect_zu_eq(ecs.count, 2, "");
+	expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, "");
 
-	edata_cache_small_put(&ecs, ed2);
-	assert_zu_eq(ecs.count, 2, "");
-	assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, "");
+	/* LIFO ordering. */
+	expect_ptr_eq(ed2, edata_cache_small_get(TSDN_NULL, &ecs), "");
+	expect_zu_eq(ecs.count, 1, "");
+	expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, "");
 
-	edata_cache_small_finish(TSDN_NULL, &ecs, 1);
-	assert_zu_eq(ecs.count, 1, "");
-	assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 1, "");
+	expect_ptr_eq(ed1, edata_cache_small_get(TSDN_NULL, &ecs), "");
+	expect_zu_eq(ecs.count, 0, "");
+	expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, "");
+
+	test_edata_cache_destroy(&ec);
+}
+TEST_END
+
+TEST_BEGIN(test_edata_cache_fill) {
+	edata_cache_t ec;
+	edata_cache_small_t ecs;
+
+	test_edata_cache_init(&ec);
+	edata_cache_small_init(&ecs, &ec);
+
+	edata_t *allocs[EDATA_CACHE_SMALL_FILL * 2];
+
+	/*
+	 * If the fallback cache can't satisfy the request, we shouldn't do
+	 * extra allocations until compelled to.  Put half the fill goal in the
+	 * fallback.
+	 */
+	for (int i = 0; i < EDATA_CACHE_SMALL_FILL / 2; i++) {
+		allocs[i] = edata_cache_get(TSDN_NULL, &ec);
+	}
+	for (int i = 0; i < EDATA_CACHE_SMALL_FILL / 2; i++) {
+		edata_cache_put(TSDN_NULL, &ec, allocs[i]);
+	}
+	expect_zu_eq(EDATA_CACHE_SMALL_FILL / 2,
+	    atomic_load_zu(&ec.count, ATOMIC_RELAXED), "");
+
+	allocs[0] = edata_cache_small_get(TSDN_NULL, &ecs);
+	expect_zu_eq(EDATA_CACHE_SMALL_FILL / 2 - 1, ecs.count,
+	    "Should have grabbed all edatas available but no more.");
+
+	for (int i = 1; i < EDATA_CACHE_SMALL_FILL / 2; i++) {
+		allocs[i] = edata_cache_small_get(TSDN_NULL, &ecs);
+		expect_ptr_not_null(allocs[i], "");
+	}
+	expect_zu_eq(0, ecs.count, "");
+
+	/* When forced, we should alloc from the base. */
+	edata_t *edata = edata_cache_small_get(TSDN_NULL, &ecs);
+	expect_ptr_not_null(edata, "");
+	expect_zu_eq(0, ecs.count, "Allocated more than necessary");
+	expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED),
+	    "Allocated more than necessary");
+
+	/*
+	 * We should correctly fill in the common case where the fallback isn't
+	 * exhausted, too.
+	 */
+	for (int i = 0; i < EDATA_CACHE_SMALL_FILL * 2; i++) {
+		allocs[i] = edata_cache_get(TSDN_NULL, &ec);
+		expect_ptr_not_null(allocs[i], "");
+	}
+	for (int i = 0; i < EDATA_CACHE_SMALL_FILL * 2; i++) {
+		edata_cache_put(TSDN_NULL, &ec, allocs[i]);
+	}
+
+	allocs[0] = edata_cache_small_get(TSDN_NULL, &ecs);
+	expect_zu_eq(EDATA_CACHE_SMALL_FILL - 1, ecs.count, "");
+	expect_zu_eq(EDATA_CACHE_SMALL_FILL,
+	    atomic_load_zu(&ec.count, ATOMIC_RELAXED), "");
+	for (int i = 1; i < EDATA_CACHE_SMALL_FILL; i++) {
+		expect_zu_eq(EDATA_CACHE_SMALL_FILL - i, ecs.count, "");
+		expect_zu_eq(EDATA_CACHE_SMALL_FILL,
+		    atomic_load_zu(&ec.count, ATOMIC_RELAXED), "");
+		allocs[i] = edata_cache_small_get(TSDN_NULL, &ecs);
+		expect_ptr_not_null(allocs[i], "");
+	}
+	expect_zu_eq(0, ecs.count, "");
+	expect_zu_eq(EDATA_CACHE_SMALL_FILL,
+	    atomic_load_zu(&ec.count, ATOMIC_RELAXED), "");
+
+	allocs[0] = edata_cache_small_get(TSDN_NULL, &ecs);
+	expect_zu_eq(EDATA_CACHE_SMALL_FILL - 1, ecs.count, "");
+	expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), "");
+	for (int i = 1; i < EDATA_CACHE_SMALL_FILL; i++) {
+		expect_zu_eq(EDATA_CACHE_SMALL_FILL - i, ecs.count, "");
+		expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), "");
+		allocs[i] = edata_cache_small_get(TSDN_NULL, &ecs);
+		expect_ptr_not_null(allocs[i], "");
+	}
+	expect_zu_eq(0, ecs.count, "");
+	expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), "");
+
+	test_edata_cache_destroy(&ec);
+}
+TEST_END
+
+TEST_BEGIN(test_edata_cache_flush) {
+	edata_cache_t ec;
+	edata_cache_small_t ecs;
+
+	test_edata_cache_init(&ec);
+	edata_cache_small_init(&ecs, &ec);
+
+	edata_t *allocs[2 * EDATA_CACHE_SMALL_MAX + 2];
+	for (int i = 0; i < 2 * EDATA_CACHE_SMALL_MAX + 2; i++) {
+		allocs[i] = edata_cache_get(TSDN_NULL, &ec);
+		expect_ptr_not_null(allocs[i], "");
+	}
+	for (int i = 0; i < EDATA_CACHE_SMALL_MAX; i++) {
+		edata_cache_small_put(TSDN_NULL, &ecs, allocs[i]);
+		expect_zu_eq(i + 1, ecs.count, "");
+		expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), "");
+	}
+	edata_cache_small_put(TSDN_NULL, &ecs, allocs[EDATA_CACHE_SMALL_MAX]);
+	expect_zu_eq(0, ecs.count, "");
+	expect_zu_eq(EDATA_CACHE_SMALL_MAX + 1,
+	    atomic_load_zu(&ec.count, ATOMIC_RELAXED), "");
+
+	for (int i = EDATA_CACHE_SMALL_MAX + 1;
+	    i < 2 * EDATA_CACHE_SMALL_MAX + 1; i++) {
+		edata_cache_small_put(TSDN_NULL, &ecs, allocs[i]);
+		expect_zu_eq(i - EDATA_CACHE_SMALL_MAX, ecs.count, "");
+		expect_zu_eq(EDATA_CACHE_SMALL_MAX + 1,
+		    atomic_load_zu(&ec.count, ATOMIC_RELAXED), "");
+	}
+	edata_cache_small_put(TSDN_NULL, &ecs, allocs[2 * EDATA_CACHE_SMALL_MAX + 1]);
+	expect_zu_eq(0, ecs.count, "");
+	expect_zu_eq(2 * EDATA_CACHE_SMALL_MAX + 2,
+	    atomic_load_zu(&ec.count, ATOMIC_RELAXED), "");
+
+	test_edata_cache_destroy(&ec);
+}
+TEST_END
+
+TEST_BEGIN(test_edata_cache_disable) {
+	edata_cache_t ec;
+	edata_cache_small_t ecs;
+
+	test_edata_cache_init(&ec);
+	edata_cache_small_init(&ecs, &ec);
+
+	for (int i = 0; i < EDATA_CACHE_SMALL_FILL; i++) {
+		edata_t *edata = edata_cache_get(TSDN_NULL, &ec);
+		expect_ptr_not_null(edata, "");
+		edata_cache_small_put(TSDN_NULL, &ecs, edata);
+	}
+
+	expect_zu_eq(EDATA_CACHE_SMALL_FILL, ecs.count, "");
+	expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), "");
+
+	edata_cache_small_disable(TSDN_NULL, &ecs);
+
+	expect_zu_eq(0, ecs.count, "");
+	expect_zu_eq(EDATA_CACHE_SMALL_FILL,
+	    atomic_load_zu(&ec.count, ATOMIC_RELAXED), "Disabling should flush");
+
+	edata_t *edata = edata_cache_small_get(TSDN_NULL, &ecs);
+	expect_zu_eq(0, ecs.count, "");
+	expect_zu_eq(EDATA_CACHE_SMALL_FILL - 1,
+	    atomic_load_zu(&ec.count, ATOMIC_RELAXED),
+	    "Disabled ecs should forward on get");
+
+	edata_cache_small_put(TSDN_NULL, &ecs, edata);
+	expect_zu_eq(0, ecs.count, "");
+	expect_zu_eq(EDATA_CACHE_SMALL_FILL,
+	    atomic_load_zu(&ec.count, ATOMIC_RELAXED),
+	    "Disabled ecs should forward on put");
 
 	test_edata_cache_destroy(&ec);
 }
@@ -87,5 +248,8 @@ int
 main(void) {
 	return test(
 	    test_edata_cache,
-	    test_edata_cache_small);
+	    test_edata_cache_small_simple,
+	    test_edata_cache_fill,
+	    test_edata_cache_flush,
+	    test_edata_cache_disable);
 }