mirror of
https://github.com/jemalloc/jemalloc.git
synced 2026-04-14 22:51:50 +03:00
Until now, jemalloc has converted size to usize by rounding size up to the closest size class. However, this wastes a lot of memory when HPA is enabled. This commit changes how usize is calculated so that the gap between two contiguous usizes is no larger than a page. Specifically, this commit includes the following changes: 1. Add a build-time config option (--enable-limit-usize-gap) and a runtime one (limit_usize_gap) to guard the changes. When the build-time config is enabled, some minor CPU overhead is expected because usize will be stored and accessed apart from index. When the runtime option is also enabled (it can only be enabled with the build-time config enabled), a new usize calculation approach will be employed. This new calculation will round size up to the closest multiple of PAGE for all sizes larger than USIZE_GROW_SLOW_THRESHOLD instead of using the size classes. Note that when the build-time config is enabled, the runtime option is on by default. 2. Prepare tcache for size to grow by PAGE over GROUP*PAGE. To prepare for the upcoming changes where the size class grows by PAGE when larger than NGROUP * PAGE, disable the tcache when it is larger than 2 * NGROUP * PAGE. The threshold for tcache is set higher to prevent perf regression as much as possible while usizes between NGROUP * PAGE and 2 * NGROUP * PAGE happen to grow by PAGE. 3. Prepare pac and hpa psset for size to grow by PAGE over GROUP*PAGE. For PAC, to avoid having too many bins, arena bins still have the same layout. This means some extra search is needed for a page-level request that is not aligned with the original size class: it should also search the heap before the current index, since the previous heap might also have some allocations satisfying it. The same changes apply to HPA's psset. This search relies on the enumeration of the heap because not all allocs in the previous heap are guaranteed to satisfy the request.
To balance the memory and CPU overhead, we currently enumerate at most a fixed number of nodes before concluding that none can satisfy the request during an enumeration. 4. Add bytes counter to arena large stats. To prepare for the upcoming usize changes, stats collected by multiplying alive allocations and the bin size are no longer accurate. Thus, add separate counters to record the bytes malloced and dalloced. 5. Change the structs used when freeing to avoid using index2size for large sizes. - Change the definition of emap_alloc_ctx_t. - Change the read of both from edata_t. - Change the assignment and usage of emap_alloc_ctx_t. - Change other callsites of index2size. Note that the changes in the data structure, i.e., emap_alloc_ctx_t, will be used when the build-time config (--enable-limit-usize-gap) is enabled, but they will store the same value as index2size(szind) if the runtime option (opt_limit_usize_gap) is not enabled. 6. Adapt hpa to the usize changes. Change the settings in sec to limit its usage for sizes larger than USIZE_GROW_SLOW_THRESHOLD and modify corresponding tests. 7. Modify usize calculation and corresponding tests. Change the sz_s2u_compute. Note that sz_index2size is not always safe now, while sz_size2index still works as expected.
347 lines
7.9 KiB
C
347 lines
7.9 KiB
C
#include "test/jemalloc_test.h"
|
|
|
|
#include "jemalloc/internal/ph.h"
|
|
|
|
#define BFS_ENUMERATE_MAX 30
|
|
typedef struct node_s node_t;
|
|
ph_structs(heap, node_t, BFS_ENUMERATE_MAX);
|
|
|
|
struct node_s {
|
|
#define NODE_MAGIC 0x9823af7e
|
|
uint32_t magic;
|
|
heap_link_t link;
|
|
uint64_t key;
|
|
};
|
|
|
|
static int
|
|
node_cmp(const node_t *a, const node_t *b) {
|
|
int ret;
|
|
|
|
ret = (a->key > b->key) - (a->key < b->key);
|
|
if (ret == 0) {
|
|
/*
|
|
* Duplicates are not allowed in the heap, so force an
|
|
* arbitrary ordering for non-identical items with equal keys.
|
|
*/
|
|
ret = (((uintptr_t)a) > ((uintptr_t)b))
|
|
- (((uintptr_t)a) < ((uintptr_t)b));
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
static int
|
|
node_cmp_magic(const node_t *a, const node_t *b) {
|
|
|
|
expect_u32_eq(a->magic, NODE_MAGIC, "Bad magic");
|
|
expect_u32_eq(b->magic, NODE_MAGIC, "Bad magic");
|
|
|
|
return node_cmp(a, b);
|
|
}
|
|
|
|
/*
 * Instantiate the heap implementation for node_t, linked through the `link`
 * field and ordered by node_cmp_magic, so each comparison also checks the
 * nodes' magic values.
 * NOTE(review): the heap_*() functions used below (heap_new, heap_insert,
 * heap_first, ...) presumably come from this expansion -- confirm in ph.h.
 */
ph_gen(static, heap, node_t, link, node_cmp_magic);
|
|
|
|
static node_t *
|
|
node_next_get(const node_t *node) {
|
|
return phn_next_get((node_t *)node, offsetof(node_t, link));
|
|
}
|
|
|
|
static node_t *
|
|
node_prev_get(const node_t *node) {
|
|
return phn_prev_get((node_t *)node, offsetof(node_t, link));
|
|
}
|
|
|
|
static node_t *
|
|
node_lchild_get(const node_t *node) {
|
|
return phn_lchild_get((node_t *)node, offsetof(node_t, link));
|
|
}
|
|
|
|
static void
|
|
node_print(const node_t *node, unsigned depth) {
|
|
unsigned i;
|
|
node_t *leftmost_child, *sibling;
|
|
|
|
for (i = 0; i < depth; i++) {
|
|
malloc_printf("\t");
|
|
}
|
|
malloc_printf("%2"FMTu64"\n", node->key);
|
|
|
|
leftmost_child = node_lchild_get(node);
|
|
if (leftmost_child == NULL) {
|
|
return;
|
|
}
|
|
node_print(leftmost_child, depth + 1);
|
|
|
|
for (sibling = node_next_get(leftmost_child); sibling !=
|
|
NULL; sibling = node_next_get(sibling)) {
|
|
node_print(sibling, depth + 1);
|
|
}
|
|
}
|
|
|
|
static void
|
|
heap_print(const heap_t *heap) {
|
|
node_t *auxelm;
|
|
|
|
malloc_printf("vvv heap %p vvv\n", heap);
|
|
if (heap->ph.root == NULL) {
|
|
goto label_return;
|
|
}
|
|
|
|
node_print(heap->ph.root, 0);
|
|
|
|
for (auxelm = node_next_get(heap->ph.root); auxelm != NULL;
|
|
auxelm = node_next_get(auxelm)) {
|
|
expect_ptr_eq(node_next_get(node_prev_get(auxelm)), auxelm,
|
|
"auxelm's prev doesn't link to auxelm");
|
|
node_print(auxelm, 0);
|
|
}
|
|
|
|
label_return:
|
|
malloc_printf("^^^ heap %p ^^^\n", heap);
|
|
}
|
|
|
|
static unsigned
|
|
node_validate(const node_t *node, const node_t *parent) {
|
|
unsigned nnodes = 1;
|
|
node_t *leftmost_child, *sibling;
|
|
|
|
if (parent != NULL) {
|
|
expect_d_ge(node_cmp_magic(node, parent), 0,
|
|
"Child is less than parent");
|
|
}
|
|
|
|
leftmost_child = node_lchild_get(node);
|
|
if (leftmost_child == NULL) {
|
|
return nnodes;
|
|
}
|
|
expect_ptr_eq(node_prev_get(leftmost_child),
|
|
(void *)node, "Leftmost child does not link to node");
|
|
nnodes += node_validate(leftmost_child, node);
|
|
|
|
for (sibling = node_next_get(leftmost_child); sibling !=
|
|
NULL; sibling = node_next_get(sibling)) {
|
|
expect_ptr_eq(node_next_get(node_prev_get(sibling)), sibling,
|
|
"sibling's prev doesn't link to sibling");
|
|
nnodes += node_validate(sibling, node);
|
|
}
|
|
return nnodes;
|
|
}
|
|
|
|
static unsigned
|
|
heap_validate(const heap_t *heap) {
|
|
unsigned nnodes = 0;
|
|
node_t *auxelm;
|
|
|
|
if (heap->ph.root == NULL) {
|
|
goto label_return;
|
|
}
|
|
|
|
nnodes += node_validate(heap->ph.root, NULL);
|
|
|
|
for (auxelm = node_next_get(heap->ph.root); auxelm != NULL;
|
|
auxelm = node_next_get(auxelm)) {
|
|
expect_ptr_eq(node_next_get(node_prev_get(auxelm)), auxelm,
|
|
"auxelm's prev doesn't link to auxelm");
|
|
nnodes += node_validate(auxelm, NULL);
|
|
}
|
|
|
|
label_return:
|
|
if (false) {
|
|
heap_print(heap);
|
|
}
|
|
return nnodes;
|
|
}
|
|
|
|
TEST_BEGIN(test_ph_empty) {
|
|
heap_t heap;
|
|
|
|
heap_new(&heap);
|
|
expect_true(heap_empty(&heap), "Heap should be empty");
|
|
expect_ptr_null(heap_first(&heap), "Unexpected node");
|
|
expect_ptr_null(heap_any(&heap), "Unexpected node");
|
|
}
|
|
TEST_END
|
|
|
|
static void
|
|
node_remove(heap_t *heap, node_t *node) {
|
|
heap_remove(heap, node);
|
|
|
|
node->magic = 0;
|
|
}
|
|
|
|
static node_t *
|
|
node_remove_first(heap_t *heap) {
|
|
node_t *node = heap_remove_first(heap);
|
|
node->magic = 0;
|
|
return node;
|
|
}
|
|
|
|
static node_t *
|
|
node_remove_any(heap_t *heap) {
|
|
node_t *node = heap_remove_any(heap);
|
|
node->magic = 0;
|
|
return node;
|
|
}
|
|
|
|
TEST_BEGIN(test_ph_random) {
|
|
#define NNODES 25
|
|
#define NBAGS 250
|
|
#define SEED 42
|
|
sfmt_t *sfmt;
|
|
uint64_t bag[NNODES];
|
|
heap_t heap;
|
|
node_t nodes[NNODES];
|
|
unsigned i, j, k;
|
|
|
|
sfmt = init_gen_rand(SEED);
|
|
for (i = 0; i < NBAGS; i++) {
|
|
switch (i) {
|
|
case 0:
|
|
/* Insert in order. */
|
|
for (j = 0; j < NNODES; j++) {
|
|
bag[j] = j;
|
|
}
|
|
break;
|
|
case 1:
|
|
/* Insert in reverse order. */
|
|
for (j = 0; j < NNODES; j++) {
|
|
bag[j] = NNODES - j - 1;
|
|
}
|
|
break;
|
|
default:
|
|
for (j = 0; j < NNODES; j++) {
|
|
bag[j] = gen_rand64_range(sfmt, NNODES);
|
|
}
|
|
}
|
|
|
|
for (j = 1; j <= NNODES; j++) {
|
|
/* Initialize heap and nodes. */
|
|
heap_new(&heap);
|
|
expect_u_eq(heap_validate(&heap), 0,
|
|
"Incorrect node count");
|
|
for (k = 0; k < j; k++) {
|
|
nodes[k].magic = NODE_MAGIC;
|
|
nodes[k].key = bag[k];
|
|
}
|
|
|
|
/* Insert nodes. */
|
|
for (k = 0; k < j; k++) {
|
|
heap_insert(&heap, &nodes[k]);
|
|
if (i % 13 == 12) {
|
|
expect_ptr_not_null(heap_any(&heap),
|
|
"Heap should not be empty");
|
|
/* Trigger merging. */
|
|
expect_ptr_not_null(heap_first(&heap),
|
|
"Heap should not be empty");
|
|
}
|
|
expect_u_eq(heap_validate(&heap), k + 1,
|
|
"Incorrect node count");
|
|
}
|
|
|
|
expect_false(heap_empty(&heap),
|
|
"Heap should not be empty");
|
|
|
|
/* Enumerate nodes. */
|
|
heap_enumerate_helper_t helper;
|
|
uint16_t max_queue_size = sizeof(helper.bfs_queue)
|
|
/ sizeof(void *);
|
|
expect_u_eq(max_queue_size, BFS_ENUMERATE_MAX,
|
|
"Incorrect bfs queue length initialized");
|
|
assert(max_queue_size == BFS_ENUMERATE_MAX);
|
|
heap_enumerate_prepare(&heap, &helper,
|
|
BFS_ENUMERATE_MAX, max_queue_size);
|
|
size_t node_count = 0;
|
|
while(heap_enumerate_next(&heap, &helper)) {
|
|
node_count ++;
|
|
}
|
|
expect_lu_eq(node_count, j,
|
|
"Unexpected enumeration results.");
|
|
|
|
/* Remove nodes. */
|
|
switch (i % 6) {
|
|
case 0:
|
|
for (k = 0; k < j; k++) {
|
|
expect_u_eq(heap_validate(&heap), j - k,
|
|
"Incorrect node count");
|
|
node_remove(&heap, &nodes[k]);
|
|
expect_u_eq(heap_validate(&heap), j - k
|
|
- 1, "Incorrect node count");
|
|
}
|
|
break;
|
|
case 1:
|
|
for (k = j; k > 0; k--) {
|
|
node_remove(&heap, &nodes[k-1]);
|
|
expect_u_eq(heap_validate(&heap), k - 1,
|
|
"Incorrect node count");
|
|
}
|
|
break;
|
|
case 2: {
|
|
node_t *prev = NULL;
|
|
for (k = 0; k < j; k++) {
|
|
node_t *node = node_remove_first(&heap);
|
|
expect_u_eq(heap_validate(&heap), j - k
|
|
- 1, "Incorrect node count");
|
|
if (prev != NULL) {
|
|
expect_d_ge(node_cmp(node,
|
|
prev), 0,
|
|
"Bad removal order");
|
|
}
|
|
prev = node;
|
|
}
|
|
break;
|
|
} case 3: {
|
|
node_t *prev = NULL;
|
|
for (k = 0; k < j; k++) {
|
|
node_t *node = heap_first(&heap);
|
|
expect_u_eq(heap_validate(&heap), j - k,
|
|
"Incorrect node count");
|
|
if (prev != NULL) {
|
|
expect_d_ge(node_cmp(node,
|
|
prev), 0,
|
|
"Bad removal order");
|
|
}
|
|
node_remove(&heap, node);
|
|
expect_u_eq(heap_validate(&heap), j - k
|
|
- 1, "Incorrect node count");
|
|
prev = node;
|
|
}
|
|
break;
|
|
} case 4: {
|
|
for (k = 0; k < j; k++) {
|
|
node_remove_any(&heap);
|
|
expect_u_eq(heap_validate(&heap), j - k
|
|
- 1, "Incorrect node count");
|
|
}
|
|
break;
|
|
} case 5: {
|
|
for (k = 0; k < j; k++) {
|
|
node_t *node = heap_any(&heap);
|
|
expect_u_eq(heap_validate(&heap), j - k,
|
|
"Incorrect node count");
|
|
node_remove(&heap, node);
|
|
expect_u_eq(heap_validate(&heap), j - k
|
|
- 1, "Incorrect node count");
|
|
}
|
|
break;
|
|
} default:
|
|
not_reached();
|
|
}
|
|
|
|
expect_ptr_null(heap_first(&heap),
|
|
"Heap should be empty");
|
|
expect_ptr_null(heap_any(&heap),
|
|
"Heap should be empty");
|
|
expect_true(heap_empty(&heap), "Heap should be empty");
|
|
}
|
|
}
|
|
fini_gen_rand(sfmt);
|
|
#undef NNODES
|
|
#undef SEED
|
|
}
|
|
TEST_END
|
|
|
|
int
|
|
main(void) {
|
|
return test(
|
|
test_ph_empty,
|
|
test_ph_random);
|
|
}
|