diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 2381ccbc..aeed7482 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -21,6 +21,14 @@ */ #define EDATA_ALIGNMENT 128 +/* + * Defines how many nodes are visited when enumerating the heap to search for + * qualified extents. More nodes visited may result in better choices at + * the cost of longer search time. This size should not exceed 2^16 - 1 + * because we use uint16_t for accessing the queue needed for enumeration. + */ +#define ESET_ENUMERATE_MAX_NUM 32 + enum extent_state_e { extent_state_active = 0, extent_state_dirty = 1, @@ -89,8 +97,8 @@ struct edata_cmp_summary_s { /* Extent (span of pages). Use accessor functions for e_* fields. */ typedef struct edata_s edata_t; -ph_structs(edata_avail, edata_t); -ph_structs(edata_heap, edata_t); +ph_structs(edata_avail, edata_t, ESET_ENUMERATE_MAX_NUM); +ph_structs(edata_heap, edata_t, ESET_ENUMERATE_MAX_NUM); struct edata_s { /* * Bitfield containing several fields: diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 7ba92112..a8a845ec 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -20,8 +20,14 @@ * an observable property of any given region of address space). It's just * hugepage-sized and hugepage-aligned; it's *potentially* huge. */ + +/* + * The max enumeration num should not exceed 2^16 - 1, see comments in edata.h + * for ESET_ENUMERATE_MAX_NUM for more details. 
+ */ +#define PSSET_ENUMERATE_MAX_NUM 32 typedef struct hpdata_s hpdata_t; -ph_structs(hpdata_age_heap, hpdata_t); +ph_structs(hpdata_age_heap, hpdata_t, PSSET_ENUMERATE_MAX_NUM); struct hpdata_s { /* * We likewise follow the edata convention of mangling names and forcing diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index ef9634be..05376004 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -75,6 +75,16 @@ struct ph_s { size_t auxcount; }; +typedef struct ph_enumerate_vars_s ph_enumerate_vars_t; +struct ph_enumerate_vars_s { + uint16_t front; + uint16_t rear; + uint16_t queue_size; + uint16_t visited_num; + uint16_t max_visit_num; + uint16_t max_queue_size; +}; + JEMALLOC_ALWAYS_INLINE phn_link_t * phn_link_get(void *phn, size_t offset) { return (phn_link_t *)(((char *)phn) + offset); @@ -414,14 +424,98 @@ ph_remove(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { } } -#define ph_structs(a_prefix, a_type) \ +JEMALLOC_ALWAYS_INLINE void +ph_enumerate_vars_init(ph_enumerate_vars_t *vars, uint16_t max_visit_num, + uint16_t max_queue_size) { + vars->queue_size = 0; + vars->visited_num = 0; + vars->front = 0; + vars->rear = 0; + vars->max_visit_num = max_visit_num; + vars->max_queue_size = max_queue_size; + assert(vars->max_visit_num > 0); + /* + * max_queue_size must be able to support max_visit_num, which means + * the queue will not overflow before reaching max_visit_num. NOTE(review): ph_enumerate_next pops one node and may push two per visit, so confirm this bound still holds when the heap holds more than max_visit_num nodes. 
+ */ + assert(vars->max_queue_size >= (vars->max_visit_num + 1)/2); +} + +JEMALLOC_ALWAYS_INLINE void +ph_enumerate_queue_push(void *phn, void **bfs_queue, + ph_enumerate_vars_t *vars) { + assert(vars->queue_size < vars->max_queue_size); + bfs_queue[vars->rear] = phn; + vars->rear = (vars->rear + 1) % vars->max_queue_size; + (vars->queue_size) ++; +} + +JEMALLOC_ALWAYS_INLINE void * +ph_enumerate_queue_pop(void **bfs_queue, ph_enumerate_vars_t *vars) { + assert(vars->queue_size > 0); + assert(vars->queue_size <= vars->max_queue_size); + void *ret = bfs_queue[vars->front]; + vars->front = (vars->front + 1) % vars->max_queue_size; + (vars->queue_size) --; + return ret; +} + + +/* + * The two functions below offer a solution to enumerate the pairing heap. + * When enumerating, always call ph_enumerate_prepare first to prepare the queue + * needed for BFS. Next, call ph_enumerate_next to get the next element in + * the enumeration. When enumeration ends, ph_enumerate_next returns NULL and + * should not be called again. Enumeration ends when all elements in the heap + * have been enumerated or the number of visited elements exceeds + * max_visit_num. Each ph_enumerate_next call dequeues one node and enqueues its lchild and next links when they are non-NULL. 
+ */ +JEMALLOC_ALWAYS_INLINE void +ph_enumerate_prepare(ph_t *ph, void **bfs_queue, ph_enumerate_vars_t *vars, + uint16_t max_visit_num, uint16_t max_queue_size) { + ph_enumerate_vars_init(vars, max_visit_num, max_queue_size); + ph_enumerate_queue_push(ph->root, bfs_queue, vars); +} + +JEMALLOC_ALWAYS_INLINE void * +ph_enumerate_next(ph_t *ph, size_t offset, void **bfs_queue, + ph_enumerate_vars_t *vars) { + if (vars->queue_size == 0) { + return NULL; + } + + (vars->visited_num) ++; + if (vars->visited_num > vars->max_visit_num) { + return NULL; + } + + void *ret = ph_enumerate_queue_pop(bfs_queue, vars); + assert(ret != NULL); + void *left = phn_lchild_get(ret, offset); + void *right = phn_next_get(ret, offset); + if (left) { + ph_enumerate_queue_push(left, bfs_queue, vars); + } + if (right) { + ph_enumerate_queue_push(right, bfs_queue, vars); + } + return ret; +} + +#define ph_structs(a_prefix, a_type, a_max_queue_size) \ typedef struct { \ phn_link_t link; \ } a_prefix##_link_t; \ \ typedef struct { \ ph_t ph; \ -} a_prefix##_t; +} a_prefix##_t; \ + \ +typedef struct { \ + void *bfs_queue[a_max_queue_size]; \ + ph_enumerate_vars_t vars; \ +} a_prefix##_enumerate_helper_t; + /* * The ph_proto() macro generates function prototypes that correspond to the @@ -436,7 +530,12 @@ a_attr a_type *a_prefix##_any(a_prefix##_t *ph); \ a_attr void a_prefix##_insert(a_prefix##_t *ph, a_type *phn); \ a_attr a_type *a_prefix##_remove_first(a_prefix##_t *ph); \ a_attr void a_prefix##_remove(a_prefix##_t *ph, a_type *phn); \ -a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph); +a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph); \ +a_attr void a_prefix##_enumerate_prepare(a_prefix##_t *ph, \ + a_prefix##_enumerate_helper_t *helper, uint16_t max_visit_num, \ + uint16_t max_queue_size); \ +a_attr a_type *a_prefix##_enumerate_next(a_prefix##_t *ph, \ + a_prefix##_enumerate_helper_t *helper); /* The ph_gen() macro generates a type-specific pairing heap implementation. 
+ */ #define ph_gen(a_attr, a_prefix, a_type, a_field, a_cmp) \ @@ -491,6 +590,21 @@ a_prefix##_remove_any(a_prefix##_t *ph) { \ a_prefix##_remove(ph, ret); \ } \ return ret; \ +} \ + \ +a_attr void \ +a_prefix##_enumerate_prepare(a_prefix##_t *ph, \ + a_prefix##_enumerate_helper_t *helper, uint16_t max_visit_num, \ + uint16_t max_queue_size) { \ + ph_enumerate_prepare(&ph->ph, helper->bfs_queue, &helper->vars, \ + max_visit_num, max_queue_size); \ +} \ + \ +a_attr a_type * \ +a_prefix##_enumerate_next(a_prefix##_t *ph, \ + a_prefix##_enumerate_helper_t *helper) { \ + return ph_enumerate_next(&ph->ph, offsetof(a_type, a_field), \ + helper->bfs_queue, &helper->vars); \ } #endif /* JEMALLOC_INTERNAL_PH_H */ diff --git a/src/eset.c b/src/eset.c index 6f8f335e..4699c607 100644 --- a/src/eset.c +++ b/src/eset.c @@ -155,6 +155,69 @@ eset_remove(eset_t *eset, edata_t *edata) { cur_extents_npages - (size >> LG_PAGE), ATOMIC_RELAXED); } +edata_t * +eset_enumerate_alignment_search(eset_t *eset, size_t size, pszind_t bin_ind, + size_t alignment) { + if (edata_heap_empty(&eset->bins[bin_ind].heap)) { + return NULL; + } + + edata_t *edata = NULL; + edata_heap_enumerate_helper_t helper; + edata_heap_enumerate_prepare(&eset->bins[bin_ind].heap, &helper, + ESET_ENUMERATE_MAX_NUM, sizeof(helper.bfs_queue)/sizeof(void *)); + while ((edata = + edata_heap_enumerate_next(&eset->bins[bin_ind].heap, &helper))) { + uintptr_t base = (uintptr_t)edata_base_get(edata); + size_t candidate_size = edata_size_get(edata); + if (candidate_size < size) { + continue; + } + + uintptr_t next_align = ALIGNMENT_CEILING((uintptr_t)base, + PAGE_CEILING(alignment)); + if (base > next_align || base + candidate_size <= next_align) { + /* Overflow or not crossing the next alignment. 
+ */ + continue; + } + + size_t leadsize = next_align - base; + if (candidate_size - leadsize >= size) { + return edata; + } + } + + return NULL; +} + +edata_t * +eset_enumerate_search(eset_t *eset, size_t size, pszind_t bin_ind, + bool exact_only, edata_cmp_summary_t *ret_summ) { + if (edata_heap_empty(&eset->bins[bin_ind].heap)) { + return NULL; + } + + edata_t *ret = NULL, *edata = NULL; + edata_heap_enumerate_helper_t helper; + edata_heap_enumerate_prepare(&eset->bins[bin_ind].heap, &helper, + ESET_ENUMERATE_MAX_NUM, sizeof(helper.bfs_queue)/sizeof(void *)); + while ((edata = + edata_heap_enumerate_next(&eset->bins[bin_ind].heap, &helper))) { + if ((!exact_only && edata_size_get(edata) >= size) || + (exact_only && edata_size_get(edata) == size)) { + edata_cmp_summary_t temp_summ = + edata_cmp_summary_get(edata); + if (ret == NULL || edata_cmp_summary_comp(temp_summ, + *ret_summ) < 0) { + ret = edata; + *ret_summ = temp_summ; + } + } + } + + return ret; +} + /* * Find an extent with size [min_size, max_size) to satisfy the alignment * requirement. For each size, try only the first extent in the heap. @@ -162,8 +225,19 @@ eset_remove(eset_t *eset, edata_t *edata) { static edata_t * eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, size_t alignment) { - pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(min_size)); - pszind_t pind_max = sz_psz2ind(sz_psz_quantize_ceil(max_size)); + pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(min_size)); + pszind_t pind_max = sz_psz2ind(sz_psz_quantize_ceil(max_size)); + + /* See comments in eset_first_fit for why we enumerate search below. 
+ */ + pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(min_size)); + if (sz_limit_usize_gap_enabled() && pind != pind_prev) { + edata_t *ret = NULL; + ret = eset_enumerate_alignment_search(eset, min_size, pind_prev, + alignment); + if (ret != NULL) { + return ret; + } + } for (pszind_t i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)pind); @@ -211,8 +285,43 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only, pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); if (exact_only) { - return edata_heap_empty(&eset->bins[pind].heap) ? NULL : - edata_heap_first(&eset->bins[pind].heap); + if (sz_limit_usize_gap_enabled()) { + pszind_t pind_prev = + sz_psz2ind(sz_psz_quantize_floor(size)); + return eset_enumerate_search(eset, size, pind_prev, + /* exact_only */ true, &ret_summ); + } else { + return edata_heap_empty(&eset->bins[pind].heap) ? NULL: + edata_heap_first(&eset->bins[pind].heap); + } + } + + /* + * Each element in the eset->bins is a heap corresponding to a size + * class. When sz_limit_usize_gap_enabled() is false, all heaps after + * pind (including pind itself) will surely satisfy the requests while + * heaps before pind cannot satisfy the request because usize is + * calculated based on size classes then. However, when + * sz_limit_usize_gap_enabled() is true, usize is calculated by ceiling + * user requested size to the closest multiple of PAGE. This means in + * the heap before pind, i.e., pind_prev, there may exist extents able + * to satisfy the request and we should enumerate the heap when + * pind_prev != pind. + * + * For example, when PAGE=4KB and the user requested size is 1MB + 4KB, + * usize would be 1.25MB when sz_limit_usize_gap_enabled() is false. + * pind points to the heap containing extents ranging in + * [1.25MB, 1.5MB). Thus, searching starting from pind will not miss + * any candidates. When sz_limit_usize_gap_enabled() is true, the + * usize would be 1MB + 4KB and pind still points to the same heap. 
+ * In this case, the heap pind_prev points to, which contains extents + * in the range [1MB, 1.25MB), may contain candidates satisfying the + * usize and thus should be enumerated. + */ + pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(size)); + if (sz_limit_usize_gap_enabled() && pind != pind_prev){ + ret = eset_enumerate_search(eset, size, pind_prev, + /* exact_only */ false, &ret_summ); } for (pszind_t i = diff --git a/src/psset.c b/src/psset.c index 9a833193..e617f426 100644 --- a/src/psset.c +++ b/src/psset.c @@ -337,18 +337,50 @@ psset_update_end(psset_t *psset, hpdata_t *ps) { hpdata_assert_consistent(ps); } +hpdata_t * +psset_enumerate_search(psset_t *psset, pszind_t pind, size_t size) { + if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { + return NULL; + } + + hpdata_t *ps = NULL; + hpdata_age_heap_enumerate_helper_t helper; + hpdata_age_heap_enumerate_prepare(&psset->pageslabs[pind], &helper, + PSSET_ENUMERATE_MAX_NUM, sizeof(helper.bfs_queue) / sizeof(void *)); + + while ((ps = hpdata_age_heap_enumerate_next(&psset->pageslabs[pind], + &helper))) { + if (hpdata_longest_free_range_get(ps) >= size) { + return ps; + } + } + + return NULL; +} + hpdata_t * psset_pick_alloc(psset_t *psset, size_t size) { assert((size & PAGE_MASK) == 0); assert(size <= HUGEPAGE); pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size)); + hpdata_t *ps = NULL; + + /* See comments in eset_first_fit for why we enumerate search below. 
+ */ + pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(size)); + if (sz_limit_usize_gap_enabled() && pind_prev < min_pind) { + ps = psset_enumerate_search(psset, pind_prev, size); + if (ps != NULL) { + return ps; + } + } + pszind_t pind = (pszind_t)fb_ffs(psset->pageslab_bitmap, PSSET_NPSIZES, (size_t)min_pind); if (pind == PSSET_NPSIZES) { return hpdata_empty_list_first(&psset->empty); } - hpdata_t *ps = hpdata_age_heap_first(&psset->pageslabs[pind]); + ps = hpdata_age_heap_first(&psset->pageslabs[pind]); if (ps == NULL) { return NULL; } diff --git a/test/unit/ph.c b/test/unit/ph.c index 28f5e488..0339f993 100644 --- a/test/unit/ph.c +++ b/test/unit/ph.c @@ -2,8 +2,9 @@ #include "jemalloc/internal/ph.h" +#define BFS_ENUMERATE_MAX 30 typedef struct node_s node_t; -ph_structs(heap, node_t); +ph_structs(heap, node_t, BFS_ENUMERATE_MAX); struct node_s { #define NODE_MAGIC 0x9823af7e @@ -239,6 +240,22 @@ TEST_BEGIN(test_ph_random) { expect_false(heap_empty(&heap), "Heap should not be empty"); + /* Enumerate nodes. */ + heap_enumerate_helper_t helper; + uint16_t max_queue_size = sizeof(helper.bfs_queue) + / sizeof(void *); + expect_u_eq(max_queue_size, BFS_ENUMERATE_MAX, + "Incorrect bfs queue length initialized"); + assert(max_queue_size == BFS_ENUMERATE_MAX); + heap_enumerate_prepare(&heap, &helper, + BFS_ENUMERATE_MAX, max_queue_size); + size_t node_count = 0; + while(heap_enumerate_next(&heap, &helper)) { + node_count ++; + } + expect_lu_eq(node_count, j, + "Unexpected enumeration results."); + /* Remove nodes. */ switch (i % 6) { case 0: