Keep hugepages in use as long as we can

Hugepages are really hard to get. Currently, we wait until a memory
region is filled with data up to at least `hpa_hugification_threshold`
and then wait for `hpa_hugify_delay_ms` before we hugify the pageslab. For
this reason it seems wasteful to treat hugified pageslabs in the same
way as non-hugified ones. Based on that observation, two ideas come to
mind.

We should try to prioritize placing allocations on hugified pageslabs to
get performance improvements from hugepage usage immediately. While
there may be a better (in terms of fragmentation) pageslab currently
available, empty space on a hugepage is just sitting there, waiting for
a better allocation to appear, which might never happen. This unused
memory on a hugepage is counted towards our usage anyway, so we had
better put it to good use.

The same reasoning applies to purging prioritization. If we purge a
hugepage (`madvise(..., MADV_DONTNEED)`) we'll need to start over again
to assemble it back: filling it up and waiting. Moreover, we might never
assemble the hugepage again, because the kernel doesn't have contiguous
2 MiB regions anymore. Instead, we should purge non-huge pageslabs as long
as we can, because they are much cheaper to purge and do not provide any
performance benefits.
This commit is contained in:
Dmitry Ilvokhin 2024-11-20 06:56:42 -08:00
parent 6092c980a6
commit b64d7815b7
3 changed files with 239 additions and 139 deletions

View file

@ -27,17 +27,6 @@
*/
#define PSSET_NHUGE 2
/*
* We keep two purge lists per page size class; one for hugified hpdatas (at
* index 2*pszind), and one for the non-hugified hpdatas (at index 2*pszind +
* 1). This lets us implement a preference for purging non-hugified hpdatas
* among similarly-dirty ones.
* We reserve the last two indices for empty slabs, in that case purging
* hugified ones (which are definitionally all waste) before non-hugified ones
* (i.e. reversing the order).
*/
#define PSSET_NPURGE_LISTS (2 * PSSET_NPSIZES)
typedef struct psset_bin_stats_s psset_bin_stats_t;
struct psset_bin_stats_s {
/* How many pageslabs are in this bin? */
@ -65,11 +54,7 @@ struct psset_stats_s {
/* Non-huge and huge slabs. */
psset_bin_stats_t slabs[PSSET_NHUGE];
/*
* The second index is huge stats; nonfull_slabs[pszind][0] contains
* stats for the non-huge slabs in bucket pszind, while
* nonfull_slabs[pszind][1] contains stats for the huge slabs.
*/
/* Non-full slabs, distinguished for non-huge and huge slabs. */
psset_bin_stats_t nonfull_slabs[PSSET_NPSIZES][PSSET_NHUGE];
/*
@ -88,9 +73,9 @@ struct psset_s {
* The pageslabs, quantized by the size class of the largest contiguous
* free run of pages in a pageslab.
*/
hpdata_age_heap_t pageslabs[PSSET_NPSIZES];
hpdata_age_heap_t pageslabs[PSSET_NHUGE][PSSET_NPSIZES];
/* Bitmap for which set bits correspond to non-empty heaps. */
fb_group_t pageslab_bitmap[FB_NGROUPS(PSSET_NPSIZES)];
fb_group_t pageslab_bitmap[PSSET_NHUGE][FB_NGROUPS(PSSET_NPSIZES)];
psset_stats_t stats;
/*
* Slabs with no active allocations, but which are allowed to serve new
@ -102,9 +87,9 @@ struct psset_s {
* to purge them (with later indices indicating slabs we want to purge
* more).
*/
hpdata_purge_list_t to_purge[PSSET_NPURGE_LISTS];
hpdata_purge_list_t to_purge[PSSET_NHUGE][PSSET_NPSIZES];
/* Bitmap for which set bits correspond to non-empty purge lists. */
fb_group_t purge_bitmap[FB_NGROUPS(PSSET_NPURGE_LISTS)];
fb_group_t purge_bitmap[PSSET_NHUGE][FB_NGROUPS(PSSET_NPSIZES)];
/* Slabs which are available to be hugified. */
hpdata_hugify_list_t to_hugify;
};