Extend purging algorithm with peak demand tracking

Implementation inspired by idea described in "Beyond malloc efficiency
to fleet efficiency: a hugepage-aware memory allocator" paper [1].

The primary idea is to track the maximum number (peak) of active pages
in use within a sliding window and then use this number to decide how
many dirty pages we would like to keep.

We are trying to estimate maximum amount of active memory we'll need in
the near future. We do so by projecting future active memory demand
(based on peak active memory usage we observed in the past within
sliding window) and adding slack on top of it (an overhead is reasonable
to have in exchange of higher hugepages coverage). When peak demand
tracking is off, projection of future active memory is active memory we
are having right now.

Estimation is essentially the same as `nactive_max * (1 + dirty_mult)`.

The peak demand purging algorithm is controlled by two config options.
Option `hpa_peak_demand_window_ms` controls the duration of the sliding
window in which we track maximum active memory usage, and option
`hpa_dirty_mult` controls the amount of slack we are allowed to have as
a percentage of maximum active memory usage. By default
`hpa_peak_demand_window_ms == 0` for now, and we have the same behaviour
(ratio-based purging) that we had before this commit.

[1]: https://storage.googleapis.com/gweb-research2023-media/pubtools/6170.pdf
This commit is contained in:
Dmitry Ilvokhin 2025-01-21 07:20:15 -08:00
parent 499f306859
commit d36aa77e8a
20 changed files with 537 additions and 29 deletions

View file

@ -10,6 +10,7 @@
#include "jemalloc/internal/hpa_opts.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/pai.h"
#include "jemalloc/internal/peak_demand.h"
#include "jemalloc/internal/psset.h"
typedef struct hpa_central_s hpa_central_t;
@ -147,6 +148,9 @@ struct hpa_shard_s {
* Last time we performed purge on this shard.
*/
nstime_t last_purge;
/* Peak active memory sliding window statistics. */
peak_demand_t peak_demand;
};
bool hpa_hugepage_size_exceeds_limit();

View file

@ -27,7 +27,8 @@ struct hpa_shard_opts_s {
/*
* The HPA purges whenever the number of pages exceeds dirty_mult *
* active_pages. This may be set to (fxp_t)-1 to disable purging.
* peak_active_pages. This may be set to (fxp_t)-1 to disable
* purging.
*/
fxp_t dirty_mult;
@ -59,6 +60,13 @@ struct hpa_shard_opts_s {
* Maximum number of hugepages to purge on each purging attempt.
*/
ssize_t experimental_max_purge_nhp;
/*
* Sliding window duration to track active memory demand statistics.
* This might be set to 0, to disable sliding window statistics
* tracking and use current number of active pages for purging instead.
*/
uint64_t peak_demand_window_ms;
};
#define HPA_SHARD_OPTS_DEFAULT { \
@ -83,7 +91,9 @@ struct hpa_shard_opts_s {
/* min_purge_interval_ms */ \
5 * 1000, \
/* experimental_max_purge_nhp */ \
-1 \
-1, \
/* peak_demand_window_ms */ \
0 \
}
#endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */

View file

@ -0,0 +1,55 @@
#ifndef JEMALLOC_INTERNAL_PEAK_DEMAND_H
#define JEMALLOC_INTERNAL_PEAK_DEMAND_H

#include "jemalloc/internal/jemalloc_preamble.h"

/*
 * Implementation of peak active memory demand tracking.
 *
 * Inspired by "Beyond malloc efficiency to fleet efficiency: a hugepage-aware
 * memory allocator" whitepaper.
 * https://storage.googleapis.com/gweb-research2023-media/pubtools/6170.pdf
 *
 * End goal is to track peak active memory usage over a specified time
 * interval.  We do so by dividing this time interval into disjoint
 * subintervals and storing the value of maximum memory usage for each
 * subinterval in a circular buffer.  A nanosecond-resolution timestamp
 * uniquely maps into an epoch, which is used as an index to access the
 * circular buffer.
 */

#define PEAK_DEMAND_LG_BUCKETS 4

/*
 * Number of buckets should be a power of 2 to ensure the modulo operation is
 * optimized to bit masking by the compiler.
 */
#define PEAK_DEMAND_NBUCKETS (1 << PEAK_DEMAND_LG_BUCKETS)

typedef struct peak_demand_s peak_demand_t;
struct peak_demand_s {
	/*
	 * Absolute value of current epoch, monotonically increases over time.
	 * Epoch value modulo number of buckets is used as an index to access
	 * the nactive_max array.
	 */
	uint64_t epoch;
	/*
	 * How many nanoseconds each epoch approximately takes.  Derived from
	 * the configured window duration: window_ms spread evenly across
	 * PEAK_DEMAND_NBUCKETS buckets (see peak_demand_init).
	 */
	uint64_t epoch_interval_ns;
	/*
	 * Circular buffer to track maximum number of active pages for each
	 * epoch.
	 */
	size_t nactive_max[PEAK_DEMAND_NBUCKETS];
};

/*
 * Initializes peak demand state for a sliding window of interval_ms
 * milliseconds.  An interval of 0 disables sliding-window tracking
 * (see hpa_shard_opts_s.peak_demand_window_ms).
 */
void peak_demand_init(peak_demand_t *peak_demand, uint64_t interval_ms);

/* Updates peak demand statistics with current number of active pages. */
void peak_demand_update(peak_demand_t *peak_demand, const nstime_t *now,
    size_t nactive);

/* Returns maximum number of active pages in sliding window. */
size_t peak_demand_nactive_max(peak_demand_t *peak_demand);

#endif /* JEMALLOC_INTERNAL_PEAK_DEMAND_H */