mirror of
https://github.com/jemalloc/jemalloc.git
synced 2026-05-14 00:46:21 +03:00
Implementation inspired by the idea described in the "Beyond malloc efficiency to fleet efficiency: a hugepage-aware memory allocator" paper [1]. The primary idea is to track the maximum number (peak) of active pages in use within a sliding window, and then use this number to decide how many dirty pages we would like to keep. We are trying to estimate the maximum amount of active memory we'll need in the near future. We do so by projecting future active memory demand (based on the peak active memory usage we observed in the past within the sliding window) and adding slack on top of it (an overhead is reasonable to have in exchange for higher hugepage coverage). When peak demand tracking is off, the projection of future active memory is the active memory we have right now. The estimation is essentially `nactive_max * (1 + dirty_mult)`. The peak demand purging algorithm is controlled by two config options. Option `hpa_peak_demand_window_ms` controls the duration of the sliding window in which we track maximum active memory usage, and option `hpa_dirty_mult` controls the amount of slack we are allowed to have, as a percentage of maximum active memory usage. By default `hpa_peak_demand_window_ms == 0` for now, so we keep the same behaviour (ratio-based purging) that we had before this commit. [1]: https://storage.googleapis.com/gweb-research2023-media/pubtools/6170.pdf
74 lines
2.3 KiB
C
74 lines
2.3 KiB
C
#include "jemalloc/internal/jemalloc_preamble.h"
|
|
#include "jemalloc/internal/jemalloc_internal_includes.h"
|
|
|
|
#include "jemalloc/internal/peak_demand.h"
|
|
|
|
void
|
|
peak_demand_init(peak_demand_t *peak_demand, uint64_t interval_ms) {
|
|
assert(interval_ms > 0);
|
|
peak_demand->epoch = 0;
|
|
uint64_t interval_ns = interval_ms * 1000 * 1000;
|
|
peak_demand->epoch_interval_ns = interval_ns / PEAK_DEMAND_NBUCKETS;
|
|
memset(peak_demand->nactive_max, 0, sizeof(peak_demand->nactive_max));
|
|
}
|
|
|
|
static uint64_t
|
|
peak_demand_epoch_ind(peak_demand_t *peak_demand) {
|
|
return peak_demand->epoch % PEAK_DEMAND_NBUCKETS;
|
|
}
|
|
|
|
static nstime_t
|
|
peak_demand_next_epoch_advance(peak_demand_t *peak_demand) {
|
|
uint64_t epoch = peak_demand->epoch;
|
|
uint64_t ns = (epoch + 1) * peak_demand->epoch_interval_ns;
|
|
nstime_t next;
|
|
nstime_init(&next, ns);
|
|
return next;
|
|
}
|
|
|
|
/*
 * Advance the tracker to the epoch containing *now, zeroing the buckets
 * of any epochs that were skipped, and return the index of the bucket
 * for the current epoch.
 */
static uint64_t
peak_demand_maybe_advance_epoch(peak_demand_t *peak_demand,
    const nstime_t *now) {
	nstime_t next_epoch_advance =
	    peak_demand_next_epoch_advance(peak_demand);
	/* Still inside the current epoch: nothing to advance. */
	if (nstime_compare(now, &next_epoch_advance) < 0) {
		return peak_demand_epoch_ind(peak_demand);
	}
	uint64_t next_epoch = nstime_ns(now) / peak_demand->epoch_interval_ns;
	assert(next_epoch > peak_demand->epoch);
	/*
	 * If we missed more epochs than the capacity of the circular buffer
	 * (PEAK_DEMAND_NBUCKETS), re-write no more than PEAK_DEMAND_NBUCKETS
	 * items as we don't want to zero out the same item multiple times.
	 */
	if (peak_demand->epoch + PEAK_DEMAND_NBUCKETS < next_epoch) {
		peak_demand->epoch = next_epoch - PEAK_DEMAND_NBUCKETS;
	}
	/* Step forward one epoch at a time, clearing each skipped bucket. */
	while (peak_demand->epoch < next_epoch) {
		++peak_demand->epoch;
		uint64_t ind = peak_demand_epoch_ind(peak_demand);
		peak_demand->nactive_max[ind] = 0;
	}
	return peak_demand_epoch_ind(peak_demand);
}
|
|
|
|
void
|
|
peak_demand_update(peak_demand_t *peak_demand, const nstime_t *now,
|
|
size_t nactive) {
|
|
uint64_t ind = peak_demand_maybe_advance_epoch(peak_demand, now);
|
|
size_t *epoch_nactive = &peak_demand->nactive_max[ind];
|
|
if (nactive > *epoch_nactive) {
|
|
*epoch_nactive = nactive;
|
|
}
|
|
}
|
|
|
|
size_t
|
|
peak_demand_nactive_max(peak_demand_t *peak_demand) {
|
|
size_t nactive_max = peak_demand->nactive_max[0];
|
|
for (int i = 1; i < PEAK_DEMAND_NBUCKETS; ++i) {
|
|
if (peak_demand->nactive_max[i] > nactive_max) {
|
|
nactive_max = peak_demand->nactive_max[i];
|
|
}
|
|
}
|
|
return nactive_max;
|
|
}
|