Replace PAI vtable dispatch with direct calls

The pai_t interface implements C-style polymorphism via function pointers
to abstract over the two page allocator implementations, PAC and HPA. The
abstraction provides no real benefit: only those two implementations
exist, every dispatch site already knows which one it is calling, and HPA
stubs out two of the five operations (expand and shrink unconditionally
fail). Remove the runtime dispatch in favor of direct calls.
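
For context, the pattern being removed looks roughly like this, reduced
to a self-contained sketch (illustrative only, not jemalloc source;
iface_t/impl_t stand in for pai_t and a concrete allocator):

    #include <stddef.h>
    #include <stdio.h>

    typedef struct iface_s iface_t;
    struct iface_s {
            void *(*alloc)(iface_t *self, size_t size);
    };

    typedef struct {
            iface_t iface;    /* first member, so the downcast is valid */
            size_t allocated;
    } impl_t;

    /* Vtable target: recovers the concrete type from the embedded
     * iface_t, the way hpa_from_pai() recovered hpa_shard_t. */
    static void *
    impl_alloc(iface_t *self, size_t size) {
            impl_t *impl = (impl_t *)self;
            impl->allocated += size;
            return NULL; /* actual allocation elided in this sketch */
    }

    /* Direct form: the concrete type is the parameter; no cast, and
     * the callee is resolved at compile time. */
    static void *
    impl_alloc_direct(impl_t *impl, size_t size) {
            impl->allocated += size;
            return NULL;
    }

    int
    main(void) {
            impl_t impl = {{impl_alloc}, 0};
            impl.iface.alloc(&impl.iface, 8); /* runtime dispatch */
            impl_alloc_direct(&impl, 8);      /* direct call */
            printf("allocated = %zu\n", impl.allocated);
            return 0;
    }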

This commit:
- Renames pac_alloc_impl/pac_expand_impl/pac_shrink_impl/pac_dalloc_impl
  to pac_alloc/expand/shrink/dalloc, promotes them together with
  pac_time_until_deferred_work to external linkage, and replaces the
  pai_t *self parameter with pac_t *pac.
- Promotes hpa_alloc/expand/shrink/dalloc/time_until_deferred_work to
  external linkage and replaces pai_t *self with hpa_shard_t *shard.
- Updates hpa_dalloc_batch's signature to take hpa_shard_t * directly
  and removes the hpa_from_pai container-of helper. Updates internal
  callers in hpa_alloc, hpa_dalloc, and hpa_sec_flush_impl.
- Drops the vtable assignments from pac_init() and hpa_shard_init().
- Replaces pai_alloc/dalloc/etc. dispatch in pa.c with direct calls.
  Because HPA expand and shrink are unconditional failure stubs,
  pa_expand and pa_shrink now fail fast for HPA-owned extents instead of
  calling into HPA at all.
- Renames the pa_shard_t field hpa_shard to hpa and removes the
  pa_get_pai() helper.
- Adds asserts in pac_expand/shrink/dalloc that the extent passed in is
  PAC-owned.
- Updates tests in test/unit/hpa.c and test/unit/hpa_sec_integration.c
  to call hpa_alloc/dalloc/etc. directly.

The pai_t struct field stays as dead weight in pac_t and hpa_shard_t;
it is removed in the next commit along with pai.h itself.
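
At a pa.c call site, the change reduces to the following shape (adapted
from the pa_dalloc hunk below):

    /* Before: look up the extent's vtable, then dispatch through it. */
    pai_t *pai = pa_get_pai(shard, edata);
    pai_dalloc(tsdn, pai, edata, deferred_work_generated);

    /* After: branch on the extent's owner and call it directly. */
    if (edata_pai_get(edata) == EXTENT_PAI_HPA) {
            hpa_dalloc(tsdn, &shard->hpa, edata, deferred_work_generated);
    } else {
            pac_dalloc(tsdn, &shard->pac, edata, deferred_work_generated);
    }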

No behavioral changes.
Author: Guangli Dai
Committer: Guangli Dai
Date: 2026-04-20 17:17:45 -07:00
Commit: 1dfa6f7aa4
Parent: 163c871d6c
14 changed files with 183 additions and 224 deletions

src/hpa.c

@@ -8,18 +8,7 @@
#include "jemalloc/internal/witness.h"
#include "jemalloc/internal/jemalloc_probe.h"
static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size,
size_t alignment, bool zero, bool guarded, bool frequent_reuse,
bool *deferred_work_generated);
static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata,
size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated);
static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
size_t old_size, size_t new_size, bool *deferred_work_generated);
static void hpa_dalloc(
tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated);
static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self);
static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self,
static void hpa_dalloc_batch(tsdn_t *tsdn, hpa_shard_t *shard,
edata_list_active_t *list, bool *deferred_work_generated);
const char *const hpa_hugify_style_names[] = {"auto", "none", "eager", "lazy"};
@@ -110,17 +99,6 @@ hpa_shard_init(tsdn_t *tsdn, hpa_shard_t *shard, hpa_central_t *central,
shard->stats.nhugify_failures = 0;
shard->stats.ndehugifies = 0;
/*
* Fill these in last, so that if an hpa_shard gets used despite
* initialization failing, we'll at least crash instead of just
* operating on corrupted data.
*/
shard->pai.alloc = &hpa_alloc;
shard->pai.expand = &hpa_expand;
shard->pai.shrink = &hpa_shrink;
shard->pai.dalloc = &hpa_dalloc;
shard->pai.time_until_deferred_work = &hpa_time_until_deferred_work;
err = sec_init(tsdn, &shard->sec, base, sec_opts);
if (err) {
return true;
@@ -820,15 +798,6 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
return nsuccess;
}
static hpa_shard_t *
hpa_from_pai(pai_t *self) {
assert(self->alloc == &hpa_alloc);
assert(self->expand == &hpa_expand);
assert(self->shrink == &hpa_shrink);
assert(self->dalloc == &hpa_dalloc);
return (hpa_shard_t *)self;
}
static void
hpa_assert_results(
tsdn_t *tsdn, hpa_shard_t *shard, edata_list_active_t *results) {
@@ -854,9 +823,10 @@ hpa_assert_results(
}
}
static edata_t *
hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero,
bool guarded, bool frequent_reuse, bool *deferred_work_generated) {
edata_t *
hpa_alloc(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, size_t alignment,
bool zero, bool guarded, bool frequent_reuse,
bool *deferred_work_generated) {
assert((size & PAGE_MASK) == 0);
assert(!guarded);
witness_assert_depth_to_rank(
@@ -866,7 +836,6 @@ hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero,
if (alignment > PAGE || zero) {
return NULL;
}
hpa_shard_t *shard = hpa_from_pai(self);
/*
* frequent_use here indicates this request comes from the arena bins,
@@ -907,7 +876,7 @@ hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero,
/* Unlikely rollback in case of overfill */
if (!edata_list_active_empty(&results)) {
hpa_dalloc_batch(
tsdn, self, &results, deferred_work_generated);
tsdn, shard, &results, deferred_work_generated);
}
}
witness_assert_depth_to_rank(
@@ -915,15 +884,15 @@ hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero,
return edata;
}
static bool
hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
bool
hpa_expand(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata, size_t old_size,
size_t new_size, bool zero, bool *deferred_work_generated) {
/* Expand not yet supported. */
return true;
}
static bool
hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
bool
hpa_shrink(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata, size_t old_size,
size_t new_size, bool *deferred_work_generated) {
/* Shrink not yet supported. */
return true;
@@ -982,10 +951,8 @@ hpa_dalloc_locked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) {
}
static void
hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list,
hpa_dalloc_batch(tsdn_t *tsdn, hpa_shard_t *shard, edata_list_active_t *list,
bool *deferred_work_generated) {
hpa_shard_t *shard = hpa_from_pai(self);
edata_t *edata;
ql_foreach (edata, &list->head, ql_link_active) {
hpa_dalloc_prepare_unlocked(tsdn, shard, edata);
@@ -1003,16 +970,15 @@ hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list,
malloc_mutex_unlock(tsdn, &shard->mtx);
}
static void
hpa_dalloc(
tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) {
void
hpa_dalloc(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata,
bool *deferred_work_generated) {
assert(!edata_guarded_get(edata));
edata_list_active_t dalloc_list;
edata_list_active_init(&dalloc_list);
edata_list_active_append(&dalloc_list, edata);
hpa_shard_t *shard = hpa_from_pai(self);
sec_dalloc(tsdn, &shard->sec, &dalloc_list);
if (edata_list_active_empty(&dalloc_list)) {
/* sec consumed the pointer */
@@ -1020,17 +986,16 @@ hpa_dalloc(
return;
}
/* We may have more than one pointer to flush now */
hpa_dalloc_batch(tsdn, self, &dalloc_list, deferred_work_generated);
hpa_dalloc_batch(tsdn, shard, &dalloc_list, deferred_work_generated);
}
/*
* Calculate time until either purging or hugification ought to happen.
* Called by background threads.
*/
static uint64_t
hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) {
hpa_shard_t *shard = hpa_from_pai(self);
uint64_t time_ns = BACKGROUND_THREAD_DEFERRED_MAX;
uint64_t
hpa_time_until_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) {
uint64_t time_ns = BACKGROUND_THREAD_DEFERRED_MAX;
malloc_mutex_lock(tsdn, &shard->mtx);
@@ -1090,8 +1055,7 @@ hpa_sec_flush_impl(tsdn_t *tsdn, hpa_shard_t *shard) {
sec_flush(tsdn, &shard->sec, &to_flush);
bool deferred_work_generated;
hpa_dalloc_batch(
tsdn, (pai_t *)shard, &to_flush, &deferred_work_generated);
hpa_dalloc_batch(tsdn, shard, &to_flush, &deferred_work_generated);
}
void

src/pa.c

@@ -67,7 +67,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, pa_central_t *central,
bool
pa_shard_enable_hpa(tsdn_t *tsdn, pa_shard_t *shard,
const hpa_shard_opts_t *hpa_opts, const sec_opts_t *hpa_sec_opts) {
if (hpa_shard_init(tsdn, &shard->hpa_shard, &shard->central->hpa,
if (hpa_shard_init(tsdn, &shard->hpa, &shard->central->hpa,
shard->emap, shard->base, &shard->edata_cache, shard->ind,
hpa_opts, hpa_sec_opts)) {
return true;
@@ -82,7 +82,7 @@
pa_shard_disable_hpa(tsdn_t *tsdn, pa_shard_t *shard) {
atomic_store_b(&shard->use_hpa, false, ATOMIC_RELAXED);
if (shard->ever_used_hpa) {
hpa_shard_disable(tsdn, &shard->hpa_shard);
hpa_shard_disable(tsdn, &shard->hpa);
}
}
@@ -95,7 +95,7 @@ pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard) {
void
pa_shard_flush(tsdn_t *tsdn, pa_shard_t *shard) {
if (shard->ever_used_hpa) {
hpa_shard_flush(tsdn, &shard->hpa_shard);
hpa_shard_flush(tsdn, &shard->hpa);
}
}
@@ -108,16 +108,10 @@
pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) {
pac_destroy(tsdn, &shard->pac);
if (shard->ever_used_hpa) {
hpa_shard_destroy(tsdn, &shard->hpa_shard);
hpa_shard_destroy(tsdn, &shard->hpa);
}
}
static pai_t *
pa_get_pai(pa_shard_t *shard, edata_t *edata) {
return (edata_pai_get(edata) == EXTENT_PAI_PAC ? &shard->pac.pai
: &shard->hpa_shard.pai);
}
edata_t *
pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment,
bool slab, szind_t szind, bool zero, bool guarded,
@@ -128,7 +122,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment,
edata_t *edata = NULL;
if (!guarded && pa_shard_uses_hpa(shard)) {
edata = pai_alloc(tsdn, &shard->hpa_shard.pai, size, alignment,
edata = hpa_alloc(tsdn, &shard->hpa, size, alignment,
zero, /* guarded */ false, slab, deferred_work_generated);
}
/*
@@ -136,7 +130,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment,
* allocation request.
*/
if (edata == NULL) {
edata = pai_alloc(tsdn, &shard->pac.pai, size, alignment, zero,
edata = pac_alloc(tsdn, &shard->pac, size, alignment, zero,
guarded, slab, deferred_work_generated);
}
if (edata != NULL) {
@@ -164,10 +158,15 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size,
}
size_t expand_amount = new_size - old_size;
pai_t *pai = pa_get_pai(shard, edata);
bool error = pai_expand(tsdn, pai, edata, old_size, new_size, zero,
deferred_work_generated);
/*
* HPA expand always fails (it's a stub); skip the call entirely for
* HPA-owned extents.
*/
if (edata_pai_get(edata) == EXTENT_PAI_HPA) {
return true;
}
bool error = pac_expand(tsdn, &shard->pac, edata, old_size, new_size,
zero, deferred_work_generated);
if (error) {
return true;
}
@@ -189,9 +188,15 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size,
}
size_t shrink_amount = old_size - new_size;
pai_t *pai = pa_get_pai(shard, edata);
bool error = pai_shrink(
tsdn, pai, edata, old_size, new_size, deferred_work_generated);
/*
* HPA shrink always fails (it's a stub); skip the call entirely for
* HPA-owned extents.
*/
if (edata_pai_get(edata) == EXTENT_PAI_HPA) {
return true;
}
bool error = pac_shrink(tsdn, &shard->pac, edata, old_size, new_size,
deferred_work_generated);
if (error) {
return true;
}
@@ -216,8 +221,11 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata,
edata_addr_set(edata, edata_base_get(edata));
edata_szind_set(edata, SC_NSIZES);
pa_nactive_sub(shard, edata_size_get(edata) >> LG_PAGE);
pai_t *pai = pa_get_pai(shard, edata);
pai_dalloc(tsdn, pai, edata, deferred_work_generated);
if (edata_pai_get(edata) == EXTENT_PAI_HPA) {
hpa_dalloc(tsdn, &shard->hpa, edata, deferred_work_generated);
} else {
pac_dalloc(tsdn, &shard->pac, edata, deferred_work_generated);
}
}
bool
@@ -236,14 +244,14 @@ pa_shard_set_deferral_allowed(
tsdn_t *tsdn, pa_shard_t *shard, bool deferral_allowed) {
if (pa_shard_uses_hpa(shard)) {
hpa_shard_set_deferral_allowed(
tsdn, &shard->hpa_shard, deferral_allowed);
tsdn, &shard->hpa, deferral_allowed);
}
}
void
pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard) {
if (pa_shard_uses_hpa(shard)) {
hpa_shard_do_deferred_work(tsdn, &shard->hpa_shard);
hpa_shard_do_deferred_work(tsdn, &shard->hpa);
}
}
@@ -254,14 +262,14 @@ pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard) {
*/
uint64_t
pa_shard_time_until_deferred_work(tsdn_t *tsdn, pa_shard_t *shard) {
uint64_t time = pai_time_until_deferred_work(tsdn, &shard->pac.pai);
uint64_t time = pac_time_until_deferred_work(tsdn, &shard->pac);
if (time == BACKGROUND_THREAD_DEFERRED_MIN) {
return time;
}
if (pa_shard_uses_hpa(shard)) {
uint64_t hpa = pai_time_until_deferred_work(
tsdn, &shard->hpa_shard.pai);
uint64_t hpa = hpa_time_until_deferred_work(
tsdn, &shard->hpa);
if (hpa < time) {
time = hpa;
}

src/pa_extra.c

@@ -17,7 +17,7 @@ pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) {
void
pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) {
if (shard->ever_used_hpa) {
hpa_shard_prefork2(tsdn, &shard->hpa_shard);
hpa_shard_prefork2(tsdn, &shard->hpa);
}
}
@@ -25,7 +25,7 @@
pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard) {
malloc_mutex_prefork(tsdn, &shard->pac.grow_mtx);
if (shard->ever_used_hpa) {
hpa_shard_prefork3(tsdn, &shard->hpa_shard);
hpa_shard_prefork3(tsdn, &shard->hpa);
}
}
@@ -36,7 +36,7 @@ pa_shard_prefork4(tsdn_t *tsdn, pa_shard_t *shard) {
ecache_prefork(tsdn, &shard->pac.ecache_retained);
ecache_prefork(tsdn, &shard->pac.ecache_pinned);
if (shard->ever_used_hpa) {
hpa_shard_prefork4(tsdn, &shard->hpa_shard);
hpa_shard_prefork4(tsdn, &shard->hpa);
}
}
@@ -56,7 +56,7 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) {
malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_dirty.mtx);
malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_muzzy.mtx);
if (shard->ever_used_hpa) {
hpa_shard_postfork_parent(tsdn, &shard->hpa_shard);
hpa_shard_postfork_parent(tsdn, &shard->hpa);
}
}
@@ -71,7 +71,7 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) {
malloc_mutex_postfork_child(tsdn, &shard->pac.decay_dirty.mtx);
malloc_mutex_postfork_child(tsdn, &shard->pac.decay_muzzy.mtx);
if (shard->ever_used_hpa) {
hpa_shard_postfork_child(tsdn, &shard->hpa_shard);
hpa_shard_postfork_child(tsdn, &shard->hpa);
}
}
@@ -84,7 +84,7 @@
pa_shard_ndirty(const pa_shard_t *shard) {
size_t ndirty = ecache_npages_get(&shard->pac.ecache_dirty);
if (shard->ever_used_hpa) {
ndirty += psset_ndirty(&shard->hpa_shard.psset);
ndirty += psset_ndirty(&shard->hpa.psset);
}
return ndirty;
}
@@ -177,7 +177,7 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard,
}
if (shard->ever_used_hpa) {
hpa_shard_stats_merge(tsdn, &shard->hpa_shard, hpa_stats_out);
hpa_shard_stats_merge(tsdn, &shard->hpa, hpa_stats_out);
}
}
@@ -209,11 +209,11 @@ pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard,
if (shard->ever_used_hpa) {
pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data,
&shard->hpa_shard.mtx, arena_prof_mutex_hpa_shard);
&shard->hpa.mtx, arena_prof_mutex_hpa_shard);
pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data,
&shard->hpa_shard.grow_mtx,
&shard->hpa.grow_mtx,
arena_prof_mutex_hpa_shard_grow);
sec_mutex_stats_read(tsdn, &shard->hpa_shard.sec,
sec_mutex_stats_read(tsdn, &shard->hpa.sec,
&mutex_prof_data[arena_prof_mutex_hpa_sec]);
}
}

src/pac.c

@@ -4,17 +4,6 @@
#include "jemalloc/internal/pac.h"
#include "jemalloc/internal/san.h"
static edata_t *pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size,
size_t alignment, bool zero, bool guarded, bool frequent_reuse,
bool *deferred_work_generated);
static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata,
size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated);
static bool pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata,
size_t old_size, size_t new_size, bool *deferred_work_generated);
static void pac_dalloc_impl(
tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated);
static uint64_t pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self);
static inline void
pac_decay_data_get(pac_t *pac, extent_state_t state, decay_t **r_decay,
pac_decay_stats_t **r_decay_stats, ecache_t **r_ecache) {
@@ -103,12 +92,6 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap,
pac->stats_mtx = stats_mtx;
atomic_store_zu(&pac->extent_sn_next, 0, ATOMIC_RELAXED);
pac->pai.alloc = &pac_alloc_impl;
pac->pai.expand = &pac_expand_impl;
pac->pai.shrink = &pac_shrink_impl;
pac->pai.dalloc = &pac_dalloc_impl;
pac->pai.time_until_deferred_work = &pac_time_until_deferred_work;
return false;
}
@@ -265,11 +248,10 @@ pac_alloc_new_guarded(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size,
return edata;
}
static edata_t *
pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment,
edata_t *
pac_alloc(tsdn_t *tsdn, pac_t *pac, size_t size, size_t alignment,
bool zero, bool guarded, bool frequent_reuse,
bool *deferred_work_generated) {
pac_t *pac = (pac_t *)self;
ehooks_t *ehooks = pac_ehooks_get(pac);
edata_t *edata = NULL;
@@ -292,10 +274,11 @@ pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment,
return edata;
}
static bool
pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
bool
pac_expand(tsdn_t *tsdn, pac_t *pac, edata_t *edata, size_t old_size,
size_t new_size, bool zero, bool *deferred_work_generated) {
pac_t *pac = (pac_t *)self;
assert(edata_pai_get(edata) == EXTENT_PAI_PAC);
ehooks_t *ehooks = pac_ehooks_get(pac);
size_t mapped_add = 0;
@@ -360,10 +343,11 @@ pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
return false;
}
static bool
pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
bool
pac_shrink(tsdn_t *tsdn, pac_t *pac, edata_t *edata, size_t old_size,
size_t new_size, bool *deferred_work_generated) {
pac_t *pac = (pac_t *)self;
assert(edata_pai_get(edata) == EXTENT_PAI_PAC);
ehooks_t *ehooks = pac_ehooks_get(pac);
size_t shrink_amount = old_size - new_size;
@@ -385,10 +369,11 @@ pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
return false;
}
static void
pac_dalloc_impl(
tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) {
pac_t *pac = (pac_t *)self;
void
pac_dalloc(tsdn_t *tsdn, pac_t *pac, edata_t *edata,
bool *deferred_work_generated) {
assert(edata_pai_get(edata) == EXTENT_PAI_PAC);
ehooks_t *ehooks = pac_ehooks_get(pac);
if (edata_guarded_get(edata)) {
@@ -432,10 +417,9 @@ pac_ns_until_purge(tsdn_t *tsdn, decay_t *decay, size_t npages) {
return result;
}
static uint64_t
pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) {
uint64_t
pac_time_until_deferred_work(tsdn_t *tsdn, pac_t *pac) {
uint64_t time;
pac_t *pac = (pac_t *)self;
time = pac_ns_until_purge(
tsdn, &pac->decay_dirty, ecache_npages_get(&pac->ecache_dirty));