jemalloc/src/pa_extra.c
Guangli Dai 1dfa6f7aa4 Replace PAI vtable dispatch with direct calls
The pai_t interface implements C-style polymorphism via function pointers
to abstract over PAC and HPA. This abstraction provides no real benefit:
only two implementations exist, the dispatcher already knows which one to
use, and HPA stubs 2 of 5 operations. Remove the runtime dispatch in
favor of direct calls.

This commit:
- Promotes pac_alloc/expand/shrink/dalloc/time_until_deferred_work to
  external linkage and replaces the pai_t *self parameter with pac_t *pac.
- Promotes hpa_alloc/expand/shrink/dalloc/time_until_deferred_work to
  external linkage and replaces pai_t *self with hpa_shard_t *shard.
- Updates hpa_dalloc_batch's signature to take hpa_shard_t * directly
  and removes the hpa_from_pai container-of helper. Updates internal
  callers in hpa_alloc, hpa_dalloc, and hpa_sec_flush_impl.
- Drops the vtable assignments from pac_init() and hpa_shard_init().
- Replaces pai_alloc/dalloc/etc. dispatch in pa.c with direct calls.
  HPA expand and shrink (which are unconditional failure stubs) are
  skipped entirely for HPA-owned extents.
- Removes the pa_get_pai() helper.
- Updates tests in test/unit/hpa.c and test/unit/hpa_sec_integration.c
  to call hpa_alloc/dalloc/etc. directly.

The pai_t struct field stays as dead weight in pac_t and hpa_shard_t;
it is removed in the next commit along with pai.h itself.

No behavioral changes.
2026-05-12 13:43:16 -07:00

219 lines
7.8 KiB
C

#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"
/*
* This file is logically part of the PA module. While pa.c contains the core
* allocator functionality, this file contains boring integration functionality;
* things like the pre- and post-fork handlers, and stats merging for CTL
* refreshes.
*/
/*
 * Prefork step 0: runs malloc_mutex_prefork() on the PAC's dirty and muzzy
 * decay mutexes, in that order.
 */
void
pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) {
	pac_t *pac = &shard->pac;

	malloc_mutex_prefork(tsdn, &pac->decay_dirty.mtx);
	malloc_mutex_prefork(tsdn, &pac->decay_muzzy.mtx);
}
/*
 * Prefork step 2: forwards to hpa_shard_prefork2(), but only for shards whose
 * ever_used_hpa flag is set.  Nothing else is touched at this step.
 */
void
pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) {
	if (!shard->ever_used_hpa) {
		return;
	}
	hpa_shard_prefork2(tsdn, &shard->hpa);
}
/*
 * Prefork step 3: runs malloc_mutex_prefork() on the PAC grow mutex, then
 * forwards to hpa_shard_prefork3() if the shard has ever used the HPA.
 */
void
pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard) {
	malloc_mutex_prefork(tsdn, &shard->pac.grow_mtx);

	if (!shard->ever_used_hpa) {
		return;
	}
	hpa_shard_prefork3(tsdn, &shard->hpa);
}
/*
 * Prefork step 4: runs ecache_prefork() on the PAC's dirty, muzzy, retained
 * and pinned ecaches (in that order), then forwards to hpa_shard_prefork4()
 * if the shard has ever used the HPA.
 */
void
pa_shard_prefork4(tsdn_t *tsdn, pa_shard_t *shard) {
	/* Listed in the order in which they are handed to ecache_prefork(). */
	ecache_t *ecaches[] = {
		&shard->pac.ecache_dirty,
		&shard->pac.ecache_muzzy,
		&shard->pac.ecache_retained,
		&shard->pac.ecache_pinned
	};
	const size_t necaches = sizeof(ecaches) / sizeof(ecaches[0]);

	for (size_t i = 0; i < necaches; i++) {
		ecache_prefork(tsdn, ecaches[i]);
	}

	if (shard->ever_used_hpa) {
		hpa_shard_prefork4(tsdn, &shard->hpa);
	}
}
/* Prefork step 5: runs edata_cache_prefork() on the shard's edata cache. */
void
pa_shard_prefork5(tsdn_t *tsdn, pa_shard_t *shard) {
	edata_cache_t *cache = &shard->edata_cache;

	edata_cache_prefork(tsdn, cache);
}
/*
 * Parent-side postfork handler.  Runs the *_postfork_parent() hooks on, in
 * order: the edata cache, the four PAC ecaches (dirty, muzzy, retained,
 * pinned), the PAC grow mutex, the dirty and muzzy decay mutexes, and finally
 * the HPA shard if it was ever used.
 */
void
pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) {
	pac_t *pac = &shard->pac;
	ecache_t *ecaches[] = {
		&pac->ecache_dirty,
		&pac->ecache_muzzy,
		&pac->ecache_retained,
		&pac->ecache_pinned
	};
	malloc_mutex_t *mutexes[] = {
		&pac->grow_mtx,
		&pac->decay_dirty.mtx,
		&pac->decay_muzzy.mtx
	};
	const size_t necaches = sizeof(ecaches) / sizeof(ecaches[0]);
	const size_t nmutexes = sizeof(mutexes) / sizeof(mutexes[0]);

	edata_cache_postfork_parent(tsdn, &shard->edata_cache);
	for (size_t i = 0; i < necaches; i++) {
		ecache_postfork_parent(tsdn, ecaches[i]);
	}
	for (size_t i = 0; i < nmutexes; i++) {
		malloc_mutex_postfork_parent(tsdn, mutexes[i]);
	}

	if (!shard->ever_used_hpa) {
		return;
	}
	hpa_shard_postfork_parent(tsdn, &shard->hpa);
}
/*
 * Child-side postfork handler.  Runs the *_postfork_child() hooks on, in
 * order: the edata cache, the four PAC ecaches (dirty, muzzy, retained,
 * pinned), the PAC grow mutex, the dirty and muzzy decay mutexes, and finally
 * the HPA shard if it was ever used.
 */
void
pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) {
	pac_t *pac = &shard->pac;
	ecache_t *ecaches[] = {
		&pac->ecache_dirty,
		&pac->ecache_muzzy,
		&pac->ecache_retained,
		&pac->ecache_pinned
	};
	malloc_mutex_t *mutexes[] = {
		&pac->grow_mtx,
		&pac->decay_dirty.mtx,
		&pac->decay_muzzy.mtx
	};
	const size_t necaches = sizeof(ecaches) / sizeof(ecaches[0]);
	const size_t nmutexes = sizeof(mutexes) / sizeof(mutexes[0]);

	edata_cache_postfork_child(tsdn, &shard->edata_cache);
	for (size_t i = 0; i < necaches; i++) {
		ecache_postfork_child(tsdn, ecaches[i]);
	}
	for (size_t i = 0; i < nmutexes; i++) {
		malloc_mutex_postfork_child(tsdn, mutexes[i]);
	}

	if (!shard->ever_used_hpa) {
		return;
	}
	hpa_shard_postfork_child(tsdn, &shard->hpa);
}
/* Returns the shard's nactive counter, read with a relaxed atomic load. */
size_t
pa_shard_nactive(const pa_shard_t *shard) {
	size_t nactive = atomic_load_zu(&shard->nactive, ATOMIC_RELAXED);

	return nactive;
}
/*
 * Returns the page count of the PAC dirty ecache, plus psset_ndirty() of the
 * HPA's psset when the shard has ever used the HPA.
 */
size_t
pa_shard_ndirty(const pa_shard_t *shard) {
	const size_t pac_ndirty = ecache_npages_get(&shard->pac.ecache_dirty);
	const size_t hpa_ndirty = shard->ever_used_hpa
	    ? psset_ndirty(&shard->hpa.psset)
	    : 0;

	return pac_ndirty + hpa_ndirty;
}
/* Returns the page count of the PAC muzzy ecache. */
size_t
pa_shard_nmuzzy(const pa_shard_t *shard) {
	size_t nmuzzy = ecache_npages_get(&shard->pac.ecache_muzzy);

	return nmuzzy;
}
/*
 * Adds this shard's active, dirty and muzzy counts to the caller's running
 * totals.  The outputs are accumulated into, not overwritten.
 */
void
pa_shard_basic_stats_merge(
    const pa_shard_t *shard, size_t *nactive, size_t *ndirty, size_t *nmuzzy) {
	const size_t shard_nactive = pa_shard_nactive(shard);
	const size_t shard_ndirty = pa_shard_ndirty(shard);
	const size_t shard_nmuzzy = pa_shard_nmuzzy(shard);

	*nactive += shard_nactive;
	*ndirty += shard_ndirty;
	*nmuzzy += shard_nmuzzy;
}
/*
 * Merges this shard's statistics into the caller-provided outputs.  Asserts
 * config_stats.
 *
 *   pa_shard_stats_out  Accumulated into: retained/pinned bytes, edata_avail,
 *                       the dirty and muzzy decay counters, and abandoned_vm.
 *   estats_out          Array indexed by page size class (SC_NPSIZES entries
 *                       are written); each entry is assigned, not
 *                       accumulated.
 *   hpa_stats_out       Passed to hpa_shard_stats_merge() when the shard has
 *                       ever used the HPA; otherwise untouched here.
 *   resident            Accumulated into, in bytes.
 */
void
pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard,
    pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out,
    hpa_shard_stats_t *hpa_stats_out, size_t *resident) {
	cassert(config_stats);

	pac_t *pac = &shard->pac;

	/* Page counts are converted to bytes via LG_PAGE. */
	const size_t retained_pgs = ecache_npages_get(&pac->ecache_retained);
	pa_shard_stats_out->pac_stats.retained += retained_pgs << LG_PAGE;

	const size_t pinned_pgs = ecache_npages_get(&pac->ecache_pinned);
	pa_shard_stats_out->pac_stats.pinned += pinned_pgs << LG_PAGE;

	const size_t edata_avail = atomic_load_zu(
	    &shard->edata_cache.count, ATOMIC_RELAXED);
	pa_shard_stats_out->edata_avail += edata_avail;

	/*
	 * Resident pages are active + dirty + pinned.  The pinned ecache is
	 * read a second time here rather than reusing the value from above.
	 */
	const size_t res_nactive = pa_shard_nactive(shard);
	const size_t res_ndirty = pa_shard_ndirty(shard);
	const size_t res_npinned = ecache_npages_get(&pac->ecache_pinned);
	*resident += ((res_nactive + res_ndirty + res_npinned) << LG_PAGE);

	uint64_t counter;

	/* Dirty decay stats */
	counter = locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx),
	    &pac->stats->decay_dirty.npurge);
	locked_inc_u64_unsynchronized(
	    &pa_shard_stats_out->pac_stats.decay_dirty.npurge, counter);

	counter = locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx),
	    &pac->stats->decay_dirty.nmadvise);
	locked_inc_u64_unsynchronized(
	    &pa_shard_stats_out->pac_stats.decay_dirty.nmadvise, counter);

	counter = locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx),
	    &pac->stats->decay_dirty.purged);
	locked_inc_u64_unsynchronized(
	    &pa_shard_stats_out->pac_stats.decay_dirty.purged, counter);

	/* Muzzy decay stats */
	counter = locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx),
	    &pac->stats->decay_muzzy.npurge);
	locked_inc_u64_unsynchronized(
	    &pa_shard_stats_out->pac_stats.decay_muzzy.npurge, counter);

	counter = locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx),
	    &pac->stats->decay_muzzy.nmadvise);
	locked_inc_u64_unsynchronized(
	    &pa_shard_stats_out->pac_stats.decay_muzzy.nmadvise, counter);

	counter = locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx),
	    &pac->stats->decay_muzzy.purged);
	locked_inc_u64_unsynchronized(
	    &pa_shard_stats_out->pac_stats.decay_muzzy.purged, counter);

	const size_t abandoned_vm = atomic_load_zu(
	    &pac->stats->abandoned_vm, ATOMIC_RELAXED);
	atomic_load_add_store_zu(
	    &pa_shard_stats_out->pac_stats.abandoned_vm, abandoned_vm);

	/* Per-size-class extent counts first, then byte totals. */
	for (pszind_t i = 0; i < SC_NPSIZES; i++) {
		pac_estats_t *row = &estats_out[i];

		row->ndirty = ecache_nextents_get(&pac->ecache_dirty, i);
		row->nmuzzy = ecache_nextents_get(&pac->ecache_muzzy, i);
		row->nretained = ecache_nextents_get(
		    &pac->ecache_retained, i);
		row->npinned = ecache_nextents_get(&pac->ecache_pinned, i);

		row->dirty_bytes = ecache_nbytes_get(&pac->ecache_dirty, i);
		row->muzzy_bytes = ecache_nbytes_get(&pac->ecache_muzzy, i);
		row->retained_bytes = ecache_nbytes_get(
		    &pac->ecache_retained, i);
		row->pinned_bytes = ecache_nbytes_get(
		    &pac->ecache_pinned, i);
	}

	if (shard->ever_used_hpa) {
		hpa_shard_stats_merge(tsdn, &shard->hpa, hpa_stats_out);
	}
}
/*
 * Reads the profiling data of one mutex into mutex_prof_data[ind].  The mutex
 * is locked around the malloc_mutex_prof_read() call and unlocked afterwards.
 */
static void
pa_shard_mtx_stats_read_single(tsdn_t *tsdn, mutex_prof_data_t *mutex_prof_data,
    malloc_mutex_t *mtx, int ind) {
	mutex_prof_data_t *slot = &mutex_prof_data[ind];

	malloc_mutex_lock(tsdn, mtx);
	malloc_mutex_prof_read(tsdn, slot, mtx);
	malloc_mutex_unlock(tsdn, mtx);
}
/*
 * Fills mutex_prof_data with profiling data for the shard's mutexes: the
 * edata cache mutex, the four PAC ecache mutexes and the two decay mutexes
 * are always read.  The HPA shard mutex, HPA grow mutex and SEC mutex stats
 * are read only when the shard has ever used the HPA.
 */
void
pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard,
    mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]) {
	/* Mutexes read unconditionally, paired with their output index. */
	const struct {
		malloc_mutex_t *mtx;
		int ind;
	} always_read[] = {
		{&shard->edata_cache.mtx, arena_prof_mutex_extent_avail},
		{&shard->pac.ecache_dirty.mtx, arena_prof_mutex_extents_dirty},
		{&shard->pac.ecache_muzzy.mtx, arena_prof_mutex_extents_muzzy},
		{&shard->pac.ecache_retained.mtx,
		    arena_prof_mutex_extents_retained},
		{&shard->pac.ecache_pinned.mtx,
		    arena_prof_mutex_extents_pinned},
		{&shard->pac.decay_dirty.mtx, arena_prof_mutex_decay_dirty},
		{&shard->pac.decay_muzzy.mtx, arena_prof_mutex_decay_muzzy}
	};
	const size_t nalways = sizeof(always_read) / sizeof(always_read[0]);

	for (size_t i = 0; i < nalways; i++) {
		pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data,
		    always_read[i].mtx, always_read[i].ind);
	}

	if (!shard->ever_used_hpa) {
		return;
	}

	hpa_shard_t *hpa = &shard->hpa;
	pa_shard_mtx_stats_read_single(
	    tsdn, mutex_prof_data, &hpa->mtx, arena_prof_mutex_hpa_shard);
	pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, &hpa->grow_mtx,
	    arena_prof_mutex_hpa_shard_grow);
	sec_mutex_stats_read(
	    tsdn, &hpa->sec, &mutex_prof_data[arena_prof_mutex_hpa_sec]);
}