Add configurable HPA opts, shard count override, and bump MAX_ALLOCATIONS

Three changes to make pa_microbench easier to drive for fragmentation
experiments:

- Replace HPA_SHARD_OPTS_DEFAULT use with a single editable g_hpa_opts
  global. The microbench does not consult MALLOC_CONF for HPA shard opts,
  so this is the place to set the baseline configuration (slab_max_alloc,
  hugification_threshold, dirty_mult, hugify_delay_ms, purge_threshold,
  hugify_style, etc.).

- Add -n/--nshards N to override the shard count derived from the trace.
  N must be a positive integer; other values are rejected at startup.
  When set, each event is routed to (event->shard_ind % N), letting us
  study the impact of arena consolidation. Without the flag the behavior
  is unchanged (num_shards = max_shard_id + 1).

- Bump MAX_ALLOCATIONS from 10M to 200M so the full ~50M-event adfinder
  trace (and similar) fits in the in-memory event buffer.
This commit is contained in:
Slobodan Predolac 2026-04-28 16:35:16 -07:00
parent 6e0b8e6daa
commit 4cc497a4a7

View file

@ -26,7 +26,7 @@
*/
#define MAX_LINE_LENGTH 1024
#define MAX_ALLOCATIONS 10000000
#define MAX_ALLOCATIONS 200000000
#define MAX_ARENAS 128
typedef enum { PA_ALLOC = 0, PA_DALLOC = 1 } pa_op_t;
@ -75,6 +75,12 @@ static size_t g_alloc_counter = 0; /* Global allocation counter */
static allocation_record_t *g_alloc_records =
NULL; /* Global allocation tracking */
static bool g_use_sec = true; /* Global flag for SEC vs HPA-only */
/*
* Override for the number of shards used by the microbench. -1 means use the
* value implied by the trace (max_shard_id + 1). When set to a positive value,
* each event's shard_ind is collapsed to (shard_ind % g_nshards_override).
*
* Set from the -n/--nshards command-line option, which rejects values <= 0.
* main() only applies the override when it is > 0, so the -1 default selects
* the trace-derived shard count.
*/
static int g_nshards_override = -1;
/* Refactored arrays using structures */
static shard_stats_t *g_shard_stats = NULL; /* Per-shard tracking statistics */
@ -82,6 +88,25 @@ static shard_infrastructure_t *g_shard_infra =
NULL; /* Per-shard PA infrastructure */
static pa_central_t g_pa_central; /* Global PA central */
/*
* HPA shard opts used by the microbench. Edit these values to control the
* baseline configuration. Changing these here is what controls the run; the
* MALLOC_CONF env var is NOT consulted for HPA shard opts in this microbench.
*
* initialize_pa_infrastructure() copies this struct into a local hpa_opts
* when enabling HPA for a shard, so every shard starts from these values.
*
* NOTE: this is a positional initializer. The field labels below are comments
* only; the values bind to hpa_shard_opts_t members in declaration order.
* NOTE(review): labels are assumed to mirror the struct's field names and
* order -- verify against the hpa_shard_opts_t declaration when it changes.
*/
static hpa_shard_opts_t g_hpa_opts = {
/* slab_max_alloc */ 128 * 1024,
/* hugification_threshold: 84% of HUGEPAGE (integer arithmetic) */ HUGEPAGE * 84 / 100,
/* dirty_mult */ FXP_INIT_PERCENT(30),
/* deferral_allowed: no background threads in microbench */ false,
/* hugify_delay_ms */ 7804,
/* hugify_sync */ false,
/* min_purge_interval_ms */ 5 * 1000,
/* experimental_max_purge_nhp */ -1,
/* purge_threshold */ HUGEPAGE,
/* min_purge_delay_ms */ 0,
/* hugify_style */ hpa_hugify_style_eager
};
/* Override for curtime */
static hpa_hooks_t hpa_hooks_override;
static nstime_t cur_time_clock;
@ -212,9 +237,7 @@ initialize_pa_infrastructure(int num_shards) {
}
/* Enable HPA for this shard with proper configuration */
hpa_shard_opts_t hpa_opts = HPA_SHARD_OPTS_DEFAULT;
hpa_opts.deferral_allowed =
false; /* No background threads in microbench */
hpa_shard_opts_t hpa_opts = g_hpa_opts;
sec_opts_t sec_opts = SEC_OPTS_DEFAULT;
if (!g_use_sec) {
@ -417,14 +440,11 @@ simulate_trace(
for (size_t i = 0; i < count; i++) {
pa_event_t *event = &events[i];
/* Validate shard index */
if (event->shard_ind >= num_shards) {
fprintf(stderr,
"Warning: Invalid shard index %d (max %d)\n",
event->shard_ind, num_shards - 1);
continue;
}
/*
* Collapse trace shard index into the active shard space. With
* no override num_shards == max_shard_id+1, so this is a no-op.
*/
int shard_ind = event->shard_ind % num_shards;
set_clock(event->nsecs);
switch (event->operation) {
@ -441,7 +461,7 @@ simulate_trace(
/* Allocate using PA allocator */
edata_t *edata = pa_alloc(tsdn,
&g_shard_infra[event->shard_ind].pa_shard, size,
&g_shard_infra[shard_ind].pa_shard, size,
PAGE /* alignment */, slab, szind, false /* zero */,
false /* guarded */, &deferred_work_generated);
@ -450,14 +470,13 @@ simulate_trace(
g_alloc_records[g_alloc_counter].edata = edata;
g_alloc_records[g_alloc_counter].size = size;
g_alloc_records[g_alloc_counter].shard_ind =
event->shard_ind;
shard_ind;
g_alloc_records[g_alloc_counter].active = true;
g_alloc_counter++;
/* Update shard-specific stats */
g_shard_stats[event->shard_ind].alloc_count++;
g_shard_stats[event->shard_ind]
.bytes_allocated += size;
g_shard_stats[shard_ind].alloc_count++;
g_shard_stats[shard_ind].bytes_allocated += size;
total_allocs++;
total_allocated_bytes += size;
@ -469,21 +488,20 @@ simulate_trace(
if (alloc_index < g_alloc_counter
&& g_alloc_records[alloc_index].active
&& g_alloc_records[alloc_index].shard_ind
== event->shard_ind) {
== shard_ind) {
/* Get tsdn for PA */
tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
bool deferred_work_generated = false;
/* Deallocate using PA allocator */
pa_dalloc(tsdn,
&g_shard_infra[event->shard_ind].pa_shard,
&g_shard_infra[shard_ind].pa_shard,
g_alloc_records[alloc_index].edata,
&deferred_work_generated);
/* Update shard-specific stats */
g_shard_stats[event->shard_ind].dealloc_count++;
g_shard_stats[event->shard_ind]
.bytes_allocated -=
g_shard_stats[shard_ind].dealloc_count++;
g_shard_stats[shard_ind].bytes_allocated -=
g_alloc_records[alloc_index].size;
g_alloc_records[alloc_index].active = false;
@ -560,6 +578,9 @@ print_usage(const char *program) {
printf(" -p, --hpa-only Use HPA only (no SEC)\n");
printf(
" -i, --interval N Stats print interval (default: 100000, 0=disable)\n");
printf(
" -n, --nshards N Force using N shards (events routed via shard_ind %% N).\n"
" Default: derived from trace (max_shard_id + 1)\n");
printf(
"\nTrace file format: shard_ind,operation,size_or_alloc_index,is_frequent\n");
printf(" - operation: 0=alloc, 1=dealloc\n");
@ -601,6 +622,21 @@ main(int argc, char *argv[]) {
return 1;
}
stats_interval = (size_t)atol(argv[++i]);
} else if (strcmp(argv[i], "-n") == 0
|| strcmp(argv[i], "--nshards") == 0) {
if (i + 1 >= argc) {
fprintf(stderr,
"Error: %s requires an argument\n",
argv[i]);
return 1;
}
g_nshards_override = atoi(argv[++i]);
if (g_nshards_override <= 0) {
fprintf(stderr,
"Error: --nshards must be > 0 (got %d)\n",
g_nshards_override);
return 1;
}
} else if (argv[i][0] != '-') {
trace_file = argv[i];
} else {
@ -651,7 +687,11 @@ main(int argc, char *argv[]) {
return 1;
}
int num_shards = max_shard_id + 1; /* shard IDs are 0-based */
int num_shards = (g_nshards_override > 0) ? g_nshards_override
: (max_shard_id + 1);
printf("Shards: %d (trace max=%d, override=%s)\n", num_shards,
max_shard_id,
g_nshards_override > 0 ? "yes" : "no");
if (num_shards > MAX_ARENAS) {
fprintf(stderr, "Error: Too many arenas required (%d > %d)\n",
num_shards, MAX_ARENAS);