mirror of
https://github.com/jemalloc/jemalloc.git
synced 2026-05-01 01:28:00 +03:00
Add configurable HPA opts, shard count override, and bump MAX_ALLOCATIONS
Three changes to make pa_microbench easier to drive for fragmentation experiments:

- Replace the HPA_SHARD_OPTS_DEFAULT use with a single editable g_hpa_opts global. The microbench does not consult MALLOC_CONF for HPA shard opts, so this is the place to set the baseline configuration (slab_max_alloc, hugification_threshold, dirty_mult, hugify_delay_ms, purge_threshold, hugify_style, etc.).

- Add -n/--nshards N to override the shard count derived from the trace. When set, each event is routed to (event->shard_ind % N), letting us study the impact of arena consolidation. Without the flag the behavior is unchanged (num_shards = max_shard_id + 1).

- Bump MAX_ALLOCATIONS from 10M to 200M so the full ~50M-event adfinder trace (and similar traces) fits in the in-memory event buffer.
This commit is contained in:
parent
6e0b8e6daa
commit
4cc497a4a7
1 changed file with 63 additions and 23 deletions
|
|
@ -26,7 +26,7 @@
|
|||
*/
|
||||
|
||||
#define MAX_LINE_LENGTH 1024
|
||||
#define MAX_ALLOCATIONS 10000000
|
||||
#define MAX_ALLOCATIONS 200000000
|
||||
#define MAX_ARENAS 128
|
||||
|
||||
typedef enum { PA_ALLOC = 0, PA_DALLOC = 1 } pa_op_t;
|
||||
|
|
@ -75,6 +75,12 @@ static size_t g_alloc_counter = 0; /* Global allocation counter */
|
|||
static allocation_record_t *g_alloc_records =
|
||||
NULL; /* Global allocation tracking */
|
||||
static bool g_use_sec = true; /* Global flag for SEC vs HPA-only */
|
||||
/*
|
||||
* Override for the number of shards used by the microbench. -1 means use the
|
||||
* value implied by the trace (max_shard_id + 1). When set to a positive value,
|
||||
* each event's shard_ind is collapsed to (shard_ind % g_nshards_override).
|
||||
*/
|
||||
static int g_nshards_override = -1;
|
||||
|
||||
/* Refactored arrays using structures */
|
||||
static shard_stats_t *g_shard_stats = NULL; /* Per-shard tracking statistics */
|
||||
|
|
@ -82,6 +88,25 @@ static shard_infrastructure_t *g_shard_infra =
|
|||
NULL; /* Per-shard PA infrastructure */
|
||||
static pa_central_t g_pa_central; /* Global PA central */
|
||||
|
||||
/*
|
||||
* HPA shard opts used by the microbench. Edit these values to control the
|
||||
* baseline configuration. Changing these here is what controls the run; the
|
||||
* MALLOC_CONF env var is NOT consulted for HPA shard opts in this microbench.
|
||||
*/
|
||||
static hpa_shard_opts_t g_hpa_opts = {
|
||||
/* slab_max_alloc */ 128 * 1024,
|
||||
/* hugification_threshold */ HUGEPAGE * 84 / 100,
|
||||
/* dirty_mult */ FXP_INIT_PERCENT(30),
|
||||
/* deferral_allowed */ false,
|
||||
/* hugify_delay_ms */ 7804,
|
||||
/* hugify_sync */ false,
|
||||
/* min_purge_interval_ms */ 5 * 1000,
|
||||
/* experimental_max_purge_nhp */ -1,
|
||||
/* purge_threshold */ HUGEPAGE,
|
||||
/* min_purge_delay_ms */ 0,
|
||||
/* hugify_style */ hpa_hugify_style_eager
|
||||
};
|
||||
|
||||
/* Override for curtime */
|
||||
static hpa_hooks_t hpa_hooks_override;
|
||||
static nstime_t cur_time_clock;
|
||||
|
|
@ -212,9 +237,7 @@ initialize_pa_infrastructure(int num_shards) {
|
|||
}
|
||||
|
||||
/* Enable HPA for this shard with proper configuration */
|
||||
hpa_shard_opts_t hpa_opts = HPA_SHARD_OPTS_DEFAULT;
|
||||
hpa_opts.deferral_allowed =
|
||||
false; /* No background threads in microbench */
|
||||
hpa_shard_opts_t hpa_opts = g_hpa_opts;
|
||||
|
||||
sec_opts_t sec_opts = SEC_OPTS_DEFAULT;
|
||||
if (!g_use_sec) {
|
||||
|
|
@ -417,14 +440,11 @@ simulate_trace(
|
|||
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
pa_event_t *event = &events[i];
|
||||
|
||||
/* Validate shard index */
|
||||
if (event->shard_ind >= num_shards) {
|
||||
fprintf(stderr,
|
||||
"Warning: Invalid shard index %d (max %d)\n",
|
||||
event->shard_ind, num_shards - 1);
|
||||
continue;
|
||||
}
|
||||
/*
|
||||
* Collapse trace shard index into the active shard space. With
|
||||
* no override num_shards == max_shard_id+1, so this is a no-op.
|
||||
*/
|
||||
int shard_ind = event->shard_ind % num_shards;
|
||||
|
||||
set_clock(event->nsecs);
|
||||
switch (event->operation) {
|
||||
|
|
@ -441,7 +461,7 @@ simulate_trace(
|
|||
|
||||
/* Allocate using PA allocator */
|
||||
edata_t *edata = pa_alloc(tsdn,
|
||||
&g_shard_infra[event->shard_ind].pa_shard, size,
|
||||
&g_shard_infra[shard_ind].pa_shard, size,
|
||||
PAGE /* alignment */, slab, szind, false /* zero */,
|
||||
false /* guarded */, &deferred_work_generated);
|
||||
|
||||
|
|
@ -450,14 +470,13 @@ simulate_trace(
|
|||
g_alloc_records[g_alloc_counter].edata = edata;
|
||||
g_alloc_records[g_alloc_counter].size = size;
|
||||
g_alloc_records[g_alloc_counter].shard_ind =
|
||||
event->shard_ind;
|
||||
shard_ind;
|
||||
g_alloc_records[g_alloc_counter].active = true;
|
||||
g_alloc_counter++;
|
||||
|
||||
/* Update shard-specific stats */
|
||||
g_shard_stats[event->shard_ind].alloc_count++;
|
||||
g_shard_stats[event->shard_ind]
|
||||
.bytes_allocated += size;
|
||||
g_shard_stats[shard_ind].alloc_count++;
|
||||
g_shard_stats[shard_ind].bytes_allocated += size;
|
||||
|
||||
total_allocs++;
|
||||
total_allocated_bytes += size;
|
||||
|
|
@ -469,21 +488,20 @@ simulate_trace(
|
|||
if (alloc_index < g_alloc_counter
|
||||
&& g_alloc_records[alloc_index].active
|
||||
&& g_alloc_records[alloc_index].shard_ind
|
||||
== event->shard_ind) {
|
||||
== shard_ind) {
|
||||
/* Get tsdn for PA */
|
||||
tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
|
||||
bool deferred_work_generated = false;
|
||||
|
||||
/* Deallocate using PA allocator */
|
||||
pa_dalloc(tsdn,
|
||||
&g_shard_infra[event->shard_ind].pa_shard,
|
||||
&g_shard_infra[shard_ind].pa_shard,
|
||||
g_alloc_records[alloc_index].edata,
|
||||
&deferred_work_generated);
|
||||
|
||||
/* Update shard-specific stats */
|
||||
g_shard_stats[event->shard_ind].dealloc_count++;
|
||||
g_shard_stats[event->shard_ind]
|
||||
.bytes_allocated -=
|
||||
g_shard_stats[shard_ind].dealloc_count++;
|
||||
g_shard_stats[shard_ind].bytes_allocated -=
|
||||
g_alloc_records[alloc_index].size;
|
||||
|
||||
g_alloc_records[alloc_index].active = false;
|
||||
|
|
@ -560,6 +578,9 @@ print_usage(const char *program) {
|
|||
printf(" -p, --hpa-only Use HPA only (no SEC)\n");
|
||||
printf(
|
||||
" -i, --interval N Stats print interval (default: 100000, 0=disable)\n");
|
||||
printf(
|
||||
" -n, --nshards N Force using N shards (events routed via shard_ind %% N).\n"
|
||||
" Default: derived from trace (max_shard_id + 1)\n");
|
||||
printf(
|
||||
"\nTrace file format: shard_ind,operation,size_or_alloc_index,is_frequent\n");
|
||||
printf(" - operation: 0=alloc, 1=dealloc\n");
|
||||
|
|
@ -601,6 +622,21 @@ main(int argc, char *argv[]) {
|
|||
return 1;
|
||||
}
|
||||
stats_interval = (size_t)atol(argv[++i]);
|
||||
} else if (strcmp(argv[i], "-n") == 0
|
||||
|| strcmp(argv[i], "--nshards") == 0) {
|
||||
if (i + 1 >= argc) {
|
||||
fprintf(stderr,
|
||||
"Error: %s requires an argument\n",
|
||||
argv[i]);
|
||||
return 1;
|
||||
}
|
||||
g_nshards_override = atoi(argv[++i]);
|
||||
if (g_nshards_override <= 0) {
|
||||
fprintf(stderr,
|
||||
"Error: --nshards must be > 0 (got %d)\n",
|
||||
g_nshards_override);
|
||||
return 1;
|
||||
}
|
||||
} else if (argv[i][0] != '-') {
|
||||
trace_file = argv[i];
|
||||
} else {
|
||||
|
|
@ -651,7 +687,11 @@ main(int argc, char *argv[]) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
int num_shards = max_shard_id + 1; /* shard IDs are 0-based */
|
||||
int num_shards = (g_nshards_override > 0) ? g_nshards_override
|
||||
: (max_shard_id + 1);
|
||||
printf("Shards: %d (trace max=%d, override=%s)\n", num_shards,
|
||||
max_shard_id,
|
||||
g_nshards_override > 0 ? "yes" : "no");
|
||||
if (num_shards > MAX_ARENAS) {
|
||||
fprintf(stderr, "Error: Too many arenas required (%d > %d)\n",
|
||||
num_shards, MAX_ARENAS);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue