diff --git a/test/stress/pa/pa_microbench.c b/test/stress/pa/pa_microbench.c index abdcf830..887b955e 100644 --- a/test/stress/pa/pa_microbench.c +++ b/test/stress/pa/pa_microbench.c @@ -26,7 +26,7 @@ */ #define MAX_LINE_LENGTH 1024 -#define MAX_ALLOCATIONS 10000000 +#define MAX_ALLOCATIONS 200000000 #define MAX_ARENAS 128 typedef enum { PA_ALLOC = 0, PA_DALLOC = 1 } pa_op_t; @@ -75,6 +75,12 @@ static size_t g_alloc_counter = 0; /* Global allocation counter */ static allocation_record_t *g_alloc_records = NULL; /* Global allocation tracking */ static bool g_use_sec = true; /* Global flag for SEC vs HPA-only */ +/* + * Override for the number of shards used by the microbench. -1 means use the + * value implied by the trace (max_shard_id + 1). When set to a positive value, + * each event's shard_ind is collapsed to (shard_ind % g_nshards_override). + */ +static int g_nshards_override = -1; /* Refactored arrays using structures */ static shard_stats_t *g_shard_stats = NULL; /* Per-shard tracking statistics */ @@ -82,6 +88,25 @@ static shard_infrastructure_t *g_shard_infra = NULL; /* Per-shard PA infrastructure */ static pa_central_t g_pa_central; /* Global PA central */ +/* + * HPA shard opts used by the microbench. Edit these values to control the + * baseline configuration. Changing these here is what controls the run; the + * MALLOC_CONF env var is NOT consulted for HPA shard opts in this microbench. + */ +static hpa_shard_opts_t g_hpa_opts = { + /* slab_max_alloc */ 128 * 1024, + /* hugification_threshold */ HUGEPAGE * 84 / 100, + /* dirty_mult */ FXP_INIT_PERCENT(30), + /* deferral_allowed */ false, + /* hugify_delay_ms */ 7804, + /* hugify_sync */ false, + /* min_purge_interval_ms */ 5 * 1000, + /* experimental_max_purge_nhp */ -1, + /* purge_threshold */ HUGEPAGE, + /* min_purge_delay_ms */ 0, + /* hugify_style */ hpa_hugify_style_eager +}; + /* Override for curtime */ static hpa_hooks_t hpa_hooks_override; static nstime_t cur_time_clock; @@ -212,9 +237,7 @@ initialize_pa_infrastructure(int num_shards) { } /* Enable HPA for this shard with proper configuration */ - hpa_shard_opts_t hpa_opts = HPA_SHARD_OPTS_DEFAULT; - hpa_opts.deferral_allowed = - false; /* No background threads in microbench */ + hpa_shard_opts_t hpa_opts = g_hpa_opts; sec_opts_t sec_opts = SEC_OPTS_DEFAULT; if (!g_use_sec) { @@ -417,14 +440,11 @@ simulate_trace( for (size_t i = 0; i < count; i++) { pa_event_t *event = &events[i]; - - /* Validate shard index */ - if (event->shard_ind >= num_shards) { - fprintf(stderr, - "Warning: Invalid shard index %d (max %d)\n", - event->shard_ind, num_shards - 1); - continue; - } + /* + * Collapse trace shard index into the active shard space. With + * no override num_shards == max_shard_id+1, so this is a no-op. + */ + int shard_ind = event->shard_ind % num_shards; set_clock(event->nsecs); switch (event->operation) { @@ -441,7 +461,7 @@ simulate_trace( /* Allocate using PA allocator */ edata_t *edata = pa_alloc(tsdn, - &g_shard_infra[event->shard_ind].pa_shard, size, + &g_shard_infra[shard_ind].pa_shard, size, PAGE /* alignment */, slab, szind, false /* zero */, false /* guarded */, &deferred_work_generated); @@ -450,14 +470,13 @@ simulate_trace( g_alloc_records[g_alloc_counter].edata = edata; g_alloc_records[g_alloc_counter].size = size; g_alloc_records[g_alloc_counter].shard_ind = - event->shard_ind; + shard_ind; g_alloc_records[g_alloc_counter].active = true; g_alloc_counter++; /* Update shard-specific stats */ - g_shard_stats[event->shard_ind].alloc_count++; - g_shard_stats[event->shard_ind] - .bytes_allocated += size; + g_shard_stats[shard_ind].alloc_count++; + g_shard_stats[shard_ind].bytes_allocated += size; total_allocs++; total_allocated_bytes += size; @@ -469,21 +488,20 @@ simulate_trace( if (alloc_index < g_alloc_counter && g_alloc_records[alloc_index].active && g_alloc_records[alloc_index].shard_ind - == event->shard_ind) { + == shard_ind) { /* Get tsdn for PA */ tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); bool deferred_work_generated = false; /* Deallocate using PA allocator */ pa_dalloc(tsdn, - &g_shard_infra[event->shard_ind].pa_shard, + &g_shard_infra[shard_ind].pa_shard, g_alloc_records[alloc_index].edata, &deferred_work_generated); /* Update shard-specific stats */ - g_shard_stats[event->shard_ind].dealloc_count++; - g_shard_stats[event->shard_ind] - .bytes_allocated -= + g_shard_stats[shard_ind].dealloc_count++; + g_shard_stats[shard_ind].bytes_allocated -= g_alloc_records[alloc_index].size; g_alloc_records[alloc_index].active = false; @@ -560,6 +578,9 @@ print_usage(const char *program) { printf(" -p, --hpa-only Use HPA only (no SEC)\n"); printf( " -i, --interval N Stats print interval (default: 100000, 0=disable)\n"); + printf( + " -n, --nshards N Force using N shards (events routed via shard_ind %% N).\n" + " Default: derived from trace (max_shard_id + 1)\n"); printf( "\nTrace file format: shard_ind,operation,size_or_alloc_index,is_frequent\n"); printf(" - operation: 0=alloc, 1=dealloc\n"); @@ -601,6 +622,21 @@ main(int argc, char *argv[]) { return 1; } stats_interval = (size_t)atol(argv[++i]); + } else if (strcmp(argv[i], "-n") == 0 + || strcmp(argv[i], "--nshards") == 0) { + if (i + 1 >= argc) { + fprintf(stderr, + "Error: %s requires an argument\n", + argv[i]); + return 1; + } + g_nshards_override = atoi(argv[++i]); + if (g_nshards_override <= 0) { + fprintf(stderr, + "Error: --nshards must be > 0 (got %d)\n", + g_nshards_override); + return 1; + } } else if (argv[i][0] != '-') { trace_file = argv[i]; } else { @@ -651,7 +687,11 @@ main(int argc, char *argv[]) { return 1; } - int num_shards = max_shard_id + 1; /* shard IDs are 0-based */ + int num_shards = (g_nshards_override > 0) ? g_nshards_override + : (max_shard_id + 1); + printf("Shards: %d (trace max=%d, override=%s)\n", num_shards, + max_shard_id, + g_nshards_override > 0 ? "yes" : "no"); if (num_shards > MAX_ARENAS) { fprintf(stderr, "Error: Too many arenas required (%d > %d)\n", num_shards, MAX_ARENAS);