mirror of
https://github.com/jemalloc/jemalloc.git
synced 2026-05-01 01:28:00 +03:00
Add configurable HPA opts, shard count override, and bump MAX_ALLOCATIONS
Three changes to make pa_microbench easier to drive for fragmentation experiments:

- Replace the HPA_SHARD_OPTS_DEFAULT use with a single editable g_hpa_opts global. The microbench does not consult MALLOC_CONF for HPA shard opts, so this is the place to set the baseline configuration (slab_max_alloc, hugification_threshold, dirty_mult, hugify_delay_ms, purge_threshold, hugify_style, etc.).

- Add -n/--nshards N to override the shard count derived from the trace. When set, each event is routed to (event->shard_ind % N), letting us study the impact of arena consolidation. Without the flag the behavior is unchanged (num_shards = max_shard_id + 1).

- Bump MAX_ALLOCATIONS from 10M to 200M so the full ~50M-event adfinder trace (and similar traces) fits in the in-memory event buffer.
This commit is contained in:
parent
6e0b8e6daa
commit
4cc497a4a7
1 changed file with 63 additions and 23 deletions
|
|
@ -26,7 +26,7 @@
|
|||
*/
|
||||
|
||||
#define MAX_LINE_LENGTH 1024
|
||||
#define MAX_ALLOCATIONS 10000000
|
||||
#define MAX_ALLOCATIONS 200000000
|
||||
#define MAX_ARENAS 128
|
||||
|
||||
typedef enum { PA_ALLOC = 0, PA_DALLOC = 1 } pa_op_t;
|
||||
|
|
@ -75,6 +75,12 @@ static size_t g_alloc_counter = 0; /* Global allocation counter */
|
|||
static allocation_record_t *g_alloc_records =
|
||||
NULL; /* Global allocation tracking */
|
||||
static bool g_use_sec = true; /* Global flag for SEC vs HPA-only */
|
||||
/*
|
||||
* Override for the number of shards used by the microbench. -1 means use the
|
||||
* value implied by the trace (max_shard_id + 1). When set to a positive value,
|
||||
* each event's shard_ind is collapsed to (shard_ind % g_nshards_override).
|
||||
*/
|
||||
static int g_nshards_override = -1;
|
||||
|
||||
/* Refactored arrays using structures */
|
||||
static shard_stats_t *g_shard_stats = NULL; /* Per-shard tracking statistics */
|
||||
|
|
@ -82,6 +88,25 @@ static shard_infrastructure_t *g_shard_infra =
|
|||
NULL; /* Per-shard PA infrastructure */
|
||||
static pa_central_t g_pa_central; /* Global PA central */
|
||||
|
||||
/*
|
||||
* HPA shard opts used by the microbench. Edit these values to control the
|
||||
* baseline configuration. Changing these here is what controls the run; the
|
||||
* MALLOC_CONF env var is NOT consulted for HPA shard opts in this microbench.
|
||||
*/
|
||||
static hpa_shard_opts_t g_hpa_opts = {
|
||||
/* slab_max_alloc */ 128 * 1024,
|
||||
/* hugification_threshold */ HUGEPAGE * 84 / 100,
|
||||
/* dirty_mult */ FXP_INIT_PERCENT(30),
|
||||
/* deferral_allowed */ false,
|
||||
/* hugify_delay_ms */ 7804,
|
||||
/* hugify_sync */ false,
|
||||
/* min_purge_interval_ms */ 5 * 1000,
|
||||
/* experimental_max_purge_nhp */ -1,
|
||||
/* purge_threshold */ HUGEPAGE,
|
||||
/* min_purge_delay_ms */ 0,
|
||||
/* hugify_style */ hpa_hugify_style_eager
|
||||
};
|
||||
|
||||
/* Override for curtime */
|
||||
static hpa_hooks_t hpa_hooks_override;
|
||||
static nstime_t cur_time_clock;
|
||||
|
|
@ -212,9 +237,7 @@ initialize_pa_infrastructure(int num_shards) {
|
|||
}
|
||||
|
||||
/* Enable HPA for this shard with proper configuration */
|
||||
hpa_shard_opts_t hpa_opts = HPA_SHARD_OPTS_DEFAULT;
|
||||
hpa_opts.deferral_allowed =
|
||||
false; /* No background threads in microbench */
|
||||
hpa_shard_opts_t hpa_opts = g_hpa_opts;
|
||||
|
||||
sec_opts_t sec_opts = SEC_OPTS_DEFAULT;
|
||||
if (!g_use_sec) {
|
||||
|
|
@ -417,14 +440,11 @@ simulate_trace(
|
|||
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
pa_event_t *event = &events[i];
|
||||
|
||||
/* Validate shard index */
|
||||
if (event->shard_ind >= num_shards) {
|
||||
fprintf(stderr,
|
||||
"Warning: Invalid shard index %d (max %d)\n",
|
||||
event->shard_ind, num_shards - 1);
|
||||
continue;
|
||||
}
|
||||
/*
|
||||
* Collapse trace shard index into the active shard space. With
|
||||
* no override num_shards == max_shard_id+1, so this is a no-op.
|
||||
*/
|
||||
int shard_ind = event->shard_ind % num_shards;
|
||||
|
||||
set_clock(event->nsecs);
|
||||
switch (event->operation) {
|
||||
|
|
@ -441,7 +461,7 @@ simulate_trace(
|
|||
|
||||
/* Allocate using PA allocator */
|
||||
edata_t *edata = pa_alloc(tsdn,
|
||||
&g_shard_infra[event->shard_ind].pa_shard, size,
|
||||
&g_shard_infra[shard_ind].pa_shard, size,
|
||||
PAGE /* alignment */, slab, szind, false /* zero */,
|
||||
false /* guarded */, &deferred_work_generated);
|
||||
|
||||
|
|
@ -450,14 +470,13 @@ simulate_trace(
|
|||
g_alloc_records[g_alloc_counter].edata = edata;
|
||||
g_alloc_records[g_alloc_counter].size = size;
|
||||
g_alloc_records[g_alloc_counter].shard_ind =
|
||||
event->shard_ind;
|
||||
shard_ind;
|
||||
g_alloc_records[g_alloc_counter].active = true;
|
||||
g_alloc_counter++;
|
||||
|
||||
/* Update shard-specific stats */
|
||||
g_shard_stats[event->shard_ind].alloc_count++;
|
||||
g_shard_stats[event->shard_ind]
|
||||
.bytes_allocated += size;
|
||||
g_shard_stats[shard_ind].alloc_count++;
|
||||
g_shard_stats[shard_ind].bytes_allocated += size;
|
||||
|
||||
total_allocs++;
|
||||
total_allocated_bytes += size;
|
||||
|
|
@ -469,21 +488,20 @@ simulate_trace(
|
|||
if (alloc_index < g_alloc_counter
|
||||
&& g_alloc_records[alloc_index].active
|
||||
&& g_alloc_records[alloc_index].shard_ind
|
||||
== event->shard_ind) {
|
||||
== shard_ind) {
|
||||
/* Get tsdn for PA */
|
||||
tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
|
||||
bool deferred_work_generated = false;
|
||||
|
||||
/* Deallocate using PA allocator */
|
||||
pa_dalloc(tsdn,
|
||||
&g_shard_infra[event->shard_ind].pa_shard,
|
||||
&g_shard_infra[shard_ind].pa_shard,
|
||||
g_alloc_records[alloc_index].edata,
|
||||
&deferred_work_generated);
|
||||
|
||||
/* Update shard-specific stats */
|
||||
g_shard_stats[event->shard_ind].dealloc_count++;
|
||||
g_shard_stats[event->shard_ind]
|
||||
.bytes_allocated -=
|
||||
g_shard_stats[shard_ind].dealloc_count++;
|
||||
g_shard_stats[shard_ind].bytes_allocated -=
|
||||
g_alloc_records[alloc_index].size;
|
||||
|
||||
g_alloc_records[alloc_index].active = false;
|
||||
|
|
@ -560,6 +578,9 @@ print_usage(const char *program) {
|
|||
printf(" -p, --hpa-only Use HPA only (no SEC)\n");
|
||||
printf(
|
||||
" -i, --interval N Stats print interval (default: 100000, 0=disable)\n");
|
||||
printf(
|
||||
" -n, --nshards N Force using N shards (events routed via shard_ind %% N).\n"
|
||||
" Default: derived from trace (max_shard_id + 1)\n");
|
||||
printf(
|
||||
"\nTrace file format: shard_ind,operation,size_or_alloc_index,is_frequent\n");
|
||||
printf(" - operation: 0=alloc, 1=dealloc\n");
|
||||
|
|
@ -601,6 +622,21 @@ main(int argc, char *argv[]) {
|
|||
return 1;
|
||||
}
|
||||
stats_interval = (size_t)atol(argv[++i]);
|
||||
} else if (strcmp(argv[i], "-n") == 0
|
||||
|| strcmp(argv[i], "--nshards") == 0) {
|
||||
if (i + 1 >= argc) {
|
||||
fprintf(stderr,
|
||||
"Error: %s requires an argument\n",
|
||||
argv[i]);
|
||||
return 1;
|
||||
}
|
||||
g_nshards_override = atoi(argv[++i]);
|
||||
if (g_nshards_override <= 0) {
|
||||
fprintf(stderr,
|
||||
"Error: --nshards must be > 0 (got %d)\n",
|
||||
g_nshards_override);
|
||||
return 1;
|
||||
}
|
||||
} else if (argv[i][0] != '-') {
|
||||
trace_file = argv[i];
|
||||
} else {
|
||||
|
|
@ -651,7 +687,11 @@ main(int argc, char *argv[]) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
int num_shards = max_shard_id + 1; /* shard IDs are 0-based */
|
||||
int num_shards = (g_nshards_override > 0) ? g_nshards_override
|
||||
: (max_shard_id + 1);
|
||||
printf("Shards: %d (trace max=%d, override=%s)\n", num_shards,
|
||||
max_shard_id,
|
||||
g_nshards_override > 0 ? "yes" : "no");
|
||||
if (num_shards > MAX_ARENAS) {
|
||||
fprintf(stderr, "Error: Too many arenas required (%d > %d)\n",
|
||||
num_shards, MAX_ARENAS);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue