mirror of
https://github.com/jemalloc/jemalloc.git
synced 2026-06-25 21:35:42 +03:00
Implement cache index randomization for large allocations.
Extract szad size quantization into {extent,run}_quantize(), and .
quantize szad run sizes to the union of valid small region run sizes and
large run sizes.
Refactor iteration in arena_run_first_fit() to use
run_quantize{,_first,_next(), and add support for padded large runs.
For large allocations that have no specified alignment constraints,
compute a pseudo-random offset from the beginning of the first backing
page that is a multiple of the cache line size. Under typical
configurations with 4-KiB pages and 64-byte cache lines this results in
a uniform distribution among 64 page boundary offsets.
Add the --disable-cache-oblivious option, primarily intended for
performance testing.
This resolves #13.
This commit is contained in:
parent
6bb54cb9da
commit
8a03cf039c
10 changed files with 280 additions and 74 deletions
|
|
@ -290,6 +290,12 @@ struct arena_s {
|
|||
|
||||
uint64_t prof_accumbytes;
|
||||
|
||||
/*
|
||||
* PRNG state for cache index randomization of large allocation base
|
||||
* pointers.
|
||||
*/
|
||||
uint64_t offset_state;
|
||||
|
||||
dss_prec_t dss_prec;
|
||||
|
||||
/*
|
||||
|
|
@ -394,7 +400,15 @@ struct arena_s {
|
|||
/******************************************************************************/
|
||||
#ifdef JEMALLOC_H_EXTERNS
|
||||
|
||||
extern ssize_t opt_lg_dirty_mult;
|
||||
static const size_t large_pad =
|
||||
#ifdef JEMALLOC_CACHE_OBLIVIOUS
|
||||
PAGE
|
||||
#else
|
||||
0
|
||||
#endif
|
||||
;
|
||||
|
||||
extern ssize_t opt_lg_dirty_mult;
|
||||
|
||||
extern arena_bin_info_t arena_bin_info[NBINS];
|
||||
|
||||
|
|
@ -475,7 +489,7 @@ void arena_stats_merge(arena_t *arena, const char **dss,
|
|||
arena_stats_t *astats, malloc_bin_stats_t *bstats,
|
||||
malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats);
|
||||
arena_t *arena_new(unsigned ind);
|
||||
void arena_boot(void);
|
||||
bool arena_boot(void);
|
||||
void arena_prefork(arena_t *arena);
|
||||
void arena_postfork_parent(arena_t *arena);
|
||||
void arena_postfork_child(arena_t *arena);
|
||||
|
|
@ -721,7 +735,7 @@ arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size,
|
|||
{
|
||||
size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
|
||||
|
||||
assert((size & PAGE_MASK) == 0);
|
||||
assert(size == PAGE_CEILING(size));
|
||||
assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0);
|
||||
assert((flags & (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == flags);
|
||||
arena_mapbitsp_write(mapbitsp, size | CHUNK_MAP_BININD_INVALID | flags);
|
||||
|
|
@ -734,7 +748,7 @@ arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind,
|
|||
size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
|
||||
size_t mapbits = arena_mapbitsp_read(mapbitsp);
|
||||
|
||||
assert((size & PAGE_MASK) == 0);
|
||||
assert(size == PAGE_CEILING(size));
|
||||
assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0);
|
||||
arena_mapbitsp_write(mapbitsp, size | (mapbits & PAGE_MASK));
|
||||
}
|
||||
|
|
@ -747,7 +761,7 @@ arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size,
|
|||
size_t mapbits = arena_mapbitsp_read(mapbitsp);
|
||||
size_t unzeroed;
|
||||
|
||||
assert((size & PAGE_MASK) == 0);
|
||||
assert(size == PAGE_CEILING(size));
|
||||
assert((flags & CHUNK_MAP_DIRTY) == flags);
|
||||
unzeroed = mapbits & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */
|
||||
arena_mapbitsp_write(mapbitsp, size | CHUNK_MAP_BININD_INVALID | flags
|
||||
|
|
@ -762,7 +776,8 @@ arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind,
|
|||
size_t mapbits = arena_mapbitsp_read(mapbitsp);
|
||||
|
||||
assert(binind <= BININD_INVALID);
|
||||
assert(arena_mapbits_large_size_get(chunk, pageind) == LARGE_MINCLASS);
|
||||
assert(arena_mapbits_large_size_get(chunk, pageind) == LARGE_MINCLASS +
|
||||
large_pad);
|
||||
arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_BININD_MASK) |
|
||||
(binind << CHUNK_MAP_BININD_SHIFT));
|
||||
}
|
||||
|
|
@ -1107,13 +1122,16 @@ arena_salloc(const void *ptr, bool demote)
|
|||
* end up looking at binind to determine that ptr is a
|
||||
* small allocation.
|
||||
*/
|
||||
assert(((uintptr_t)ptr & PAGE_MASK) == 0);
|
||||
ret = arena_mapbits_large_size_get(chunk, pageind);
|
||||
assert(config_cache_oblivious || ((uintptr_t)ptr &
|
||||
PAGE_MASK) == 0);
|
||||
ret = arena_mapbits_large_size_get(chunk, pageind) -
|
||||
large_pad;
|
||||
assert(ret != 0);
|
||||
assert(pageind + (ret>>LG_PAGE) <= chunk_npages);
|
||||
assert(pageind + ((ret+large_pad)>>LG_PAGE) <=
|
||||
chunk_npages);
|
||||
assert(arena_mapbits_dirty_get(chunk, pageind) ==
|
||||
arena_mapbits_dirty_get(chunk,
|
||||
pageind+(ret>>LG_PAGE)-1));
|
||||
pageind+((ret+large_pad)>>LG_PAGE)-1));
|
||||
} else {
|
||||
/*
|
||||
* Small allocation (possibly promoted to a large
|
||||
|
|
@ -1157,11 +1175,13 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache)
|
|||
size_t size = arena_mapbits_large_size_get(chunk,
|
||||
pageind);
|
||||
|
||||
assert(((uintptr_t)ptr & PAGE_MASK) == 0);
|
||||
assert(config_cache_oblivious || ((uintptr_t)ptr &
|
||||
PAGE_MASK) == 0);
|
||||
|
||||
if (likely(tcache != NULL) && size <= tcache_maxclass)
|
||||
tcache_dalloc_large(tsd, tcache, ptr, size);
|
||||
else {
|
||||
if (likely(tcache != NULL) && size <= tcache_maxclass) {
|
||||
tcache_dalloc_large(tsd, tcache, ptr, size -
|
||||
large_pad);
|
||||
} else {
|
||||
arena_dalloc_large(extent_node_arena_get(
|
||||
&chunk->node), chunk, ptr);
|
||||
}
|
||||
|
|
@ -1188,7 +1208,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
|
|||
*/
|
||||
assert(((uintptr_t)ptr & PAGE_MASK) == 0);
|
||||
size = arena_mapbits_large_size_get(chunk,
|
||||
pageind);
|
||||
pageind) - large_pad;
|
||||
}
|
||||
}
|
||||
assert(s2u(size) == s2u(arena_salloc(ptr, false)));
|
||||
|
|
@ -1205,7 +1225,8 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
|
|||
&chunk->node), chunk, ptr, pageind);
|
||||
}
|
||||
} else {
|
||||
assert(((uintptr_t)ptr & PAGE_MASK) == 0);
|
||||
assert(config_cache_oblivious || ((uintptr_t)ptr &
|
||||
PAGE_MASK) == 0);
|
||||
|
||||
if (likely(tcache != NULL) && size <= tcache_maxclass)
|
||||
tcache_dalloc_large(tsd, tcache, ptr, size);
|
||||
|
|
|
|||
|
|
@ -126,6 +126,13 @@ static const bool config_ivsalloc =
|
|||
false
|
||||
#endif
|
||||
;
|
||||
static const bool config_cache_oblivious =
|
||||
#ifdef JEMALLOC_CACHE_OBLIVIOUS
|
||||
true
|
||||
#else
|
||||
false
|
||||
#endif
|
||||
;
|
||||
|
||||
#ifdef JEMALLOC_C11ATOMICS
|
||||
#include <stdatomic.h>
|
||||
|
|
|
|||
|
|
@ -192,6 +192,12 @@
|
|||
*/
|
||||
#undef JEMALLOC_IVSALLOC
|
||||
|
||||
/*
|
||||
* If defined, explicitly attempt to more uniformly distribute large allocation
|
||||
* pointer alignments across all cache indices.
|
||||
*/
|
||||
#undef JEMALLOC_CACHE_OBLIVIOUS
|
||||
|
||||
/*
|
||||
* Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -26,22 +26,22 @@
|
|||
* const uint32_t a, c : See above discussion.
|
||||
*/
|
||||
#define prng32(r, lg_range, state, a, c) do { \
|
||||
assert(lg_range > 0); \
|
||||
assert(lg_range <= 32); \
|
||||
assert((lg_range) > 0); \
|
||||
assert((lg_range) <= 32); \
|
||||
\
|
||||
r = (state * (a)) + (c); \
|
||||
state = r; \
|
||||
r >>= (32 - lg_range); \
|
||||
r >>= (32 - (lg_range)); \
|
||||
} while (false)
|
||||
|
||||
/* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. */
|
||||
#define prng64(r, lg_range, state, a, c) do { \
|
||||
assert(lg_range > 0); \
|
||||
assert(lg_range <= 64); \
|
||||
assert((lg_range) > 0); \
|
||||
assert((lg_range) <= 64); \
|
||||
\
|
||||
r = (state * (a)) + (c); \
|
||||
state = r; \
|
||||
r >>= (64 - lg_range); \
|
||||
r >>= (64 - (lg_range)); \
|
||||
} while (false)
|
||||
|
||||
#endif /* JEMALLOC_H_TYPES */
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue