jemalloc/include/jemalloc/internal/jemalloc_internal_externs.h
guangli-dai 2651071172 Introducing a new usize calculation policy
Converting size to usize is what jemalloc has been done by ceiling
size to the closest size class. However, this causes lots of memory
wastes with HPA enabled.  This commit changes how usize is calculated so
that the gap between two contiguous usize is no larger than a page.
Specifically, this commit includes the following changes:

1. Adding a build-time config option (--enable-limit-usize-gap) and a
runtime one (limit_usize_gap) to guard the changes.
When build-time
config is enabled, some minor CPU overhead is expected because usize
will be stored and accessed apart from index.  When runtime option is
also enabled (it can only be enabled with the build-time config
enabled). a new usize calculation approach wil be employed.  This new
calculation will ceil size to the closest multiple of PAGE for all sizes
larger than USIZE_GROW_SLOW_THRESHOLD instead of using the size classes.
Note when the build-time config is enabled, the runtime option is
default on.

2. Prepare tcache for size to grow by PAGE over GROUP*PAGE.
To prepare for the upcoming changes where size class grows by PAGE when
larger than NGROUP * PAGE, disable the tcache when it is larger than 2 *
NGROUP * PAGE. The threshold for tcache is set higher to prevent perf
regression as much as possible while usizes between NGROUP * PAGE and 2 *
NGROUP * PAGE happen to grow by PAGE.

3. Prepare pac and hpa psset for size to grow by PAGE over GROUP*PAGE
For PAC, to avoid having too many bins, arena bins still have the same
layout.  This means some extra search is needed for a page-level request that
is not aligned with the orginal size class: it should also search the heap
before the current index since the previous heap might also be able to
have some allocations satisfying it.  The same changes apply to HPA's
psset.
This search relies on the enumeration of the heap because not all allocs in
the previous heap are guaranteed to satisfy the request.  To balance the
memory and CPU overhead, we currently enumerate at most a fixed number
of nodes before concluding none can satisfy the request during an
enumeration.

4. Add bytes counter to arena large stats.
To prepare for the upcoming usize changes, stats collected by
multiplying alive allocations and the bin size is no longer accurate.
Thus, add separate counters to record the bytes malloced and dalloced.

5. Change structs use when freeing to avoid using index2size for large sizes.
  - Change the definition of emap_alloc_ctx_t
  - Change the read of both from edata_t.
  - Change the assignment and usage of emap_alloc_ctx_t.
  - Change other callsites of index2size.
Note for the changes in the data structure, i.e., emap_alloc_ctx_t,
will be used when the build-time config (--enable-limit-usize-gap) is
enabled but they will store the same value as index2size(szind) if the
runtime option (opt_limit_usize_gap) is not enabled.

6. Adapt hpa to the usize changes.
Change the settings in sec to limit is usage for sizes larger than
USIZE_GROW_SLOW_THRESHOLD and modify corresponding tests.

7. Modify usize calculation and corresponding tests.
Change the sz_s2u_compute. Note sz_index2size is not always safe now
while sz_size2index still works as expected.
2025-03-05 11:07:22 -08:00

85 lines
2.9 KiB
C

#ifndef JEMALLOC_INTERNAL_EXTERNS_H
#define JEMALLOC_INTERNAL_EXTERNS_H
#include "jemalloc/internal/arena_types.h"
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/hpa_opts.h"
#include "jemalloc/internal/nstime.h"
#include "jemalloc/internal/sec_opts.h"
#include "jemalloc/internal/tsd_types.h"
/* TSD checks this to set thread local slow state accordingly. */
extern bool malloc_slow;
/* Run-time options. */
extern bool opt_abort;
extern bool opt_abort_conf;
extern bool opt_trust_madvise;
extern bool opt_confirm_conf;
extern bool opt_hpa;
extern hpa_shard_opts_t opt_hpa_opts;
extern sec_opts_t opt_hpa_sec_opts;
extern const char *opt_junk;
extern bool opt_junk_alloc;
extern bool opt_junk_free;
extern void (*JET_MUTABLE junk_free_callback)(void *ptr, size_t size);
extern void (*JET_MUTABLE junk_alloc_callback)(void *ptr, size_t size);
extern void (*JET_MUTABLE invalid_conf_abort)(void);
extern bool opt_utrace;
extern bool opt_xmalloc;
extern bool opt_experimental_infallible_new;
extern bool opt_experimental_tcache_gc;
extern bool opt_zero;
extern unsigned opt_narenas;
extern zero_realloc_action_t opt_zero_realloc_action;
extern malloc_init_t malloc_init_state;
extern const char *const zero_realloc_mode_names[];
extern atomic_zu_t zero_realloc_count;
extern bool opt_cache_oblivious;
extern unsigned opt_debug_double_free_max_scan;
extern size_t opt_calloc_madvise_threshold;
extern bool opt_limit_usize_gap;
extern const char *opt_malloc_conf_symlink;
extern const char *opt_malloc_conf_env_var;
/* Escape free-fastpath when ptr & mask == 0 (for sanitization purpose). */
extern uintptr_t san_cache_bin_nonfast_mask;
/* Number of CPUs. */
extern unsigned ncpus;
/* Number of arenas used for automatic multiplexing of threads and arenas. */
extern unsigned narenas_auto;
/* Base index for manual arenas. */
extern unsigned manual_arena_base;
/*
* Arenas that are used to service external requests. Not all elements of the
* arenas array are necessarily used; arenas are created lazily as needed.
*/
extern atomic_p_t arenas[];
void *a0malloc(size_t size);
void a0dalloc(void *ptr);
void *bootstrap_malloc(size_t size);
void *bootstrap_calloc(size_t num, size_t size);
void bootstrap_free(void *ptr);
void arena_set(unsigned ind, arena_t *arena);
unsigned narenas_total_get(void);
arena_t *arena_init(tsdn_t *tsdn, unsigned ind, const arena_config_t *config);
arena_t *arena_choose_hard(tsd_t *tsd, bool internal);
void arena_migrate(tsd_t *tsd, arena_t *oldarena, arena_t *newarena);
void iarena_cleanup(tsd_t *tsd);
void arena_cleanup(tsd_t *tsd);
size_t batch_alloc(void **ptrs, size_t num, size_t size, int flags);
void jemalloc_prefork(void);
void jemalloc_postfork_parent(void);
void jemalloc_postfork_child(void);
void sdallocx_default(void *ptr, size_t size, int flags);
void free_default(void *ptr);
void *malloc_default(size_t size);
#endif /* JEMALLOC_INTERNAL_EXTERNS_H */