From ec07fc3c5fd810cbb7e003807537df9d32f66eb0 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Mon, 27 Apr 2026 15:44:13 -0700 Subject: [PATCH] Extract initialization logic from jemalloc.c into jemalloc_init module --- Makefile.in | 1 + include/jemalloc/internal/jemalloc_init.h | 42 ++ .../internal/jemalloc_internal_externs.h | 4 - .../internal/jemalloc_internal_inlines_c.h | 6 +- .../internal/jemalloc_internal_types.h | 8 - .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2019/jemalloc/jemalloc.vcxproj | 1 + .../vc2019/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2022/jemalloc/jemalloc.vcxproj | 1 + .../vc2022/jemalloc/jemalloc.vcxproj.filters | 3 + src/arenas_management.c | 4 +- src/jemalloc.c | 697 +----------------- src/jemalloc_init.c | 687 +++++++++++++++++ 16 files changed, 758 insertions(+), 707 deletions(-) create mode 100644 include/jemalloc/internal/jemalloc_init.h create mode 100644 src/jemalloc_init.c diff --git a/Makefile.in b/Makefile.in index 59aa8e5a..a8d5ff5e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -95,6 +95,7 @@ LIBJEMALLOC := $(LIBPREFIX)jemalloc$(install_suffix) BINS := $(objroot)bin/jemalloc-config $(objroot)bin/jemalloc.sh $(objroot)bin/jeprof C_HDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h C_SRCS := $(srcroot)src/jemalloc.c \ + $(srcroot)src/jemalloc_init.c \ $(srcroot)src/arena.c \ $(srcroot)src/arenas_management.c \ $(srcroot)src/background_thread.c \ diff --git a/include/jemalloc/internal/jemalloc_init.h b/include/jemalloc/internal/jemalloc_init.h new file mode 100644 index 00000000..6e154be9 --- /dev/null +++ b/include/jemalloc/internal/jemalloc_init.h @@ -0,0 +1,42 @@ +#ifndef JEMALLOC_INTERNAL_JEMALLOC_INIT_H +#define JEMALLOC_INTERNAL_JEMALLOC_INIT_H + +enum malloc_init_e { + malloc_init_uninitialized = 3, + malloc_init_a0_initialized = 2, + malloc_init_recursible = 1, + malloc_init_initialized = 0 /* Common case --> jnz. */ +}; +typedef enum malloc_init_e malloc_init_t; + +extern malloc_init_t malloc_init_state; + +bool malloc_is_initializer(void); +bool malloc_initializer_is_set(void); +void malloc_initializer_set(void); + +bool malloc_init_hard_a0(void); +bool malloc_init_hard(void); + +JEMALLOC_ALWAYS_INLINE bool +malloc_init_a0(void) { + if (unlikely(malloc_init_state == malloc_init_uninitialized)) { + return malloc_init_hard_a0(); + } + return false; +} + +JEMALLOC_ALWAYS_INLINE bool +malloc_initialized(void) { + return (malloc_init_state == malloc_init_initialized); +} + +JEMALLOC_ALWAYS_INLINE bool +malloc_init(void) { + if (unlikely(!malloc_initialized()) && malloc_init_hard()) { + return true; + } + return false; +} + +#endif /* JEMALLOC_INTERNAL_JEMALLOC_INIT_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index f714fff8..43057b1a 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -37,7 +37,6 @@ extern bool opt_zero; extern unsigned opt_narenas; extern fxp_t opt_narenas_ratio; extern zero_realloc_action_t opt_zero_realloc_action; -extern malloc_init_t malloc_init_state; extern const char *const zero_realloc_mode_names[]; extern atomic_zu_t zero_realloc_count; extern bool opt_cache_oblivious; @@ -54,9 +53,6 @@ extern uintptr_t san_cache_bin_nonfast_mask; /* Number of CPUs. 
 */
 extern unsigned ncpus;
 
-/* Will be refactored in subsequent commit */
-bool malloc_init_hard_a0(void);
-
 void *bootstrap_malloc(size_t size);
 void *bootstrap_calloc(size_t num, size_t size);
 void bootstrap_free(void *ptr);
diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h
index 2c61f8c4..5dc14a7b 100644
--- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h
+++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h
@@ -6,6 +6,7 @@
 #include "jemalloc/internal/arena_inlines_b.h"
 #include "jemalloc/internal/emap.h"
 #include "jemalloc/internal/hook.h"
+#include "jemalloc/internal/jemalloc_init.h"
 #include "jemalloc/internal/jemalloc_internal_types.h"
 #include "jemalloc/internal/log.h"
 #include "jemalloc/internal/sz.h"
@@ -280,11 +281,6 @@ fastpath_success_finish(
 	}
 }
 
-JEMALLOC_ALWAYS_INLINE bool
-malloc_initialized(void) {
-	return (malloc_init_state == malloc_init_initialized);
-}
-
 /*
  * malloc() fastpath. Included here so that we can inline it into operator new;
  * function call overhead there is non-negligible as a fraction of total CPU in
diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h
index 0ade5461..985dc15f 100644
--- a/include/jemalloc/internal/jemalloc_internal_types.h
+++ b/include/jemalloc/internal/jemalloc_internal_types.h
@@ -20,14 +20,6 @@ typedef enum zero_realloc_action_e zero_realloc_action_t;
 /* Signature of write callback. */
 typedef void(write_cb_t)(void *, const char *);
 
-enum malloc_init_e {
-	malloc_init_uninitialized = 3,
-	malloc_init_a0_initialized = 2,
-	malloc_init_recursible = 1,
-	malloc_init_initialized = 0 /* Common case --> jnz. */
-};
-typedef enum malloc_init_e malloc_init_t;
-
 /*
  * Flags bits:
  *
diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
index 63e49118..881e1862 100644
--- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
+++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
@@ -69,6 +69,7 @@
+    <ClCompile Include="..\..\..\..\src\jemalloc_init.c" />
diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
index c0100096..7595606f 100644
--- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
+++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
@@ -208,5 +208,8 @@
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\..\src\jemalloc_init.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
 </Project>
diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
index 409f2195..b655de65 100644
--- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
+++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
@@ -69,6 +69,7 @@
+    <ClCompile Include="..\..\..\..\src\jemalloc_init.c" />
diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
index c0100096..7595606f 100644
--- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
+++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
@@ -208,5 +208,8 @@
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\..\src\jemalloc_init.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
 </Project>
diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj
index 963ef5cb..790d79d8 100644
--- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj
+++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj
@@ -69,6 +69,7 @@
+    <ClCompile Include="..\..\..\..\src\jemalloc_init.c" />
diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters
index c0100096..7595606f 100644
--- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters
+++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters
@@ -208,5 +208,8 @@
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\..\src\jemalloc_init.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
 </Project>
diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj
index 84e57f28..9dfc7d84 100644
--- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj
+++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj
@@ -69,6 +69,7 @@
+    <ClCompile Include="..\..\..\..\src\jemalloc_init.c" />
diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters
index c0100096..7595606f 100644
--- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters
+++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters
@@ -208,5 +208,8 @@
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\..\src\jemalloc_init.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
 </Project>
diff --git a/src/arenas_management.c b/src/arenas_management.c
index 03246bfd..261557b6 100644
--- a/src/arenas_management.c
+++ b/src/arenas_management.c
@@ -2,6 +2,7 @@
 #include "jemalloc/internal/jemalloc_internal_includes.h"
 
 #include "jemalloc/internal/arenas_management.h"
+#include "jemalloc/internal/jemalloc_init.h"
 #include "jemalloc/internal/malloc_io.h"
 #include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/sz.h"
@@ -69,8 +70,7 @@ manual_arena_base_set(unsigned base) {
 
 void *
 a0ialloc(size_t size, bool zero, bool is_internal) {
-	if (unlikely(malloc_init_state == malloc_init_uninitialized)
-	    && malloc_init_hard_a0()) {
+	if (unlikely(malloc_init_a0())) {
 		return NULL;
 	}
 
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 45fd568a..76835068 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -12,6 +12,7 @@
 #include "jemalloc/internal/fxp.h"
 #include "jemalloc/internal/san.h"
 #include "jemalloc/internal/hook.h"
+#include "jemalloc/internal/jemalloc_init.h"
 #include "jemalloc/internal/jemalloc_internal_types.h"
 #include "jemalloc/internal/log.h"
 #include "jemalloc/internal/malloc_io.h"
@@ -182,74 +183,9 @@ bool opt_hpa = false;
 hpa_shard_opts_t opt_hpa_opts = HPA_SHARD_OPTS_DEFAULT;
 sec_opts_t opt_hpa_sec_opts = SEC_OPTS_DEFAULT;
 
-malloc_init_t malloc_init_state = malloc_init_uninitialized;
-
 /* False should be the common case. Set to true to trigger initialization. */
 bool malloc_slow = true;
 
-/* When malloc_slow is true, set the corresponding bits for sanity check. */
-enum {
-	flag_opt_junk_alloc = (1U),
-	flag_opt_junk_free = (1U << 1),
-	flag_opt_zero = (1U << 2),
-	flag_opt_utrace = (1U << 3),
-	flag_opt_xmalloc = (1U << 4)
-};
-static uint8_t malloc_slow_flags;
-
-#ifdef JEMALLOC_THREADED_INIT
-/* Used to let the initializing thread recursively allocate. */
-# define NO_INITIALIZER ((unsigned long)0)
-# define INITIALIZER pthread_self()
-# define IS_INITIALIZER \
-	(pthread_equal(malloc_initializer, pthread_self()))
-static pthread_t malloc_initializer = NO_INITIALIZER;
-#else
-# define NO_INITIALIZER false
-# define INITIALIZER true
-# define IS_INITIALIZER malloc_initializer
-static bool malloc_initializer = NO_INITIALIZER;
-#endif
-
-/* Used to avoid initialization races. */
-#ifdef _WIN32
-# if _WIN32_WINNT >= 0x0600
-static malloc_mutex_t init_lock = SRWLOCK_INIT;
-# else
-static malloc_mutex_t init_lock;
-static bool init_lock_initialized = false;
-
-JEMALLOC_ATTR(constructor)
-static void WINAPI
-_init_init_lock(void) {
-	/*
-	 * If another constructor in the same binary is using mallctl to e.g.
-	 * set up extent hooks, it may end up running before this one, and
-	 * malloc_init_hard will crash trying to lock the uninitialized lock. So
-	 * we force an initialization of the lock in malloc_init_hard as well.
- * We don't try to care about atomicity of the accessed to the - * init_lock_initialized boolean, since it really only matters early in - * the process creation, before any separate thread normally starts - * doing anything. - */ - if (!init_lock_initialized) { - malloc_mutex_init(&init_lock, "init", WITNESS_RANK_INIT, - malloc_mutex_rank_exclusive); - } - init_lock_initialized = true; -} - -# ifdef _MSC_VER -# pragma section(".CRT$XCU", read) -JEMALLOC_SECTION(".CRT$XCU") -JEMALLOC_ATTR(used) -static const void(WINAPI *init_init_lock)(void) = _init_init_lock; -# endif -# endif -#else -static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; -#endif - typedef struct { void *p; /* Input pointer (as in realloc(p, s)). */ size_t s; /* Request size. */ @@ -274,35 +210,11 @@ typedef struct { #endif -/******************************************************************************/ -/* - * Function prototypes for static functions that are referenced prior to - * definition. - */ - -static bool malloc_init_hard(void); - /******************************************************************************/ /* * Begin miscellaneous support functions. */ -JEMALLOC_ALWAYS_INLINE bool -malloc_init_a0(void) { - if (unlikely(malloc_init_state == malloc_init_uninitialized)) { - return malloc_init_hard_a0(); - } - return false; -} - -JEMALLOC_ALWAYS_INLINE bool -malloc_init(void) { - if (unlikely(!malloc_initialized()) && malloc_init_hard()) { - return true; - } - return false; -} - /* * FreeBSD's libc uses the bootstrap_*() functions in bootstrap-sensitive * situations that cannot tolerate TLS variable access (TLS allocation and very @@ -340,40 +252,6 @@ bootstrap_free(void *ptr) { a0idalloc(ptr, false); } -static void -stats_print_atexit(void) { - if (config_stats) { - tsdn_t *tsdn; - unsigned narenas, i; - - tsdn = tsdn_fetch(); - - /* - * Merge stats from extant threads. This is racy, since - * individual threads do not lock when recording tcache stats - * events. As a consequence, the final stats may be slightly - * out of date by the time they are reported, if other threads - * continue to allocate. - */ - for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { - arena_t *arena = arena_get(tsdn, i, false); - if (arena != NULL) { - tcache_slow_t *tcache_slow; - - malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); - ql_foreach ( - tcache_slow, &arena->tcache_ql, link) { - tcache_stats_merge( - tsdn, tcache_slow->tcache, arena); - } - malloc_mutex_unlock( - tsdn, &arena->tcache_ql_mtx); - } - } - } - je_malloc_stats_print(NULL, NULL, opt_stats_print_opts); -} - /* * Ensure that we don't hold any locks upon entry to or exit from allocator * code (in a "broad" sense that doesn't count a reentrant allocation as an @@ -403,563 +281,6 @@ check_entry_exit_locking(tsdn_t *tsdn) { * End miscellaneous support functions. */ /******************************************************************************/ -/* - * Begin initialization functions. - */ - -static unsigned -malloc_ncpus(void) { - long result; - -#ifdef _WIN32 - SYSTEM_INFO si; - GetSystemInfo(&si); - result = si.dwNumberOfProcessors; -#elif defined(CPU_COUNT) - /* - * glibc >= 2.6 has the CPU_COUNT macro. - * - * glibc's sysconf() uses isspace(). glibc allocates for the first time - * *before* setting up the isspace tables. Therefore we need a - * different method to get the number of CPUs. - * - * The getaffinity approach is also preferred when only a subset of CPUs - * is available, to avoid using more arenas than necessary. 
- */ - { -# if defined(__FreeBSD__) || defined(__DragonFly__) - cpuset_t set; -# else - cpu_set_t set; -# endif -# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) - sched_getaffinity(0, sizeof(set), &set); -# else - pthread_getaffinity_np(pthread_self(), sizeof(set), &set); -# endif - result = CPU_COUNT(&set); - } -#else - result = sysconf(_SC_NPROCESSORS_ONLN); -#endif - return ((result == -1) ? 1 : (unsigned)result); -} - -/* - * Ensure that number of CPUs is determistinc, i.e. it is the same based on: - * - sched_getaffinity() - * - _SC_NPROCESSORS_ONLN - * - _SC_NPROCESSORS_CONF - * Since otherwise tricky things is possible with percpu arenas in use. - */ -static bool -malloc_cpu_count_is_deterministic(void) { -#ifdef _WIN32 - return true; -#else - long cpu_onln = sysconf(_SC_NPROCESSORS_ONLN); - long cpu_conf = sysconf(_SC_NPROCESSORS_CONF); - if (cpu_onln != cpu_conf) { - return false; - } -# if defined(CPU_COUNT) -# if defined(__FreeBSD__) || defined(__DragonFly__) - cpuset_t set; -# else - cpu_set_t set; -# endif /* __FreeBSD__ */ -# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) - sched_getaffinity(0, sizeof(set), &set); -# else /* !JEMALLOC_HAVE_SCHED_SETAFFINITY */ - pthread_getaffinity_np(pthread_self(), sizeof(set), &set); -# endif /* JEMALLOC_HAVE_SCHED_SETAFFINITY */ - long cpu_affinity = CPU_COUNT(&set); - if (cpu_affinity != cpu_conf) { - return false; - } -# endif /* CPU_COUNT */ - return true; -#endif -} - -static void -malloc_slow_flag_init(void) { - /* - * Combine the runtime options into malloc_slow for fast path. Called - * after processing all the options. - */ - malloc_slow_flags |= (opt_junk_alloc ? flag_opt_junk_alloc : 0) - | (opt_junk_free ? flag_opt_junk_free : 0) - | (opt_zero ? flag_opt_zero : 0) - | (opt_utrace ? flag_opt_utrace : 0) - | (opt_xmalloc ? flag_opt_xmalloc : 0); - - malloc_slow = (malloc_slow_flags != 0); -} - -static bool -malloc_init_hard_needed(void) { - if (malloc_initialized() - || (IS_INITIALIZER - && malloc_init_state == malloc_init_recursible)) { - /* - * Another thread initialized the allocator before this one - * acquired init_lock, or this thread is the initializing - * thread, and it is recursively allocating. - */ - return false; - } -#ifdef JEMALLOC_THREADED_INIT - if (malloc_initializer != NO_INITIALIZER && !IS_INITIALIZER) { - /* Busy-wait until the initializing thread completes. */ - spin_t spinner = SPIN_INITIALIZER; - do { - malloc_mutex_unlock(TSDN_NULL, &init_lock); - spin_adaptive(&spinner); - malloc_mutex_lock(TSDN_NULL, &init_lock); - } while (!malloc_initialized()); - return false; - } -#endif - return true; -} - -static bool -malloc_init_hard_a0_locked(void) { - malloc_initializer = INITIALIZER; - - JEMALLOC_DIAGNOSTIC_PUSH - JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS - sc_data_t sc_data = {0}; - JEMALLOC_DIAGNOSTIC_POP - - /* - * Ordering here is somewhat tricky; we need sc_boot() first, since that - * determines what the size classes will be, and then - * malloc_conf_init(), since any slab size tweaking will need to be done - * before sz_boot and bin_info_boot, which assume that the values they - * read out of sc_data_global are final. - */ - sc_boot(&sc_data); - unsigned bin_shard_sizes[SC_NBINS]; - bin_shard_sizes_boot(bin_shard_sizes); - /* - * prof_boot0 only initializes opt_prof_prefix. We need to do it before - * we parse malloc_conf options, in case malloc_conf parsing overwrites - * it. 
-	 */
-	if (config_prof) {
-		prof_boot0();
-	}
-	char readlink_buf[PATH_MAX + 1];
-	readlink_buf[0] = '\0';
-	malloc_conf_init(&sc_data, bin_shard_sizes, readlink_buf);
-	san_init(opt_lg_san_uaf_align);
-	sz_boot(&sc_data, opt_cache_oblivious);
-	bin_info_boot(&sc_data, bin_shard_sizes);
-
-	if (opt_stats_print) {
-		/* Print statistics at exit. */
-		if (atexit(stats_print_atexit) != 0) {
-			malloc_write("<jemalloc>: Error in atexit()\n");
-			if (opt_abort) {
-				abort();
-			}
-		}
-	}
-
-	if (stats_boot()) {
-		return true;
-	}
-	if (pages_boot()) {
-		return true;
-	}
-	if (base_boot(TSDN_NULL)) {
-		return true;
-	}
-	/* emap_global is static, hence zeroed. */
-	if (emap_init(&arena_emap_global, b0get(), /* zeroed */ true)) {
-		return true;
-	}
-	if (extent_boot()) {
-		return true;
-	}
-	if (ctl_boot()) {
-		return true;
-	}
-	if (config_prof) {
-		prof_boot1();
-	}
-	if (opt_hpa && !hpa_supported()) {
-		malloc_printf(
-		    "<jemalloc>: HPA not supported in the current "
-		    "configuration; %s.",
-		    opt_abort_conf ? "aborting" : "disabling");
-		if (opt_abort_conf) {
-			malloc_abort_invalid_conf();
-		} else {
-			opt_hpa = false;
-		}
-	}
-	if (arena_boot(&sc_data, b0get(), opt_hpa)) {
-		return true;
-	}
-	if (tcache_boot(TSDN_NULL, b0get())) {
-		return true;
-	}
-	if (arenas_management_boot()) {
-		return true;
-	}
-	hook_boot();
-	experimental_thread_events_boot();
-	/*
-	 * Create enough scaffolding to allow recursive allocation in
-	 * malloc_ncpus().
-	 */
-	narenas_auto_set(1);
-	manual_arena_base_set(narenas_auto + 1);
-	memset(arenas, 0, sizeof(arena_t *) * narenas_auto);
-	/*
-	 * Initialize one arena here. The rest are lazily created in
-	 * arena_choose_hard().
-	 */
-	if (arena_init(TSDN_NULL, 0, &arena_config_default) == NULL) {
-		return true;
-	}
-
-	if (opt_hpa && !hpa_supported()) {
-		malloc_printf(
-		    "<jemalloc>: HPA not supported in the current "
-		    "configuration; %s.",
-		    opt_abort_conf ? "aborting" : "disabling");
-		if (opt_abort_conf) {
-			malloc_abort_invalid_conf();
-		} else {
-			opt_hpa = false;
-		}
-	}
-
-	malloc_init_state = malloc_init_a0_initialized;
-
-	size_t buf_len = strlen(readlink_buf);
-	if (buf_len > 0) {
-		void *readlink_allocated = a0ialloc(buf_len + 1, false, true);
-		if (readlink_allocated != NULL) {
-			memcpy(readlink_allocated, readlink_buf, buf_len + 1);
-			opt_malloc_conf_symlink = readlink_allocated;
-		}
-	}
-
-	return false;
-}
-
-bool
-malloc_init_hard_a0(void) {
-	bool ret;
-
-	malloc_mutex_lock(TSDN_NULL, &init_lock);
-	ret = malloc_init_hard_a0_locked();
-	malloc_mutex_unlock(TSDN_NULL, &init_lock);
-	return ret;
-}
-
-/* Initialize data structures which may trigger recursive allocation. */
-static bool
-malloc_init_hard_recursible(void) {
-	malloc_init_state = malloc_init_recursible;
-
-	ncpus = malloc_ncpus();
-	if (opt_percpu_arena != percpu_arena_disabled) {
-		bool cpu_count_is_deterministic =
-		    malloc_cpu_count_is_deterministic();
-		if (!cpu_count_is_deterministic) {
-			/*
-			 * If # of CPU is not deterministic, and narenas not
-			 * specified, disables per cpu arena since it may not
-			 * detect CPU IDs properly.
-			 */
-			if (opt_narenas == 0) {
-				opt_percpu_arena = percpu_arena_disabled;
-				malloc_write(
-				    "<jemalloc>: Number of CPUs "
-				    "detected is not deterministic. Per-CPU "
-				    "arena disabled.\n");
-				if (opt_abort_conf) {
-					malloc_abort_invalid_conf();
-				}
-				if (opt_abort) {
-					abort();
-				}
-			}
-		}
-	}
-
-#if (defined(JEMALLOC_HAVE_PTHREAD_ATFORK) && !defined(JEMALLOC_MUTEX_INIT_CB) \
-    && !defined(JEMALLOC_ZONE) && !defined(_WIN32) \
-    && !defined(__native_client__))
-	/* LinuxThreads' pthread_atfork() allocates. */
-	if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent,
-	    jemalloc_postfork_child)
-	    != 0) {
-		malloc_write("<jemalloc>: Error in pthread_atfork()\n");
-		if (opt_abort) {
-			abort();
-		}
-		return true;
-	}
-#endif
-
-	if (background_thread_boot0()) {
-		return true;
-	}
-
-	return false;
-}
-
-static unsigned
-malloc_narenas_default(void) {
-	assert(ncpus > 0);
-	/*
-	 * For SMP systems, create more than one arena per CPU by
-	 * default.
-	 */
-	if (ncpus > 1) {
-		fxp_t fxp_ncpus = FXP_INIT_INT(ncpus);
-		fxp_t goal = fxp_mul(fxp_ncpus, opt_narenas_ratio);
-		uint32_t int_goal = fxp_round_nearest(goal);
-		if (int_goal == 0) {
-			return 1;
-		}
-		return int_goal;
-	} else {
-		return 1;
-	}
-}
-
-static percpu_arena_mode_t
-percpu_arena_as_initialized(percpu_arena_mode_t mode) {
-	assert(!malloc_initialized());
-	assert(mode <= percpu_arena_disabled);
-
-	if (mode != percpu_arena_disabled) {
-		mode += percpu_arena_mode_enabled_base;
-	}
-
-	return mode;
-}
-
-static bool
-malloc_init_narenas(tsdn_t *tsdn) {
-	assert(ncpus > 0);
-
-	if (opt_percpu_arena != percpu_arena_disabled) {
-		if (!have_percpu_arena || malloc_getcpu() < 0) {
-			opt_percpu_arena = percpu_arena_disabled;
-			malloc_printf(
-			    "<jemalloc>: perCPU arena getcpu() not "
-			    "available. Setting narenas to %u.\n",
-			    opt_narenas ? opt_narenas
-				: malloc_narenas_default());
-			if (opt_abort) {
-				abort();
-			}
-		} else {
-			if (ncpus >= MALLOCX_ARENA_LIMIT) {
-				malloc_printf(
-				    "<jemalloc>: narenas w/ percpu "
-				    "arena beyond limit (%d)\n",
-				    ncpus);
-				if (opt_abort) {
-					abort();
-				}
-				return true;
-			}
-			/* NB: opt_percpu_arena isn't fully initialized yet. */
-			if (percpu_arena_as_initialized(opt_percpu_arena)
-			    == per_phycpu_arena
-			    && ncpus % 2 != 0) {
-				malloc_printf(
-				    "<jemalloc>: invalid "
-				    "configuration -- per physical CPU arena "
-				    "with odd number (%u) of CPUs (no hyper "
-				    "threading?).\n",
-				    ncpus);
-				if (opt_abort)
-					abort();
-			}
-			unsigned n = percpu_arena_ind_limit(
-			    percpu_arena_as_initialized(opt_percpu_arena));
-			if (opt_narenas < n) {
-				/*
-				 * If narenas is specified with percpu_arena
-				 * enabled, actual narenas is set as the greater
-				 * of the two. percpu_arena_choose will be free
-				 * to use any of the arenas based on CPU
-				 * id. This is conservative (at a small cost)
-				 * but ensures correctness.
-				 *
-				 * If for some reason the ncpus determined at
-				 * boot is not the actual number (e.g. because
-				 * of affinity setting from numactl), reserving
-				 * narenas this way provides a workaround for
-				 * percpu_arena.
-				 */
-				opt_narenas = n;
-			}
-		}
-	}
-	if (opt_narenas == 0) {
-		opt_narenas = malloc_narenas_default();
-	}
-	assert(opt_narenas > 0);
-
-	narenas_auto_set(opt_narenas);
-	/*
-	 * Limit the number of arenas to the indexing range of MALLOCX_ARENA().
-	 */
-	if (narenas_auto >= MALLOCX_ARENA_LIMIT) {
-		narenas_auto_set(MALLOCX_ARENA_LIMIT - 1);
-		malloc_printf("<jemalloc>: Reducing narenas to limit (%d)\n",
-		    narenas_auto);
-	}
-	narenas_total_set(narenas_auto);
-	if (arena_init_huge(tsdn, arena_get(tsdn, 0, false))) {
-		narenas_total_inc();
-	}
-	manual_arena_base_set(narenas_total_get());
-
-	return false;
-}
-
-static void
-malloc_init_percpu(void) {
-	opt_percpu_arena = percpu_arena_as_initialized(opt_percpu_arena);
-}
-
-static bool
-malloc_init_hard_finish(void) {
-	if (malloc_mutex_boot()) {
-		return true;
-	}
-
-	malloc_init_state = malloc_init_initialized;
-	malloc_slow_flag_init();
-
-	return false;
-}
-
-static void
-malloc_init_hard_cleanup(tsdn_t *tsdn, bool reentrancy_set) {
-	malloc_mutex_assert_owner(tsdn, &init_lock);
-	malloc_mutex_unlock(tsdn, &init_lock);
-	if (reentrancy_set) {
-		assert(!tsdn_null(tsdn));
-		tsd_t *tsd = tsdn_tsd(tsdn);
-		assert(tsd_reentrancy_level_get(tsd) > 0);
-		post_reentrancy(tsd);
-	}
-}
-
-static bool
-malloc_init_hard(void) {
-	tsd_t *tsd;
-
-	assert(TCACHE_MAXCLASS_LIMIT <= USIZE_GROW_SLOW_THRESHOLD);
-	assert(SC_LOOKUP_MAXCLASS <= USIZE_GROW_SLOW_THRESHOLD);
-	/*
-	 * This asserts an extreme case where TINY_MAXCLASS is larger
-	 * than LARGE_MINCLASS. It could only happen if some constants
-	 * are configured miserably wrong.
-	 */
-	assert(SC_NTINY == 0 || SC_LG_TINY_MAXCLASS <= SC_LG_LARGE_MINCLASS);
-
-#if defined(_WIN32) && _WIN32_WINNT < 0x0600
-	_init_init_lock();
-#endif
-	malloc_mutex_lock(TSDN_NULL, &init_lock);
-
-#define UNLOCK_RETURN(tsdn, ret, reentrancy) \
-	malloc_init_hard_cleanup(tsdn, reentrancy); \
-	return ret;
-
-	if (!malloc_init_hard_needed()) {
-		UNLOCK_RETURN(TSDN_NULL, false, false)
-	}
-
-	if (malloc_init_state != malloc_init_a0_initialized
-	    && malloc_init_hard_a0_locked()) {
-		UNLOCK_RETURN(TSDN_NULL, true, false)
-	}
-
-	malloc_mutex_unlock(TSDN_NULL, &init_lock);
-	/* Recursive allocation relies on functional tsd. */
-	tsd = malloc_tsd_boot0();
-	if (tsd == NULL) {
-		return true;
-	}
-	if (malloc_init_hard_recursible()) {
-		return true;
-	}
-
-	malloc_mutex_lock(tsd_tsdn(tsd), &init_lock);
-	/* Set reentrancy level to 1 during init. */
-	pre_reentrancy(tsd, NULL);
-	/* Initialize narenas before prof_boot2 (for allocation). */
-	if (malloc_init_narenas(tsd_tsdn(tsd))
-	    || background_thread_boot1(tsd_tsdn(tsd), b0get())) {
-		UNLOCK_RETURN(tsd_tsdn(tsd), true, true)
-	}
-	if (opt_hpa) {
-		/*
-		 * We didn't initialize arena 0 hpa_shard in arena_new, because
-		 * background_thread_enabled wasn't initialized yet, but we
-		 * need it to set correct value for deferral_allowed.
-		 */
-		arena_t *a0 = arena_get(tsd_tsdn(tsd), 0, false);
-		hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts;
-		hpa_shard_opts.deferral_allowed = background_thread_enabled();
-		if (pa_shard_enable_hpa(tsd_tsdn(tsd), &a0->pa_shard,
-		    &hpa_shard_opts, &opt_hpa_sec_opts)) {
-			UNLOCK_RETURN(tsd_tsdn(tsd), true, true)
-		}
-	}
-	if (config_prof && prof_boot2(tsd, b0get())) {
-		UNLOCK_RETURN(tsd_tsdn(tsd), true, true)
-	}
-
-	malloc_init_percpu();
-
-	if (malloc_init_hard_finish()) {
-		UNLOCK_RETURN(tsd_tsdn(tsd), true, true)
-	}
-	post_reentrancy(tsd);
-	malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
-
-	witness_assert_lockless(
-	    witness_tsd_tsdn(tsd_witness_tsdp_get_unsafe(tsd)));
-	malloc_tsd_boot1();
-	/* Update TSD after tsd_boot1.
*/ - tsd = tsd_fetch(); - if (opt_background_thread) { - assert(have_background_thread); - /* - * Need to finish init & unlock first before creating background - * threads (pthread_create depends on malloc). ctl_init (which - * sets isthreaded) needs to be called without holding any lock. - */ - background_thread_ctl_init(tsd_tsdn(tsd)); - if (background_thread_create(tsd, 0)) { - return true; - } - } -#undef UNLOCK_RETURN - return false; -} - -/* - * End initialization functions. - */ -/******************************************************************************/ /* * Begin allocation-path internal functions and data structures. */ @@ -1610,7 +931,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { } assert(ptr != NULL); - assert(malloc_initialized() || IS_INITIALIZER); + assert(malloc_initialized() || malloc_is_initializer()); emap_alloc_ctx_t alloc_ctx; emap_alloc_ctx_lookup( @@ -1644,7 +965,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { } assert(ptr != NULL); - assert(malloc_initialized() || IS_INITIALIZER); + assert(malloc_initialized() || malloc_is_initializer()); emap_alloc_ctx_t alloc_ctx; szind_t szind = sz_size2index(usize); @@ -2158,7 +1479,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { assert(ptr != NULL); assert(size != 0); - assert(malloc_initialized() || IS_INITIALIZER); + assert(malloc_initialized() || malloc_is_initializer()); tsd = tsd_fetch(); check_entry_exit_locking(tsd_tsdn(tsd)); @@ -2432,7 +1753,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { assert(ptr != NULL); assert(size != 0); assert(SIZE_T_MAX - size >= extra); - assert(malloc_initialized() || IS_INITIALIZER); + assert(malloc_initialized() || malloc_is_initializer()); tsd = tsd_fetch(); check_entry_exit_locking(tsd_tsdn(tsd)); @@ -2515,7 +1836,7 @@ JEMALLOC_ATTR(pure) je_sallocx(const void *ptr, int flags) { LOG("core.sallocx.entry", "ptr: %p, flags: %d", ptr, flags); - assert(malloc_initialized() || IS_INITIALIZER); + assert(malloc_initialized() || malloc_is_initializer()); assert(ptr != NULL); tsdn = tsdn_fetch(); @@ -2539,7 +1860,7 @@ je_dallocx(void *ptr, int flags) { LOG("core.dallocx.entry", "ptr: %p, flags: %d", ptr, flags); assert(ptr != NULL); - assert(malloc_initialized() || IS_INITIALIZER); + assert(malloc_initialized() || malloc_is_initializer()); tsd_t *tsd = tsd_fetch_min(); bool fast = tsd_fast(tsd); @@ -2576,7 +1897,7 @@ inallocx(tsdn_t *tsdn, size_t size, int flags) { JEMALLOC_NOINLINE void sdallocx_default(void *ptr, size_t size, int flags) { assert(ptr != NULL); - assert(malloc_initialized() || IS_INITIALIZER); + assert(malloc_initialized() || malloc_is_initializer()); tsd_t *tsd = tsd_fetch_min(); bool fast = tsd_fast(tsd); @@ -2726,7 +2047,7 @@ je_malloc_stats_print( JEMALLOC_ALWAYS_INLINE size_t je_malloc_usable_size_impl(JEMALLOC_USABLE_SIZE_CONST void *ptr) { - assert(malloc_initialized() || IS_INITIALIZER); + assert(malloc_initialized() || malloc_is_initializer()); tsdn_t *tsdn = tsdn_fetch(); check_entry_exit_locking(tsdn); diff --git a/src/jemalloc_init.c b/src/jemalloc_init.c new file mode 100644 index 00000000..37e1350f --- /dev/null +++ b/src/jemalloc_init.c @@ -0,0 +1,687 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/arenas_management.h" +#include "jemalloc/internal/conf.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/emap.h" +#include 
"jemalloc/internal/extent_dss.h" +#include "jemalloc/internal/extent_mmap.h" +#include "jemalloc/internal/hook.h" +#include "jemalloc/internal/jemalloc_init.h" +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/safety_check.h" +#include "jemalloc/internal/san.h" +#include "jemalloc/internal/sc.h" +#include "jemalloc/internal/spin.h" +#include "jemalloc/internal/sz.h" +#include "jemalloc/internal/thread_event.h" + +#ifdef JEMALLOC_THREADED_INIT +/* Used to let the initializing thread recursively allocate. */ +# define NO_INITIALIZER ((unsigned long)0) +# define INITIALIZER pthread_self() +static pthread_t malloc_initializer = NO_INITIALIZER; +#else +# define NO_INITIALIZER false +# define INITIALIZER true +static bool malloc_initializer = NO_INITIALIZER; +#endif + +bool +malloc_is_initializer(void) { +#ifdef JEMALLOC_THREADED_INIT + return pthread_equal(malloc_initializer, pthread_self()); +#else + return malloc_initializer; +#endif +} + +bool +malloc_initializer_is_set(void) { + return malloc_initializer != NO_INITIALIZER; +} + +void +malloc_initializer_set(void) { + malloc_initializer = INITIALIZER; +} + +/* Used to avoid initialization races. */ +#ifdef _WIN32 +# if _WIN32_WINNT >= 0x0600 +static malloc_mutex_t init_lock = SRWLOCK_INIT; +# else +static malloc_mutex_t init_lock; +static bool init_lock_initialized = false; + +JEMALLOC_ATTR(constructor) +static void WINAPI +_init_init_lock(void) { + /* + * If another constructor in the same binary is using mallctl to e.g. + * set up extent hooks, it may end up running before this one, and + * malloc_init_hard will crash trying to lock the uninitialized lock. So + * we force an initialization of the lock in malloc_init_hard as well. + * We don't try to care about atomicity of the accessed to the + * init_lock_initialized boolean, since it really only matters early in + * the process creation, before any separate thread normally starts + * doing anything. + */ + if (!init_lock_initialized) { + malloc_mutex_init(&init_lock, "init", WITNESS_RANK_INIT, + malloc_mutex_rank_exclusive); + } + init_lock_initialized = true; +} + +# ifdef _MSC_VER +# pragma section(".CRT$XCU", read) +JEMALLOC_SECTION(".CRT$XCU") +JEMALLOC_ATTR(used) +static const void(WINAPI *init_init_lock)(void) = _init_init_lock; +# endif +# endif +#else +static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; +#endif + +malloc_init_t malloc_init_state = malloc_init_uninitialized; + +/* When malloc_slow is true, set the corresponding bits for sanity check. */ +enum { + flag_opt_junk_alloc = (1U), + flag_opt_junk_free = (1U << 1), + flag_opt_zero = (1U << 2), + flag_opt_utrace = (1U << 3), + flag_opt_xmalloc = (1U << 4) +}; +static uint8_t malloc_slow_flags; + +static void +malloc_slow_flag_init(void) { + /* + * Combine the runtime options into malloc_slow for fast path. Called + * after bootstrap is complete. + */ + malloc_slow_flags |= (opt_junk_alloc ? flag_opt_junk_alloc : 0) + | (opt_junk_free ? flag_opt_junk_free : 0) + | (opt_zero ? flag_opt_zero : 0) + | (opt_utrace ? flag_opt_utrace : 0) + | (opt_xmalloc ? 
+
+	malloc_slow = (malloc_slow_flags != 0);
+}
+
+static void stats_print_atexit(void);
+static unsigned malloc_ncpus(void);
+static bool malloc_cpu_count_is_deterministic(void);
+
+static bool
+malloc_init_hard_needed(void) {
+	if (malloc_initialized()
+	    || (malloc_is_initializer()
+		&& malloc_init_state == malloc_init_recursible)) {
+		/*
+		 * Another thread initialized the allocator before this one
+		 * acquired init_lock, or this thread is the initializing
+		 * thread, and it is recursively allocating.
+		 */
+		return false;
+	}
+#ifdef JEMALLOC_THREADED_INIT
+	if (malloc_initializer_is_set() && !malloc_is_initializer()) {
+		/* Busy-wait until the initializing thread completes. */
+		spin_t spinner = SPIN_INITIALIZER;
+		do {
+			malloc_mutex_unlock(TSDN_NULL, &init_lock);
+			spin_adaptive(&spinner);
+			malloc_mutex_lock(TSDN_NULL, &init_lock);
+		} while (!malloc_initialized());
+		return false;
+	}
+#endif
+	return true;
+}
+
+static bool
+malloc_init_hard_a0_locked(void) {
+	malloc_initializer_set();
+
+	JEMALLOC_DIAGNOSTIC_PUSH
+	JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
+	sc_data_t sc_data = {0};
+	JEMALLOC_DIAGNOSTIC_POP
+
+	/*
+	 * Ordering here is somewhat tricky; we need sc_boot() first, since that
+	 * determines what the size classes will be, and then
+	 * malloc_conf_init(), since any slab size tweaking will need to be done
+	 * before sz_boot and bin_info_boot, which assume that the values they
+	 * read out of sc_data_global are final.
+	 */
+	sc_boot(&sc_data);
+	unsigned bin_shard_sizes[SC_NBINS];
+	bin_shard_sizes_boot(bin_shard_sizes);
+	/*
+	 * prof_boot0 only initializes opt_prof_prefix. We need to do it before
+	 * we parse malloc_conf options, in case malloc_conf parsing overwrites
+	 * it.
+	 */
+	if (config_prof) {
+		prof_boot0();
+	}
+	char readlink_buf[PATH_MAX + 1];
+	readlink_buf[0] = '\0';
+	malloc_conf_init(&sc_data, bin_shard_sizes, readlink_buf);
+	san_init(opt_lg_san_uaf_align);
+	sz_boot(&sc_data, opt_cache_oblivious);
+	bin_info_boot(&sc_data, bin_shard_sizes);
+
+	if (opt_stats_print) {
+		/* Print statistics at exit. */
+		if (atexit(stats_print_atexit) != 0) {
+			malloc_write("<jemalloc>: Error in atexit()\n");
+			if (opt_abort) {
+				abort();
+			}
+		}
+	}
+
+	if (stats_boot()) {
+		return true;
+	}
+	if (pages_boot()) {
+		return true;
+	}
+	if (base_boot(TSDN_NULL)) {
+		return true;
+	}
+	/* emap_global is static, hence zeroed. */
+	if (emap_init(&arena_emap_global, b0get(), /* zeroed */ true)) {
+		return true;
+	}
+	if (extent_boot()) {
+		return true;
+	}
+	if (ctl_boot()) {
+		return true;
+	}
+	if (config_prof) {
+		prof_boot1();
+	}
+	if (opt_hpa && !hpa_supported()) {
+		malloc_printf(
+		    "<jemalloc>: HPA not supported in the current "
+		    "configuration; %s.",
+		    opt_abort_conf ? "aborting" : "disabling");
+		if (opt_abort_conf) {
+			malloc_abort_invalid_conf();
+		} else {
+			opt_hpa = false;
+		}
+	}
+	if (arena_boot(&sc_data, b0get(), opt_hpa)) {
+		return true;
+	}
+	if (tcache_boot(TSDN_NULL, b0get())) {
+		return true;
+	}
+	if (arenas_management_boot()) {
+		return true;
+	}
+	hook_boot();
+	experimental_thread_events_boot();
+	/*
+	 * Create enough scaffolding to allow recursive allocation in
+	 * malloc_ncpus().
+	 */
+	narenas_auto_set(1);
+	manual_arena_base_set(narenas_auto + 1);
+	memset(arenas, 0, sizeof(arena_t *) * narenas_auto);
+	/*
+	 * Initialize one arena here. The rest are lazily created in
+	 * arena_choose_hard().
+	 */
+	if (arena_init(TSDN_NULL, 0, &arena_config_default) == NULL) {
+		return true;
+	}
+
+	if (opt_hpa && !hpa_supported()) {
+		malloc_printf(
+		    "<jemalloc>: HPA not supported in the current "
+		    "configuration; %s.",
+		    opt_abort_conf ? "aborting" : "disabling");
+		if (opt_abort_conf) {
+			malloc_abort_invalid_conf();
+		} else {
+			opt_hpa = false;
+		}
+	}
+
+	malloc_init_state = malloc_init_a0_initialized;
+
+	size_t buf_len = strlen(readlink_buf);
+	if (buf_len > 0) {
+		void *readlink_allocated = a0malloc(buf_len + 1);
+		if (readlink_allocated != NULL) {
+			memcpy(readlink_allocated, readlink_buf, buf_len + 1);
+			opt_malloc_conf_symlink = readlink_allocated;
+		}
+	}
+
+	return false;
+}
+
+bool
+malloc_init_hard_a0(void) {
+	bool ret;
+
+	malloc_mutex_lock(TSDN_NULL, &init_lock);
+	ret = malloc_init_hard_a0_locked();
+	malloc_mutex_unlock(TSDN_NULL, &init_lock);
+	return ret;
+}
+
+static void
+stats_print_atexit(void) {
+	if (config_stats) {
+		tsdn_t *tsdn;
+		unsigned narenas, i;
+
+		tsdn = tsdn_fetch();
+
+		/*
+		 * Merge stats from extant threads. This is racy, since
+		 * individual threads do not lock when recording tcache stats
+		 * events. As a consequence, the final stats may be slightly
+		 * out of date by the time they are reported, if other threads
+		 * continue to allocate.
+		 */
+		for (i = 0, narenas = narenas_total_get(); i < narenas; i++) {
+			arena_t *arena = arena_get(tsdn, i, false);
+			if (arena != NULL) {
+				tcache_slow_t *tcache_slow;
+
+				malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
+				ql_foreach (
+				    tcache_slow, &arena->tcache_ql, link) {
+					tcache_stats_merge(
+					    tsdn, tcache_slow->tcache, arena);
+				}
+				malloc_mutex_unlock(
+				    tsdn, &arena->tcache_ql_mtx);
+			}
+		}
+	}
+	je_malloc_stats_print(NULL, NULL, opt_stats_print_opts);
+}
+
+static unsigned
+malloc_ncpus(void) {
+	long result;
+
+#ifdef _WIN32
+	SYSTEM_INFO si;
+	GetSystemInfo(&si);
+	result = si.dwNumberOfProcessors;
+#elif defined(CPU_COUNT)
+	/*
+	 * glibc >= 2.6 has the CPU_COUNT macro.
+	 *
+	 * glibc's sysconf() uses isspace(). glibc allocates for the first time
+	 * *before* setting up the isspace tables. Therefore we need a
+	 * different method to get the number of CPUs.
+	 *
+	 * The getaffinity approach is also preferred when only a subset of CPUs
+	 * is available, to avoid using more arenas than necessary.
+	 */
+	{
+# if defined(__FreeBSD__) || defined(__DragonFly__)
+		cpuset_t set;
+# else
+		cpu_set_t set;
+# endif
+# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
+		sched_getaffinity(0, sizeof(set), &set);
+# else
+		pthread_getaffinity_np(pthread_self(), sizeof(set), &set);
+# endif
+		result = CPU_COUNT(&set);
+	}
+#else
+	result = sysconf(_SC_NPROCESSORS_ONLN);
+#endif
+	return ((result == -1) ? 1 : (unsigned)result);
+}
+
+/*
+ * Ensure that number of CPUs is deterministic, i.e. it is the same based on:
+ * - sched_getaffinity()
+ * - _SC_NPROCESSORS_ONLN
+ * - _SC_NPROCESSORS_CONF
+ * Since otherwise tricky things are possible with percpu arenas in use.
+ */
+static bool
+malloc_cpu_count_is_deterministic(void) {
+#ifdef _WIN32
+	return true;
+#else
+	long cpu_onln = sysconf(_SC_NPROCESSORS_ONLN);
+	long cpu_conf = sysconf(_SC_NPROCESSORS_CONF);
+	if (cpu_onln != cpu_conf) {
+		return false;
+	}
+# if defined(CPU_COUNT)
+# if defined(__FreeBSD__) || defined(__DragonFly__)
+	cpuset_t set;
+# else
+	cpu_set_t set;
+# endif /* __FreeBSD__ */
+# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
+	sched_getaffinity(0, sizeof(set), &set);
+# else /* !JEMALLOC_HAVE_SCHED_SETAFFINITY */
+	pthread_getaffinity_np(pthread_self(), sizeof(set), &set);
+# endif /* JEMALLOC_HAVE_SCHED_SETAFFINITY */
+	long cpu_affinity = CPU_COUNT(&set);
+	if (cpu_affinity != cpu_conf) {
+		return false;
+	}
+# endif /* CPU_COUNT */
+	return true;
+#endif
+}
+
+/* Initialize data structures which may trigger recursive allocation. */
+static bool
+malloc_init_hard_recursible(void) {
+	malloc_init_state = malloc_init_recursible;
+
+	ncpus = malloc_ncpus();
+	if (opt_percpu_arena != percpu_arena_disabled) {
+		bool cpu_count_is_deterministic =
+		    malloc_cpu_count_is_deterministic();
+		if (!cpu_count_is_deterministic) {
+			/*
+			 * If the number of CPUs is not deterministic, and
+			 * narenas is not specified, disable per-CPU arena,
+			 * since CPU IDs may not be detected properly.
+			 */
+			if (opt_narenas == 0) {
+				opt_percpu_arena = percpu_arena_disabled;
+				malloc_write(
+				    "<jemalloc>: Number of CPUs "
+				    "detected is not deterministic. Per-CPU "
+				    "arena disabled.\n");
+				if (opt_abort_conf) {
+					malloc_abort_invalid_conf();
+				}
+				if (opt_abort) {
+					abort();
+				}
+			}
+		}
+	}
+
+#if (defined(JEMALLOC_HAVE_PTHREAD_ATFORK) && !defined(JEMALLOC_MUTEX_INIT_CB) \
+    && !defined(JEMALLOC_ZONE) && !defined(_WIN32) \
+    && !defined(__native_client__))
+	/* LinuxThreads' pthread_atfork() allocates. */
+	if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent,
+	    jemalloc_postfork_child)
+	    != 0) {
+		malloc_write("<jemalloc>: Error in pthread_atfork()\n");
+		if (opt_abort) {
+			abort();
+		}
+		return true;
+	}
+#endif
+
+	if (background_thread_boot0()) {
+		return true;
+	}
+
+	return false;
+}
+
+static unsigned
+malloc_narenas_default(void) {
+	assert(ncpus > 0);
+	/*
+	 * For SMP systems, create more than one arena per CPU by
+	 * default.
+	 */
+	if (ncpus > 1) {
+		fxp_t fxp_ncpus = FXP_INIT_INT(ncpus);
+		fxp_t goal = fxp_mul(fxp_ncpus, opt_narenas_ratio);
+		uint32_t int_goal = fxp_round_nearest(goal);
+		if (int_goal == 0) {
+			return 1;
+		}
+		return int_goal;
+	} else {
+		return 1;
+	}
+}
+
+static percpu_arena_mode_t
+percpu_arena_as_initialized(percpu_arena_mode_t mode) {
+	assert(!malloc_initialized());
+	assert(mode <= percpu_arena_disabled);
+
+	if (mode != percpu_arena_disabled) {
+		mode += percpu_arena_mode_enabled_base;
+	}
+
+	return mode;
+}
+
+static bool
+malloc_init_narenas(tsdn_t *tsdn) {
+	assert(ncpus > 0);
+
+	if (opt_percpu_arena != percpu_arena_disabled) {
+		if (!have_percpu_arena || malloc_getcpu() < 0) {
+			opt_percpu_arena = percpu_arena_disabled;
+			malloc_printf(
+			    "<jemalloc>: perCPU arena getcpu() not "
+			    "available. Setting narenas to %u.\n",
+			    opt_narenas ? opt_narenas
+				: malloc_narenas_default());
+			if (opt_abort) {
+				abort();
+			}
+		} else {
+			if (ncpus >= MALLOCX_ARENA_LIMIT) {
+				malloc_printf(
+				    "<jemalloc>: narenas w/ percpu "
+				    "arena beyond limit (%d)\n",
+				    ncpus);
+				if (opt_abort) {
+					abort();
+				}
+				return true;
+			}
+			/* NB: opt_percpu_arena isn't fully initialized yet. */
+			if (percpu_arena_as_initialized(opt_percpu_arena)
+			    == per_phycpu_arena
+			    && ncpus % 2 != 0) {
+				malloc_printf(
+				    "<jemalloc>: invalid "
+				    "configuration -- per physical CPU arena "
+				    "with odd number (%u) of CPUs (no hyper "
+				    "threading?).\n",
+				    ncpus);
+				if (opt_abort)
+					abort();
+			}
+			unsigned n = percpu_arena_ind_limit(
+			    percpu_arena_as_initialized(opt_percpu_arena));
+			if (opt_narenas < n) {
+				/*
+				 * If narenas is specified with percpu_arena
+				 * enabled, actual narenas is set as the greater
+				 * of the two. percpu_arena_choose will be free
+				 * to use any of the arenas based on CPU
+				 * id. This is conservative (at a small cost)
+				 * but ensures correctness.
+				 *
+				 * If for some reason the ncpus determined at
+				 * boot is not the actual number (e.g. because
+				 * of affinity setting from numactl), reserving
+				 * narenas this way provides a workaround for
+				 * percpu_arena.
+				 */
+				opt_narenas = n;
+			}
+		}
+	}
+	if (opt_narenas == 0) {
+		opt_narenas = malloc_narenas_default();
+	}
+	assert(opt_narenas > 0);
+
+	narenas_auto_set(opt_narenas);
+	/*
+	 * Limit the number of arenas to the indexing range of MALLOCX_ARENA().
+	 */
+	if (narenas_auto >= MALLOCX_ARENA_LIMIT) {
+		narenas_auto_set(MALLOCX_ARENA_LIMIT - 1);
+		malloc_printf("<jemalloc>: Reducing narenas to limit (%d)\n",
+		    narenas_auto);
+	}
+	narenas_total_set(narenas_auto);
+	if (arena_init_huge(tsdn, arena_get(tsdn, 0, false))) {
+		narenas_total_inc();
+	}
+	manual_arena_base_set(narenas_total_get());
+
+	return false;
+}
+
+static void
+malloc_init_percpu(void) {
+	opt_percpu_arena = percpu_arena_as_initialized(opt_percpu_arena);
+}
+
+static bool
+malloc_init_hard_finish(void) {
+	if (malloc_mutex_boot()) {
+		return true;
+	}
+
+	malloc_init_state = malloc_init_initialized;
+	malloc_slow_flag_init();
+
+	return false;
+}
+
+static void
+malloc_init_hard_cleanup(tsdn_t *tsdn, bool reentrancy_set) {
+	malloc_mutex_assert_owner(tsdn, &init_lock);
+	malloc_mutex_unlock(tsdn, &init_lock);
+	if (reentrancy_set) {
+		assert(!tsdn_null(tsdn));
+		tsd_t *tsd = tsdn_tsd(tsdn);
+		assert(tsd_reentrancy_level_get(tsd) > 0);
+		post_reentrancy(tsd);
+	}
+}
+
+bool
+malloc_init_hard(void) {
+	tsd_t *tsd;
+
+	assert(TCACHE_MAXCLASS_LIMIT <= USIZE_GROW_SLOW_THRESHOLD);
+	assert(SC_LOOKUP_MAXCLASS <= USIZE_GROW_SLOW_THRESHOLD);
+	/*
+	 * This asserts an extreme case where TINY_MAXCLASS is larger
+	 * than LARGE_MINCLASS. It could only happen if some constants
+	 * are configured miserably wrong.
+	 */
+	assert(SC_NTINY == 0 || SC_LG_TINY_MAXCLASS <= SC_LG_LARGE_MINCLASS);
+
+#if defined(_WIN32) && _WIN32_WINNT < 0x0600
+	_init_init_lock();
+#endif
+	malloc_mutex_lock(TSDN_NULL, &init_lock);
+
+#define UNLOCK_RETURN(tsdn, ret, reentrancy) \
+	malloc_init_hard_cleanup(tsdn, reentrancy); \
+	return ret;
+
+	if (!malloc_init_hard_needed()) {
+		UNLOCK_RETURN(TSDN_NULL, false, false)
+	}
+
+	if (malloc_init_state != malloc_init_a0_initialized
+	    && malloc_init_hard_a0_locked()) {
+		UNLOCK_RETURN(TSDN_NULL, true, false)
+	}
+
+	malloc_mutex_unlock(TSDN_NULL, &init_lock);
+	/* Recursive allocation relies on functional tsd. */
+	tsd = malloc_tsd_boot0();
+	if (tsd == NULL) {
+		return true;
+	}
+	if (malloc_init_hard_recursible()) {
+		return true;
+	}
+
+	malloc_mutex_lock(tsd_tsdn(tsd), &init_lock);
+	/* Set reentrancy level to 1 during init. */
+	pre_reentrancy(tsd, NULL);
+	/* Initialize narenas before prof_boot2 (for allocation).
*/ + if (malloc_init_narenas(tsd_tsdn(tsd)) + || background_thread_boot1(tsd_tsdn(tsd), b0get())) { + UNLOCK_RETURN(tsd_tsdn(tsd), true, true) + } + if (opt_hpa) { + /* + * We didn't initialize arena 0 hpa_shard in arena_new, because + * background_thread_enabled wasn't initialized yet, but we + * need it to set correct value for deferral_allowed. + */ + arena_t *a0 = arena_get(tsd_tsdn(tsd), 0, false); + hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts; + hpa_shard_opts.deferral_allowed = background_thread_enabled(); + if (pa_shard_enable_hpa(tsd_tsdn(tsd), &a0->pa_shard, + &hpa_shard_opts, &opt_hpa_sec_opts)) { + UNLOCK_RETURN(tsd_tsdn(tsd), true, true) + } + } + if (config_prof && prof_boot2(tsd, b0get())) { + UNLOCK_RETURN(tsd_tsdn(tsd), true, true) + } + + malloc_init_percpu(); + + if (malloc_init_hard_finish()) { + UNLOCK_RETURN(tsd_tsdn(tsd), true, true) + } + post_reentrancy(tsd); + malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); + + witness_assert_lockless( + witness_tsd_tsdn(tsd_witness_tsdp_get_unsafe(tsd))); + malloc_tsd_boot1(); + /* Update TSD after tsd_boot1. */ + tsd = tsd_fetch(); + if (opt_background_thread) { + assert(have_background_thread); + /* + * Need to finish init & unlock first before creating background + * threads (pthread_create depends on malloc). ctl_init (which + * sets isthreaded) needs to be called without holding any lock. + */ + background_thread_ctl_init(tsd_tsdn(tsd)); + if (background_thread_create(tsd, 0)) { + return true; + } + } +#undef UNLOCK_RETURN + return false; +}