diff --git a/Makefile.in b/Makefile.in
index 59aa8e5a..a8d5ff5e 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -95,6 +95,7 @@ LIBJEMALLOC := $(LIBPREFIX)jemalloc$(install_suffix)
BINS := $(objroot)bin/jemalloc-config $(objroot)bin/jemalloc.sh $(objroot)bin/jeprof
C_HDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h
C_SRCS := $(srcroot)src/jemalloc.c \
+ $(srcroot)src/jemalloc_init.c \
$(srcroot)src/arena.c \
$(srcroot)src/arenas_management.c \
$(srcroot)src/background_thread.c \
diff --git a/include/jemalloc/internal/jemalloc_init.h b/include/jemalloc/internal/jemalloc_init.h
new file mode 100644
index 00000000..6e154be9
--- /dev/null
+++ b/include/jemalloc/internal/jemalloc_init.h
@@ -0,0 +1,42 @@
+#ifndef JEMALLOC_INTERNAL_JEMALLOC_INIT_H
+#define JEMALLOC_INTERNAL_JEMALLOC_INIT_H
+
+enum malloc_init_e {
+ malloc_init_uninitialized = 3,
+ malloc_init_a0_initialized = 2,
+ malloc_init_recursible = 1,
+ malloc_init_initialized = 0 /* Common case --> jnz. */
+};
+typedef enum malloc_init_e malloc_init_t;
+
+extern malloc_init_t malloc_init_state;
+
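+/*
+ * Record of the thread performing initialization (a plain flag when
+ * JEMALLOC_THREADED_INIT is not defined); accessors are defined in
+ * src/jemalloc_init.c.
+ */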
+bool malloc_is_initializer(void);
+bool malloc_initializer_is_set(void);
+void malloc_initializer_set(void);
+
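+/* Slow-path initialization routines; both return true on error. */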
+bool malloc_init_hard_a0(void);
+bool malloc_init_hard(void);
+
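+/* Ensure the bootstrap (a0) state for arena-0 allocation; true on error. */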
+JEMALLOC_ALWAYS_INLINE bool
+malloc_init_a0(void) {
+ if (unlikely(malloc_init_state == malloc_init_uninitialized)) {
+ return malloc_init_hard_a0();
+ }
+ return false;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+malloc_initialized(void) {
+ return (malloc_init_state == malloc_init_initialized);
+}
+
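+/* Ensure the allocator is fully initialized; returns true on error. */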
+JEMALLOC_ALWAYS_INLINE bool
+malloc_init(void) {
+ if (unlikely(!malloc_initialized()) && malloc_init_hard()) {
+ return true;
+ }
+ return false;
+}
+
+#endif /* JEMALLOC_INTERNAL_JEMALLOC_INIT_H */
diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h
index f714fff8..43057b1a 100644
--- a/include/jemalloc/internal/jemalloc_internal_externs.h
+++ b/include/jemalloc/internal/jemalloc_internal_externs.h
@@ -37,7 +37,6 @@ extern bool opt_zero;
extern unsigned opt_narenas;
extern fxp_t opt_narenas_ratio;
extern zero_realloc_action_t opt_zero_realloc_action;
-extern malloc_init_t malloc_init_state;
extern const char *const zero_realloc_mode_names[];
extern atomic_zu_t zero_realloc_count;
extern bool opt_cache_oblivious;
@@ -54,9 +53,6 @@ extern uintptr_t san_cache_bin_nonfast_mask;
/* Number of CPUs. */
extern unsigned ncpus;
-/* Will be refactored in subsequent commit */
-bool malloc_init_hard_a0(void);
-
void *bootstrap_malloc(size_t size);
void *bootstrap_calloc(size_t num, size_t size);
void bootstrap_free(void *ptr);
diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h
index 2c61f8c4..5dc14a7b 100644
--- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h
+++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h
@@ -6,6 +6,7 @@
#include "jemalloc/internal/arena_inlines_b.h"
#include "jemalloc/internal/emap.h"
#include "jemalloc/internal/hook.h"
+#include "jemalloc/internal/jemalloc_init.h"
#include "jemalloc/internal/jemalloc_internal_types.h"
#include "jemalloc/internal/log.h"
#include "jemalloc/internal/sz.h"
@@ -280,11 +281,6 @@ fastpath_success_finish(
}
}
-JEMALLOC_ALWAYS_INLINE bool
-malloc_initialized(void) {
- return (malloc_init_state == malloc_init_initialized);
-}
-
/*
* malloc() fastpath. Included here so that we can inline it into operator new;
* function call overhead there is non-negligible as a fraction of total CPU in
diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h
index 0ade5461..985dc15f 100644
--- a/include/jemalloc/internal/jemalloc_internal_types.h
+++ b/include/jemalloc/internal/jemalloc_internal_types.h
@@ -20,14 +20,6 @@ typedef enum zero_realloc_action_e zero_realloc_action_t;
/* Signature of write callback. */
typedef void(write_cb_t)(void *, const char *);
-enum malloc_init_e {
- malloc_init_uninitialized = 3,
- malloc_init_a0_initialized = 2,
- malloc_init_recursible = 1,
- malloc_init_initialized = 0 /* Common case --> jnz. */
-};
-typedef enum malloc_init_e malloc_init_t;
-
/*
* Flags bits:
*
diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
index 63e49118..881e1862 100644
--- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
+++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
@@ -69,6 +69,7 @@
+    <ClCompile Include="..\..\..\..\src\jemalloc_init.c" />
diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
index c0100096..7595606f 100644
--- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
+++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
@@ -208,5 +208,8 @@
       <Filter>Source Files</Filter>
+    <ClCompile Include="..\..\..\..\src\jemalloc_init.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
index 409f2195..b655de65 100644
--- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
+++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
@@ -69,6 +69,7 @@
+    <ClCompile Include="..\..\..\..\src\jemalloc_init.c" />
diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
index c0100096..7595606f 100644
--- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
+++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
@@ -208,5 +208,8 @@
       <Filter>Source Files</Filter>
+    <ClCompile Include="..\..\..\..\src\jemalloc_init.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj
index 963ef5cb..790d79d8 100644
--- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj
+++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj
@@ -69,6 +69,7 @@
+    <ClCompile Include="..\..\..\..\src\jemalloc_init.c" />
diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters
index c0100096..7595606f 100644
--- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters
+++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters
@@ -208,5 +208,8 @@
       <Filter>Source Files</Filter>
+    <ClCompile Include="..\..\..\..\src\jemalloc_init.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj
index 84e57f28..9dfc7d84 100644
--- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj
+++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj
@@ -69,6 +69,7 @@
+    <ClCompile Include="..\..\..\..\src\jemalloc_init.c" />
diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters
index c0100096..7595606f 100644
--- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters
+++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters
@@ -208,5 +208,8 @@
       <Filter>Source Files</Filter>
+    <ClCompile Include="..\..\..\..\src\jemalloc_init.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
diff --git a/src/arenas_management.c b/src/arenas_management.c
index 03246bfd..261557b6 100644
--- a/src/arenas_management.c
+++ b/src/arenas_management.c
@@ -2,6 +2,7 @@
#include "jemalloc/internal/jemalloc_internal_includes.h"
#include "jemalloc/internal/arenas_management.h"
+#include "jemalloc/internal/jemalloc_init.h"
#include "jemalloc/internal/malloc_io.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/sz.h"
@@ -69,8 +70,7 @@ manual_arena_base_set(unsigned base) {
void *
a0ialloc(size_t size, bool zero, bool is_internal) {
- if (unlikely(malloc_init_state == malloc_init_uninitialized)
- && malloc_init_hard_a0()) {
+ if (unlikely(malloc_init_a0())) {
return NULL;
}
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 45fd568a..76835068 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -12,6 +12,7 @@
#include "jemalloc/internal/fxp.h"
#include "jemalloc/internal/san.h"
#include "jemalloc/internal/hook.h"
+#include "jemalloc/internal/jemalloc_init.h"
#include "jemalloc/internal/jemalloc_internal_types.h"
#include "jemalloc/internal/log.h"
#include "jemalloc/internal/malloc_io.h"
@@ -182,74 +183,9 @@ bool opt_hpa = false;
hpa_shard_opts_t opt_hpa_opts = HPA_SHARD_OPTS_DEFAULT;
sec_opts_t opt_hpa_sec_opts = SEC_OPTS_DEFAULT;
-malloc_init_t malloc_init_state = malloc_init_uninitialized;
-
/* False should be the common case. Set to true to trigger initialization. */
bool malloc_slow = true;
-/* When malloc_slow is true, set the corresponding bits for sanity check. */
-enum {
- flag_opt_junk_alloc = (1U),
- flag_opt_junk_free = (1U << 1),
- flag_opt_zero = (1U << 2),
- flag_opt_utrace = (1U << 3),
- flag_opt_xmalloc = (1U << 4)
-};
-static uint8_t malloc_slow_flags;
-
-#ifdef JEMALLOC_THREADED_INIT
-/* Used to let the initializing thread recursively allocate. */
-# define NO_INITIALIZER ((unsigned long)0)
-# define INITIALIZER pthread_self()
-# define IS_INITIALIZER \
- (pthread_equal(malloc_initializer, pthread_self()))
-static pthread_t malloc_initializer = NO_INITIALIZER;
-#else
-# define NO_INITIALIZER false
-# define INITIALIZER true
-# define IS_INITIALIZER malloc_initializer
-static bool malloc_initializer = NO_INITIALIZER;
-#endif
-
-/* Used to avoid initialization races. */
-#ifdef _WIN32
-# if _WIN32_WINNT >= 0x0600
-static malloc_mutex_t init_lock = SRWLOCK_INIT;
-# else
-static malloc_mutex_t init_lock;
-static bool init_lock_initialized = false;
-
-JEMALLOC_ATTR(constructor)
-static void WINAPI
-_init_init_lock(void) {
- /*
- * If another constructor in the same binary is using mallctl to e.g.
- * set up extent hooks, it may end up running before this one, and
- * malloc_init_hard will crash trying to lock the uninitialized lock. So
- * we force an initialization of the lock in malloc_init_hard as well.
- * We don't try to care about atomicity of the accessed to the
- * init_lock_initialized boolean, since it really only matters early in
- * the process creation, before any separate thread normally starts
- * doing anything.
- */
- if (!init_lock_initialized) {
- malloc_mutex_init(&init_lock, "init", WITNESS_RANK_INIT,
- malloc_mutex_rank_exclusive);
- }
- init_lock_initialized = true;
-}
-
-# ifdef _MSC_VER
-# pragma section(".CRT$XCU", read)
-JEMALLOC_SECTION(".CRT$XCU")
-JEMALLOC_ATTR(used)
-static const void(WINAPI *init_init_lock)(void) = _init_init_lock;
-# endif
-# endif
-#else
-static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER;
-#endif
-
typedef struct {
void *p; /* Input pointer (as in realloc(p, s)). */
size_t s; /* Request size. */
@@ -274,35 +210,11 @@ typedef struct {
#endif
-/******************************************************************************/
-/*
- * Function prototypes for static functions that are referenced prior to
- * definition.
- */
-
-static bool malloc_init_hard(void);
-
/******************************************************************************/
/*
* Begin miscellaneous support functions.
*/
-JEMALLOC_ALWAYS_INLINE bool
-malloc_init_a0(void) {
- if (unlikely(malloc_init_state == malloc_init_uninitialized)) {
- return malloc_init_hard_a0();
- }
- return false;
-}
-
-JEMALLOC_ALWAYS_INLINE bool
-malloc_init(void) {
- if (unlikely(!malloc_initialized()) && malloc_init_hard()) {
- return true;
- }
- return false;
-}
-
/*
* FreeBSD's libc uses the bootstrap_*() functions in bootstrap-sensitive
* situations that cannot tolerate TLS variable access (TLS allocation and very
@@ -340,40 +252,6 @@ bootstrap_free(void *ptr) {
a0idalloc(ptr, false);
}
-static void
-stats_print_atexit(void) {
- if (config_stats) {
- tsdn_t *tsdn;
- unsigned narenas, i;
-
- tsdn = tsdn_fetch();
-
- /*
- * Merge stats from extant threads. This is racy, since
- * individual threads do not lock when recording tcache stats
- * events. As a consequence, the final stats may be slightly
- * out of date by the time they are reported, if other threads
- * continue to allocate.
- */
- for (i = 0, narenas = narenas_total_get(); i < narenas; i++) {
- arena_t *arena = arena_get(tsdn, i, false);
- if (arena != NULL) {
- tcache_slow_t *tcache_slow;
-
- malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
- ql_foreach (
- tcache_slow, &arena->tcache_ql, link) {
- tcache_stats_merge(
- tsdn, tcache_slow->tcache, arena);
- }
- malloc_mutex_unlock(
- tsdn, &arena->tcache_ql_mtx);
- }
- }
- }
- je_malloc_stats_print(NULL, NULL, opt_stats_print_opts);
-}
-
/*
* Ensure that we don't hold any locks upon entry to or exit from allocator
* code (in a "broad" sense that doesn't count a reentrant allocation as an
@@ -403,563 +281,6 @@ check_entry_exit_locking(tsdn_t *tsdn) {
* End miscellaneous support functions.
*/
/******************************************************************************/
-/*
- * Begin initialization functions.
- */
-
-static unsigned
-malloc_ncpus(void) {
- long result;
-
-#ifdef _WIN32
- SYSTEM_INFO si;
- GetSystemInfo(&si);
- result = si.dwNumberOfProcessors;
-#elif defined(CPU_COUNT)
- /*
- * glibc >= 2.6 has the CPU_COUNT macro.
- *
- * glibc's sysconf() uses isspace(). glibc allocates for the first time
- * *before* setting up the isspace tables. Therefore we need a
- * different method to get the number of CPUs.
- *
- * The getaffinity approach is also preferred when only a subset of CPUs
- * is available, to avoid using more arenas than necessary.
- */
- {
-# if defined(__FreeBSD__) || defined(__DragonFly__)
- cpuset_t set;
-# else
- cpu_set_t set;
-# endif
-# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
- sched_getaffinity(0, sizeof(set), &set);
-# else
- pthread_getaffinity_np(pthread_self(), sizeof(set), &set);
-# endif
- result = CPU_COUNT(&set);
- }
-#else
- result = sysconf(_SC_NPROCESSORS_ONLN);
-#endif
- return ((result == -1) ? 1 : (unsigned)result);
-}
-
-/*
- * Ensure that number of CPUs is determistinc, i.e. it is the same based on:
- * - sched_getaffinity()
- * - _SC_NPROCESSORS_ONLN
- * - _SC_NPROCESSORS_CONF
- * Since otherwise tricky things is possible with percpu arenas in use.
- */
-static bool
-malloc_cpu_count_is_deterministic(void) {
-#ifdef _WIN32
- return true;
-#else
- long cpu_onln = sysconf(_SC_NPROCESSORS_ONLN);
- long cpu_conf = sysconf(_SC_NPROCESSORS_CONF);
- if (cpu_onln != cpu_conf) {
- return false;
- }
-# if defined(CPU_COUNT)
-# if defined(__FreeBSD__) || defined(__DragonFly__)
- cpuset_t set;
-# else
- cpu_set_t set;
-# endif /* __FreeBSD__ */
-# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
- sched_getaffinity(0, sizeof(set), &set);
-# else /* !JEMALLOC_HAVE_SCHED_SETAFFINITY */
- pthread_getaffinity_np(pthread_self(), sizeof(set), &set);
-# endif /* JEMALLOC_HAVE_SCHED_SETAFFINITY */
- long cpu_affinity = CPU_COUNT(&set);
- if (cpu_affinity != cpu_conf) {
- return false;
- }
-# endif /* CPU_COUNT */
- return true;
-#endif
-}
-
-static void
-malloc_slow_flag_init(void) {
- /*
- * Combine the runtime options into malloc_slow for fast path. Called
- * after processing all the options.
- */
- malloc_slow_flags |= (opt_junk_alloc ? flag_opt_junk_alloc : 0)
- | (opt_junk_free ? flag_opt_junk_free : 0)
- | (opt_zero ? flag_opt_zero : 0)
- | (opt_utrace ? flag_opt_utrace : 0)
- | (opt_xmalloc ? flag_opt_xmalloc : 0);
-
- malloc_slow = (malloc_slow_flags != 0);
-}
-
-static bool
-malloc_init_hard_needed(void) {
- if (malloc_initialized()
- || (IS_INITIALIZER
- && malloc_init_state == malloc_init_recursible)) {
- /*
- * Another thread initialized the allocator before this one
- * acquired init_lock, or this thread is the initializing
- * thread, and it is recursively allocating.
- */
- return false;
- }
-#ifdef JEMALLOC_THREADED_INIT
- if (malloc_initializer != NO_INITIALIZER && !IS_INITIALIZER) {
- /* Busy-wait until the initializing thread completes. */
- spin_t spinner = SPIN_INITIALIZER;
- do {
- malloc_mutex_unlock(TSDN_NULL, &init_lock);
- spin_adaptive(&spinner);
- malloc_mutex_lock(TSDN_NULL, &init_lock);
- } while (!malloc_initialized());
- return false;
- }
-#endif
- return true;
-}
-
-static bool
-malloc_init_hard_a0_locked(void) {
- malloc_initializer = INITIALIZER;
-
- JEMALLOC_DIAGNOSTIC_PUSH
- JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
- sc_data_t sc_data = {0};
- JEMALLOC_DIAGNOSTIC_POP
-
- /*
- * Ordering here is somewhat tricky; we need sc_boot() first, since that
- * determines what the size classes will be, and then
- * malloc_conf_init(), since any slab size tweaking will need to be done
- * before sz_boot and bin_info_boot, which assume that the values they
- * read out of sc_data_global are final.
- */
- sc_boot(&sc_data);
- unsigned bin_shard_sizes[SC_NBINS];
- bin_shard_sizes_boot(bin_shard_sizes);
- /*
- * prof_boot0 only initializes opt_prof_prefix. We need to do it before
- * we parse malloc_conf options, in case malloc_conf parsing overwrites
- * it.
- */
- if (config_prof) {
- prof_boot0();
- }
- char readlink_buf[PATH_MAX + 1];
- readlink_buf[0] = '\0';
- malloc_conf_init(&sc_data, bin_shard_sizes, readlink_buf);
- san_init(opt_lg_san_uaf_align);
- sz_boot(&sc_data, opt_cache_oblivious);
- bin_info_boot(&sc_data, bin_shard_sizes);
-
- if (opt_stats_print) {
- /* Print statistics at exit. */
- if (atexit(stats_print_atexit) != 0) {
- malloc_write("<jemalloc>: Error in atexit()\n");
- if (opt_abort) {
- abort();
- }
- }
- }
-
- if (stats_boot()) {
- return true;
- }
- if (pages_boot()) {
- return true;
- }
- if (base_boot(TSDN_NULL)) {
- return true;
- }
- /* emap_global is static, hence zeroed. */
- if (emap_init(&arena_emap_global, b0get(), /* zeroed */ true)) {
- return true;
- }
- if (extent_boot()) {
- return true;
- }
- if (ctl_boot()) {
- return true;
- }
- if (config_prof) {
- prof_boot1();
- }
- if (opt_hpa && !hpa_supported()) {
- malloc_printf(
- ": HPA not supported in the current "
- "configuration; %s.",
- opt_abort_conf ? "aborting" : "disabling");
- if (opt_abort_conf) {
- malloc_abort_invalid_conf();
- } else {
- opt_hpa = false;
- }
- }
- if (arena_boot(&sc_data, b0get(), opt_hpa)) {
- return true;
- }
- if (tcache_boot(TSDN_NULL, b0get())) {
- return true;
- }
- if (arenas_management_boot()) {
- return true;
- }
- hook_boot();
- experimental_thread_events_boot();
- /*
- * Create enough scaffolding to allow recursive allocation in
- * malloc_ncpus().
- */
- narenas_auto_set(1);
- manual_arena_base_set(narenas_auto + 1);
- memset(arenas, 0, sizeof(arena_t *) * narenas_auto);
- /*
- * Initialize one arena here. The rest are lazily created in
- * arena_choose_hard().
- */
- if (arena_init(TSDN_NULL, 0, &arena_config_default) == NULL) {
- return true;
- }
-
- if (opt_hpa && !hpa_supported()) {
- malloc_printf(
- ": HPA not supported in the current "
- "configuration; %s.",
- opt_abort_conf ? "aborting" : "disabling");
- if (opt_abort_conf) {
- malloc_abort_invalid_conf();
- } else {
- opt_hpa = false;
- }
- }
-
- malloc_init_state = malloc_init_a0_initialized;
-
- size_t buf_len = strlen(readlink_buf);
- if (buf_len > 0) {
- void *readlink_allocated = a0ialloc(buf_len + 1, false, true);
- if (readlink_allocated != NULL) {
- memcpy(readlink_allocated, readlink_buf, buf_len + 1);
- opt_malloc_conf_symlink = readlink_allocated;
- }
- }
-
- return false;
-}
-
-bool
-malloc_init_hard_a0(void) {
- bool ret;
-
- malloc_mutex_lock(TSDN_NULL, &init_lock);
- ret = malloc_init_hard_a0_locked();
- malloc_mutex_unlock(TSDN_NULL, &init_lock);
- return ret;
-}
-
-/* Initialize data structures which may trigger recursive allocation. */
-static bool
-malloc_init_hard_recursible(void) {
- malloc_init_state = malloc_init_recursible;
-
- ncpus = malloc_ncpus();
- if (opt_percpu_arena != percpu_arena_disabled) {
- bool cpu_count_is_deterministic =
- malloc_cpu_count_is_deterministic();
- if (!cpu_count_is_deterministic) {
- /*
- * If # of CPU is not deterministic, and narenas not
- * specified, disables per cpu arena since it may not
- * detect CPU IDs properly.
- */
- if (opt_narenas == 0) {
- opt_percpu_arena = percpu_arena_disabled;
- malloc_write(
- ": Number of CPUs "
- "detected is not deterministic. Per-CPU "
- "arena disabled.\n");
- if (opt_abort_conf) {
- malloc_abort_invalid_conf();
- }
- if (opt_abort) {
- abort();
- }
- }
- }
- }
-
-#if (defined(JEMALLOC_HAVE_PTHREAD_ATFORK) && !defined(JEMALLOC_MUTEX_INIT_CB) \
- && !defined(JEMALLOC_ZONE) && !defined(_WIN32) \
- && !defined(__native_client__))
- /* LinuxThreads' pthread_atfork() allocates. */
- if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent,
- jemalloc_postfork_child)
- != 0) {
- malloc_write(": Error in pthread_atfork()\n");
- if (opt_abort) {
- abort();
- }
- return true;
- }
-#endif
-
- if (background_thread_boot0()) {
- return true;
- }
-
- return false;
-}
-
-static unsigned
-malloc_narenas_default(void) {
- assert(ncpus > 0);
- /*
- * For SMP systems, create more than one arena per CPU by
- * default.
- */
- if (ncpus > 1) {
- fxp_t fxp_ncpus = FXP_INIT_INT(ncpus);
- fxp_t goal = fxp_mul(fxp_ncpus, opt_narenas_ratio);
- uint32_t int_goal = fxp_round_nearest(goal);
- if (int_goal == 0) {
- return 1;
- }
- return int_goal;
- } else {
- return 1;
- }
-}
-
-static percpu_arena_mode_t
-percpu_arena_as_initialized(percpu_arena_mode_t mode) {
- assert(!malloc_initialized());
- assert(mode <= percpu_arena_disabled);
-
- if (mode != percpu_arena_disabled) {
- mode += percpu_arena_mode_enabled_base;
- }
-
- return mode;
-}
-
-static bool
-malloc_init_narenas(tsdn_t *tsdn) {
- assert(ncpus > 0);
-
- if (opt_percpu_arena != percpu_arena_disabled) {
- if (!have_percpu_arena || malloc_getcpu() < 0) {
- opt_percpu_arena = percpu_arena_disabled;
- malloc_printf(
- ": perCPU arena getcpu() not "
- "available. Setting narenas to %u.\n",
- opt_narenas ? opt_narenas
- : malloc_narenas_default());
- if (opt_abort) {
- abort();
- }
- } else {
- if (ncpus >= MALLOCX_ARENA_LIMIT) {
- malloc_printf(
- ": narenas w/ percpu"
- "arena beyond limit (%d)\n",
- ncpus);
- if (opt_abort) {
- abort();
- }
- return true;
- }
- /* NB: opt_percpu_arena isn't fully initialized yet. */
- if (percpu_arena_as_initialized(opt_percpu_arena)
- == per_phycpu_arena
- && ncpus % 2 != 0) {
- malloc_printf(
- ": invalid "
- "configuration -- per physical CPU arena "
- "with odd number (%u) of CPUs (no hyper "
- "threading?).\n",
- ncpus);
- if (opt_abort)
- abort();
- }
- unsigned n = percpu_arena_ind_limit(
- percpu_arena_as_initialized(opt_percpu_arena));
- if (opt_narenas < n) {
- /*
- * If narenas is specified with percpu_arena
- * enabled, actual narenas is set as the greater
- * of the two. percpu_arena_choose will be free
- * to use any of the arenas based on CPU
- * id. This is conservative (at a small cost)
- * but ensures correctness.
- *
- * If for some reason the ncpus determined at
- * boot is not the actual number (e.g. because
- * of affinity setting from numactl), reserving
- * narenas this way provides a workaround for
- * percpu_arena.
- */
- opt_narenas = n;
- }
- }
- }
- if (opt_narenas == 0) {
- opt_narenas = malloc_narenas_default();
- }
- assert(opt_narenas > 0);
-
- narenas_auto_set(opt_narenas);
- /*
- * Limit the number of arenas to the indexing range of MALLOCX_ARENA().
- */
- if (narenas_auto >= MALLOCX_ARENA_LIMIT) {
- narenas_auto_set(MALLOCX_ARENA_LIMIT - 1);
- malloc_printf(": Reducing narenas to limit (%d)\n",
- narenas_auto);
- }
- narenas_total_set(narenas_auto);
- if (arena_init_huge(tsdn, arena_get(tsdn, 0, false))) {
- narenas_total_inc();
- }
- manual_arena_base_set(narenas_total_get());
-
- return false;
-}
-
-static void
-malloc_init_percpu(void) {
- opt_percpu_arena = percpu_arena_as_initialized(opt_percpu_arena);
-}
-
-static bool
-malloc_init_hard_finish(void) {
- if (malloc_mutex_boot()) {
- return true;
- }
-
- malloc_init_state = malloc_init_initialized;
- malloc_slow_flag_init();
-
- return false;
-}
-
-static void
-malloc_init_hard_cleanup(tsdn_t *tsdn, bool reentrancy_set) {
- malloc_mutex_assert_owner(tsdn, &init_lock);
- malloc_mutex_unlock(tsdn, &init_lock);
- if (reentrancy_set) {
- assert(!tsdn_null(tsdn));
- tsd_t *tsd = tsdn_tsd(tsdn);
- assert(tsd_reentrancy_level_get(tsd) > 0);
- post_reentrancy(tsd);
- }
-}
-
-static bool
-malloc_init_hard(void) {
- tsd_t *tsd;
-
- assert(TCACHE_MAXCLASS_LIMIT <= USIZE_GROW_SLOW_THRESHOLD);
- assert(SC_LOOKUP_MAXCLASS <= USIZE_GROW_SLOW_THRESHOLD);
- /*
- * This asserts an extreme case where TINY_MAXCLASS is larger
- * than LARGE_MINCLASS. It could only happen if some constants
- * are configured miserably wrong.
- */
- assert(SC_NTINY == 0 || SC_LG_TINY_MAXCLASS <= SC_LG_LARGE_MINCLASS);
-
-#if defined(_WIN32) && _WIN32_WINNT < 0x0600
- _init_init_lock();
-#endif
- malloc_mutex_lock(TSDN_NULL, &init_lock);
-
-#define UNLOCK_RETURN(tsdn, ret, reentrancy) \
- malloc_init_hard_cleanup(tsdn, reentrancy); \
- return ret;
-
- if (!malloc_init_hard_needed()) {
- UNLOCK_RETURN(TSDN_NULL, false, false)
- }
-
- if (malloc_init_state != malloc_init_a0_initialized
- && malloc_init_hard_a0_locked()) {
- UNLOCK_RETURN(TSDN_NULL, true, false)
- }
-
- malloc_mutex_unlock(TSDN_NULL, &init_lock);
- /* Recursive allocation relies on functional tsd. */
- tsd = malloc_tsd_boot0();
- if (tsd == NULL) {
- return true;
- }
- if (malloc_init_hard_recursible()) {
- return true;
- }
-
- malloc_mutex_lock(tsd_tsdn(tsd), &init_lock);
- /* Set reentrancy level to 1 during init. */
- pre_reentrancy(tsd, NULL);
- /* Initialize narenas before prof_boot2 (for allocation). */
- if (malloc_init_narenas(tsd_tsdn(tsd))
- || background_thread_boot1(tsd_tsdn(tsd), b0get())) {
- UNLOCK_RETURN(tsd_tsdn(tsd), true, true)
- }
- if (opt_hpa) {
- /*
- * We didn't initialize arena 0 hpa_shard in arena_new, because
- * background_thread_enabled wasn't initialized yet, but we
- * need it to set correct value for deferral_allowed.
- */
- arena_t *a0 = arena_get(tsd_tsdn(tsd), 0, false);
- hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts;
- hpa_shard_opts.deferral_allowed = background_thread_enabled();
- if (pa_shard_enable_hpa(tsd_tsdn(tsd), &a0->pa_shard,
- &hpa_shard_opts, &opt_hpa_sec_opts)) {
- UNLOCK_RETURN(tsd_tsdn(tsd), true, true)
- }
- }
- if (config_prof && prof_boot2(tsd, b0get())) {
- UNLOCK_RETURN(tsd_tsdn(tsd), true, true)
- }
-
- malloc_init_percpu();
-
- if (malloc_init_hard_finish()) {
- UNLOCK_RETURN(tsd_tsdn(tsd), true, true)
- }
- post_reentrancy(tsd);
- malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
-
- witness_assert_lockless(
- witness_tsd_tsdn(tsd_witness_tsdp_get_unsafe(tsd)));
- malloc_tsd_boot1();
- /* Update TSD after tsd_boot1. */
- tsd = tsd_fetch();
- if (opt_background_thread) {
- assert(have_background_thread);
- /*
- * Need to finish init & unlock first before creating background
- * threads (pthread_create depends on malloc). ctl_init (which
- * sets isthreaded) needs to be called without holding any lock.
- */
- background_thread_ctl_init(tsd_tsdn(tsd));
- if (background_thread_create(tsd, 0)) {
- return true;
- }
- }
-#undef UNLOCK_RETURN
- return false;
-}
-
-/*
- * End initialization functions.
- */
-/******************************************************************************/
/*
* Begin allocation-path internal functions and data structures.
*/
@@ -1610,7 +931,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) {
}
assert(ptr != NULL);
- assert(malloc_initialized() || IS_INITIALIZER);
+ assert(malloc_initialized() || malloc_is_initializer());
emap_alloc_ctx_t alloc_ctx;
emap_alloc_ctx_lookup(
@@ -1644,7 +965,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) {
}
assert(ptr != NULL);
- assert(malloc_initialized() || IS_INITIALIZER);
+ assert(malloc_initialized() || malloc_is_initializer());
emap_alloc_ctx_t alloc_ctx;
szind_t szind = sz_size2index(usize);
@@ -2158,7 +1479,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) {
assert(ptr != NULL);
assert(size != 0);
- assert(malloc_initialized() || IS_INITIALIZER);
+ assert(malloc_initialized() || malloc_is_initializer());
tsd = tsd_fetch();
check_entry_exit_locking(tsd_tsdn(tsd));
@@ -2432,7 +1753,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) {
assert(ptr != NULL);
assert(size != 0);
assert(SIZE_T_MAX - size >= extra);
- assert(malloc_initialized() || IS_INITIALIZER);
+ assert(malloc_initialized() || malloc_is_initializer());
tsd = tsd_fetch();
check_entry_exit_locking(tsd_tsdn(tsd));
@@ -2515,7 +1836,7 @@ JEMALLOC_ATTR(pure) je_sallocx(const void *ptr, int flags) {
LOG("core.sallocx.entry", "ptr: %p, flags: %d", ptr, flags);
- assert(malloc_initialized() || IS_INITIALIZER);
+ assert(malloc_initialized() || malloc_is_initializer());
assert(ptr != NULL);
tsdn = tsdn_fetch();
@@ -2539,7 +1860,7 @@ je_dallocx(void *ptr, int flags) {
LOG("core.dallocx.entry", "ptr: %p, flags: %d", ptr, flags);
assert(ptr != NULL);
- assert(malloc_initialized() || IS_INITIALIZER);
+ assert(malloc_initialized() || malloc_is_initializer());
tsd_t *tsd = tsd_fetch_min();
bool fast = tsd_fast(tsd);
@@ -2576,7 +1897,7 @@ inallocx(tsdn_t *tsdn, size_t size, int flags) {
JEMALLOC_NOINLINE void
sdallocx_default(void *ptr, size_t size, int flags) {
assert(ptr != NULL);
- assert(malloc_initialized() || IS_INITIALIZER);
+ assert(malloc_initialized() || malloc_is_initializer());
tsd_t *tsd = tsd_fetch_min();
bool fast = tsd_fast(tsd);
@@ -2726,7 +2047,7 @@ je_malloc_stats_print(
JEMALLOC_ALWAYS_INLINE size_t
je_malloc_usable_size_impl(JEMALLOC_USABLE_SIZE_CONST void *ptr) {
- assert(malloc_initialized() || IS_INITIALIZER);
+ assert(malloc_initialized() || malloc_is_initializer());
tsdn_t *tsdn = tsdn_fetch();
check_entry_exit_locking(tsdn);
diff --git a/src/jemalloc_init.c b/src/jemalloc_init.c
new file mode 100644
index 00000000..37e1350f
--- /dev/null
+++ b/src/jemalloc_init.c
@@ -0,0 +1,687 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/arenas_management.h"
+#include "jemalloc/internal/conf.h"
+#include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/emap.h"
+#include "jemalloc/internal/extent_dss.h"
+#include "jemalloc/internal/extent_mmap.h"
+#include "jemalloc/internal/hook.h"
+#include "jemalloc/internal/jemalloc_init.h"
+#include "jemalloc/internal/malloc_io.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/safety_check.h"
+#include "jemalloc/internal/san.h"
+#include "jemalloc/internal/sc.h"
+#include "jemalloc/internal/spin.h"
+#include "jemalloc/internal/sz.h"
+#include "jemalloc/internal/thread_event.h"
+
+#ifdef JEMALLOC_THREADED_INIT
+/* Used to let the initializing thread recursively allocate. */
+# define NO_INITIALIZER ((unsigned long)0)
+# define INITIALIZER pthread_self()
+static pthread_t malloc_initializer = NO_INITIALIZER;
+#else
+# define NO_INITIALIZER false
+# define INITIALIZER true
+static bool malloc_initializer = NO_INITIALIZER;
+#endif
+
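+/*
+ * Accessor wrappers around the initializer record; these replace the
+ * IS_INITIALIZER / INITIALIZER macro uses that previously lived in
+ * jemalloc.c.
+ */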
+bool
+malloc_is_initializer(void) {
+#ifdef JEMALLOC_THREADED_INIT
+ return pthread_equal(malloc_initializer, pthread_self());
+#else
+ return malloc_initializer;
+#endif
+}
+
+bool
+malloc_initializer_is_set(void) {
+ return malloc_initializer != NO_INITIALIZER;
+}
+
+void
+malloc_initializer_set(void) {
+ malloc_initializer = INITIALIZER;
+}
+
+/* Used to avoid initialization races. */
+#ifdef _WIN32
+# if _WIN32_WINNT >= 0x0600
+static malloc_mutex_t init_lock = SRWLOCK_INIT;
+# else
+static malloc_mutex_t init_lock;
+static bool init_lock_initialized = false;
+
+JEMALLOC_ATTR(constructor)
+static void WINAPI
+_init_init_lock(void) {
+ /*
+ * If another constructor in the same binary is using mallctl to e.g.
+ * set up extent hooks, it may end up running before this one, and
+ * malloc_init_hard will crash trying to lock the uninitialized lock. So
+ * we force an initialization of the lock in malloc_init_hard as well.
+ * We don't try to care about atomicity of the accesses to the
+ * init_lock_initialized boolean, since it really only matters early in
+ * the process creation, before any separate thread normally starts
+ * doing anything.
+ */
+ if (!init_lock_initialized) {
+ malloc_mutex_init(&init_lock, "init", WITNESS_RANK_INIT,
+ malloc_mutex_rank_exclusive);
+ }
+ init_lock_initialized = true;
+}
+
+# ifdef _MSC_VER
+# pragma section(".CRT$XCU", read)
+JEMALLOC_SECTION(".CRT$XCU")
+JEMALLOC_ATTR(used)
+static const void(WINAPI *init_init_lock)(void) = _init_init_lock;
+# endif
+# endif
+#else
+static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER;
+#endif
+
+malloc_init_t malloc_init_state = malloc_init_uninitialized;
+
+/* When malloc_slow is true, set the corresponding bits for sanity check. */
+enum {
+ flag_opt_junk_alloc = (1U),
+ flag_opt_junk_free = (1U << 1),
+ flag_opt_zero = (1U << 2),
+ flag_opt_utrace = (1U << 3),
+ flag_opt_xmalloc = (1U << 4)
+};
+static uint8_t malloc_slow_flags;
+
+static void
+malloc_slow_flag_init(void) {
+ /*
+ * Combine the runtime options into malloc_slow for fast path. Called
+ * after bootstrap is complete.
+ */
+ malloc_slow_flags |= (opt_junk_alloc ? flag_opt_junk_alloc : 0)
+ | (opt_junk_free ? flag_opt_junk_free : 0)
+ | (opt_zero ? flag_opt_zero : 0)
+ | (opt_utrace ? flag_opt_utrace : 0)
+ | (opt_xmalloc ? flag_opt_xmalloc : 0);
+
+ malloc_slow = (malloc_slow_flags != 0);
+}
+
+static void stats_print_atexit(void);
+static unsigned malloc_ncpus(void);
+static bool malloc_cpu_count_is_deterministic(void);
+
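+/*
+ * Called with init_lock held; returns true if this thread should proceed
+ * with initialization.
+ */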
+static bool
+malloc_init_hard_needed(void) {
+ if (malloc_initialized()
+ || (malloc_is_initializer()
+ && malloc_init_state == malloc_init_recursible)) {
+ /*
+ * Another thread initialized the allocator before this one
+ * acquired init_lock, or this thread is the initializing
+ * thread, and it is recursively allocating.
+ */
+ return false;
+ }
+#ifdef JEMALLOC_THREADED_INIT
+ if (malloc_initializer_is_set() && !malloc_is_initializer()) {
+ /* Busy-wait until the initializing thread completes. */
+ spin_t spinner = SPIN_INITIALIZER;
+ do {
+ malloc_mutex_unlock(TSDN_NULL, &init_lock);
+ spin_adaptive(&spinner);
+ malloc_mutex_lock(TSDN_NULL, &init_lock);
+ } while (!malloc_initialized());
+ return false;
+ }
+#endif
+ return true;
+}
+
+static bool
+malloc_init_hard_a0_locked(void) {
+ malloc_initializer_set();
+
+ JEMALLOC_DIAGNOSTIC_PUSH
+ JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
+ sc_data_t sc_data = {0};
+ JEMALLOC_DIAGNOSTIC_POP
+
+ /*
+ * Ordering here is somewhat tricky; we need sc_boot() first, since that
+ * determines what the size classes will be, and then
+ * malloc_conf_init(), since any slab size tweaking will need to be done
+ * before sz_boot and bin_info_boot, which assume that the values they
+ * read out of sc_data_global are final.
+ */
+ sc_boot(&sc_data);
+ unsigned bin_shard_sizes[SC_NBINS];
+ bin_shard_sizes_boot(bin_shard_sizes);
+ /*
+ * prof_boot0 only initializes opt_prof_prefix. We need to do it before
+ * we parse malloc_conf options, in case malloc_conf parsing overwrites
+ * it.
+ */
+ if (config_prof) {
+ prof_boot0();
+ }
+ char readlink_buf[PATH_MAX + 1];
+ readlink_buf[0] = '\0';
+ malloc_conf_init(&sc_data, bin_shard_sizes, readlink_buf);
+ san_init(opt_lg_san_uaf_align);
+ sz_boot(&sc_data, opt_cache_oblivious);
+ bin_info_boot(&sc_data, bin_shard_sizes);
+
+ if (opt_stats_print) {
+ /* Print statistics at exit. */
+ if (atexit(stats_print_atexit) != 0) {
+ malloc_write(": Error in atexit()\n");
+ if (opt_abort) {
+ abort();
+ }
+ }
+ }
+
+ if (stats_boot()) {
+ return true;
+ }
+ if (pages_boot()) {
+ return true;
+ }
+ if (base_boot(TSDN_NULL)) {
+ return true;
+ }
+ /* emap_global is static, hence zeroed. */
+ if (emap_init(&arena_emap_global, b0get(), /* zeroed */ true)) {
+ return true;
+ }
+ if (extent_boot()) {
+ return true;
+ }
+ if (ctl_boot()) {
+ return true;
+ }
+ if (config_prof) {
+ prof_boot1();
+ }
+ if (opt_hpa && !hpa_supported()) {
+ malloc_printf(
+ ": HPA not supported in the current "
+ "configuration; %s.",
+ opt_abort_conf ? "aborting" : "disabling");
+ if (opt_abort_conf) {
+ malloc_abort_invalid_conf();
+ } else {
+ opt_hpa = false;
+ }
+ }
+ if (arena_boot(&sc_data, b0get(), opt_hpa)) {
+ return true;
+ }
+ if (tcache_boot(TSDN_NULL, b0get())) {
+ return true;
+ }
+ if (arenas_management_boot()) {
+ return true;
+ }
+ hook_boot();
+ experimental_thread_events_boot();
+ /*
+ * Create enough scaffolding to allow recursive allocation in
+ * malloc_ncpus().
+ */
+ narenas_auto_set(1);
+ manual_arena_base_set(narenas_auto + 1);
+ memset(arenas, 0, sizeof(arena_t *) * narenas_auto);
+ /*
+ * Initialize one arena here. The rest are lazily created in
+ * arena_choose_hard().
+ */
+ if (arena_init(TSDN_NULL, 0, &arena_config_default) == NULL) {
+ return true;
+ }
+
+ if (opt_hpa && !hpa_supported()) {
+ malloc_printf(
+ ": HPA not supported in the current "
+ "configuration; %s.",
+ opt_abort_conf ? "aborting" : "disabling");
+ if (opt_abort_conf) {
+ malloc_abort_invalid_conf();
+ } else {
+ opt_hpa = false;
+ }
+ }
+
+ malloc_init_state = malloc_init_a0_initialized;
+
+ size_t buf_len = strlen(readlink_buf);
+ if (buf_len > 0) {
+ void *readlink_allocated = a0malloc(buf_len + 1);
+ if (readlink_allocated != NULL) {
+ memcpy(readlink_allocated, readlink_buf, buf_len + 1);
+ opt_malloc_conf_symlink = readlink_allocated;
+ }
+ }
+
+ return false;
+}
+
+bool
+malloc_init_hard_a0(void) {
+ bool ret;
+
+ malloc_mutex_lock(TSDN_NULL, &init_lock);
+ ret = malloc_init_hard_a0_locked();
+ malloc_mutex_unlock(TSDN_NULL, &init_lock);
+ return ret;
+}
+
+static void
+stats_print_atexit(void) {
+ if (config_stats) {
+ tsdn_t *tsdn;
+ unsigned narenas, i;
+
+ tsdn = tsdn_fetch();
+
+ /*
+ * Merge stats from extant threads. This is racy, since
+ * individual threads do not lock when recording tcache stats
+ * events. As a consequence, the final stats may be slightly
+ * out of date by the time they are reported, if other threads
+ * continue to allocate.
+ */
+ for (i = 0, narenas = narenas_total_get(); i < narenas; i++) {
+ arena_t *arena = arena_get(tsdn, i, false);
+ if (arena != NULL) {
+ tcache_slow_t *tcache_slow;
+
+ malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
+ ql_foreach (
+ tcache_slow, &arena->tcache_ql, link) {
+ tcache_stats_merge(
+ tsdn, tcache_slow->tcache, arena);
+ }
+ malloc_mutex_unlock(
+ tsdn, &arena->tcache_ql_mtx);
+ }
+ }
+ }
+ je_malloc_stats_print(NULL, NULL, opt_stats_print_opts);
+}
+
+static unsigned
+malloc_ncpus(void) {
+ long result;
+
+#ifdef _WIN32
+ SYSTEM_INFO si;
+ GetSystemInfo(&si);
+ result = si.dwNumberOfProcessors;
+#elif defined(CPU_COUNT)
+ /*
+ * glibc >= 2.6 has the CPU_COUNT macro.
+ *
+ * glibc's sysconf() uses isspace(). glibc allocates for the first time
+ * *before* setting up the isspace tables. Therefore we need a
+ * different method to get the number of CPUs.
+ *
+ * The getaffinity approach is also preferred when only a subset of CPUs
+ * is available, to avoid using more arenas than necessary.
+ */
+ {
+# if defined(__FreeBSD__) || defined(__DragonFly__)
+ cpuset_t set;
+# else
+ cpu_set_t set;
+# endif
+# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
+ sched_getaffinity(0, sizeof(set), &set);
+# else
+ pthread_getaffinity_np(pthread_self(), sizeof(set), &set);
+# endif
+ result = CPU_COUNT(&set);
+ }
+#else
+ result = sysconf(_SC_NPROCESSORS_ONLN);
+#endif
+ return ((result == -1) ? 1 : (unsigned)result);
+}
+
+/*
+ * Ensure that the number of CPUs is deterministic, i.e. it is the same according to:
+ * - sched_getaffinity()
+ * - _SC_NPROCESSORS_ONLN
+ * - _SC_NPROCESSORS_CONF
+ * Otherwise, tricky behavior is possible when percpu arenas are in use.
+ */
+static bool
+malloc_cpu_count_is_deterministic(void) {
+#ifdef _WIN32
+ return true;
+#else
+ long cpu_onln = sysconf(_SC_NPROCESSORS_ONLN);
+ long cpu_conf = sysconf(_SC_NPROCESSORS_CONF);
+ if (cpu_onln != cpu_conf) {
+ return false;
+ }
+# if defined(CPU_COUNT)
+# if defined(__FreeBSD__) || defined(__DragonFly__)
+ cpuset_t set;
+# else
+ cpu_set_t set;
+# endif /* __FreeBSD__ */
+# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
+ sched_getaffinity(0, sizeof(set), &set);
+# else /* !JEMALLOC_HAVE_SCHED_SETAFFINITY */
+ pthread_getaffinity_np(pthread_self(), sizeof(set), &set);
+# endif /* JEMALLOC_HAVE_SCHED_SETAFFINITY */
+ long cpu_affinity = CPU_COUNT(&set);
+ if (cpu_affinity != cpu_conf) {
+ return false;
+ }
+# endif /* CPU_COUNT */
+ return true;
+#endif
+}
+
+/* Initialize data structures which may trigger recursive allocation. */
+static bool
+malloc_init_hard_recursible(void) {
+ malloc_init_state = malloc_init_recursible;
+
+ ncpus = malloc_ncpus();
+ if (opt_percpu_arena != percpu_arena_disabled) {
+ bool cpu_count_is_deterministic =
+ malloc_cpu_count_is_deterministic();
+ if (!cpu_count_is_deterministic) {
+ /*
+ * If the CPU count is not deterministic and narenas is
+ * not specified, disable per-CPU arenas, since CPU IDs
+ * may not be detected properly.
+ */
+ if (opt_narenas == 0) {
+ opt_percpu_arena = percpu_arena_disabled;
+ malloc_write(
+ ": Number of CPUs "
+ "detected is not deterministic. Per-CPU "
+ "arena disabled.\n");
+ if (opt_abort_conf) {
+ malloc_abort_invalid_conf();
+ }
+ if (opt_abort) {
+ abort();
+ }
+ }
+ }
+ }
+
+#if (defined(JEMALLOC_HAVE_PTHREAD_ATFORK) && !defined(JEMALLOC_MUTEX_INIT_CB) \
+ && !defined(JEMALLOC_ZONE) && !defined(_WIN32) \
+ && !defined(__native_client__))
+ /* LinuxThreads' pthread_atfork() allocates. */
+ if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent,
+ jemalloc_postfork_child)
+ != 0) {
+ malloc_write(": Error in pthread_atfork()\n");
+ if (opt_abort) {
+ abort();
+ }
+ return true;
+ }
+#endif
+
+ if (background_thread_boot0()) {
+ return true;
+ }
+
+ return false;
+}
+
+static unsigned
+malloc_narenas_default(void) {
+ assert(ncpus > 0);
+ /*
+ * For SMP systems, create more than one arena per CPU by
+ * default.
+ */
+ if (ncpus > 1) {
+ fxp_t fxp_ncpus = FXP_INIT_INT(ncpus);
+ fxp_t goal = fxp_mul(fxp_ncpus, opt_narenas_ratio);
+ uint32_t int_goal = fxp_round_nearest(goal);
+ if (int_goal == 0) {
+ return 1;
+ }
+ return int_goal;
+ } else {
+ return 1;
+ }
+}
+
+static percpu_arena_mode_t
+percpu_arena_as_initialized(percpu_arena_mode_t mode) {
+ assert(!malloc_initialized());
+ assert(mode <= percpu_arena_disabled);
+
+ if (mode != percpu_arena_disabled) {
+ mode += percpu_arena_mode_enabled_base;
+ }
+
+ return mode;
+}
+
+static bool
+malloc_init_narenas(tsdn_t *tsdn) {
+ assert(ncpus > 0);
+
+ if (opt_percpu_arena != percpu_arena_disabled) {
+ if (!have_percpu_arena || malloc_getcpu() < 0) {
+ opt_percpu_arena = percpu_arena_disabled;
+ malloc_printf(
+ ": perCPU arena getcpu() not "
+ "available. Setting narenas to %u.\n",
+ opt_narenas ? opt_narenas
+ : malloc_narenas_default());
+ if (opt_abort) {
+ abort();
+ }
+ } else {
+ if (ncpus >= MALLOCX_ARENA_LIMIT) {
+ malloc_printf(
+ ": narenas w/ percpu"
+ "arena beyond limit (%d)\n",
+ ncpus);
+ if (opt_abort) {
+ abort();
+ }
+ return true;
+ }
+ /* NB: opt_percpu_arena isn't fully initialized yet. */
+ if (percpu_arena_as_initialized(opt_percpu_arena)
+ == per_phycpu_arena
+ && ncpus % 2 != 0) {
+ malloc_printf(
+ ": invalid "
+ "configuration -- per physical CPU arena "
+ "with odd number (%u) of CPUs (no hyper "
+ "threading?).\n",
+ ncpus);
+ if (opt_abort)
+ abort();
+ }
+ unsigned n = percpu_arena_ind_limit(
+ percpu_arena_as_initialized(opt_percpu_arena));
+ if (opt_narenas < n) {
+ /*
+ * If narenas is specified with percpu_arena
+ * enabled, actual narenas is set as the greater
+ * of the two. percpu_arena_choose will be free
+ * to use any of the arenas based on CPU
+ * id. This is conservative (at a small cost)
+ * but ensures correctness.
+ *
+ * If for some reason the ncpus determined at
+ * boot is not the actual number (e.g. because
+ * of affinity setting from numactl), reserving
+ * narenas this way provides a workaround for
+ * percpu_arena.
+ */
+ opt_narenas = n;
+ }
+ }
+ }
+ if (opt_narenas == 0) {
+ opt_narenas = malloc_narenas_default();
+ }
+ assert(opt_narenas > 0);
+
+ narenas_auto_set(opt_narenas);
+ /*
+ * Limit the number of arenas to the indexing range of MALLOCX_ARENA().
+ */
+ if (narenas_auto >= MALLOCX_ARENA_LIMIT) {
+ narenas_auto_set(MALLOCX_ARENA_LIMIT - 1);
+ malloc_printf(": Reducing narenas to limit (%d)\n",
+ narenas_auto);
+ }
+ narenas_total_set(narenas_auto);
+ if (arena_init_huge(tsdn, arena_get(tsdn, 0, false))) {
+ narenas_total_inc();
+ }
+ manual_arena_base_set(narenas_total_get());
+
+ return false;
+}
+
+static void
+malloc_init_percpu(void) {
+ opt_percpu_arena = percpu_arena_as_initialized(opt_percpu_arena);
+}
+
+static bool
+malloc_init_hard_finish(void) {
+ if (malloc_mutex_boot()) {
+ return true;
+ }
+
+ malloc_init_state = malloc_init_initialized;
+ malloc_slow_flag_init();
+
+ return false;
+}
+
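+/* Release init_lock and, if set during init, drop the reentrancy level. */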
+static void
+malloc_init_hard_cleanup(tsdn_t *tsdn, bool reentrancy_set) {
+ malloc_mutex_assert_owner(tsdn, &init_lock);
+ malloc_mutex_unlock(tsdn, &init_lock);
+ if (reentrancy_set) {
+ assert(!tsdn_null(tsdn));
+ tsd_t *tsd = tsdn_tsd(tsdn);
+ assert(tsd_reentrancy_level_get(tsd) > 0);
+ post_reentrancy(tsd);
+ }
+}
+
+bool
+malloc_init_hard(void) {
+ tsd_t *tsd;
+
+ assert(TCACHE_MAXCLASS_LIMIT <= USIZE_GROW_SLOW_THRESHOLD);
+ assert(SC_LOOKUP_MAXCLASS <= USIZE_GROW_SLOW_THRESHOLD);
+ /*
+ * This asserts an extreme case where TINY_MAXCLASS is larger
+ * than LARGE_MINCLASS. It could only happen if some constants
+ * are configured miserably wrong.
+ */
+ assert(SC_NTINY == 0 || SC_LG_TINY_MAXCLASS <= SC_LG_LARGE_MINCLASS);
+
+#if defined(_WIN32) && _WIN32_WINNT < 0x0600
+ _init_init_lock();
+#endif
+ malloc_mutex_lock(TSDN_NULL, &init_lock);
+
+#define UNLOCK_RETURN(tsdn, ret, reentrancy) \
+ malloc_init_hard_cleanup(tsdn, reentrancy); \
+ return ret;
+
+ if (!malloc_init_hard_needed()) {
+ UNLOCK_RETURN(TSDN_NULL, false, false)
+ }
+
+ if (malloc_init_state != malloc_init_a0_initialized
+ && malloc_init_hard_a0_locked()) {
+ UNLOCK_RETURN(TSDN_NULL, true, false)
+ }
+
+ malloc_mutex_unlock(TSDN_NULL, &init_lock);
+ /* Recursive allocation relies on functional tsd. */
+ tsd = malloc_tsd_boot0();
+ if (tsd == NULL) {
+ return true;
+ }
+ if (malloc_init_hard_recursible()) {
+ return true;
+ }
+
+ malloc_mutex_lock(tsd_tsdn(tsd), &init_lock);
+ /* Set reentrancy level to 1 during init. */
+ pre_reentrancy(tsd, NULL);
+ /* Initialize narenas before prof_boot2 (for allocation). */
+ if (malloc_init_narenas(tsd_tsdn(tsd))
+ || background_thread_boot1(tsd_tsdn(tsd), b0get())) {
+ UNLOCK_RETURN(tsd_tsdn(tsd), true, true)
+ }
+ if (opt_hpa) {
+ /*
+ * We didn't initialize arena 0 hpa_shard in arena_new, because
+ * background_thread_enabled wasn't initialized yet, but we
+ * need it to set the correct value of deferral_allowed.
+ */
+ arena_t *a0 = arena_get(tsd_tsdn(tsd), 0, false);
+ hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts;
+ hpa_shard_opts.deferral_allowed = background_thread_enabled();
+ if (pa_shard_enable_hpa(tsd_tsdn(tsd), &a0->pa_shard,
+ &hpa_shard_opts, &opt_hpa_sec_opts)) {
+ UNLOCK_RETURN(tsd_tsdn(tsd), true, true)
+ }
+ }
+ if (config_prof && prof_boot2(tsd, b0get())) {
+ UNLOCK_RETURN(tsd_tsdn(tsd), true, true)
+ }
+
+ malloc_init_percpu();
+
+ if (malloc_init_hard_finish()) {
+ UNLOCK_RETURN(tsd_tsdn(tsd), true, true)
+ }
+ post_reentrancy(tsd);
+ malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
+
+ witness_assert_lockless(
+ witness_tsd_tsdn(tsd_witness_tsdp_get_unsafe(tsd)));
+ malloc_tsd_boot1();
+ /* Update TSD after tsd_boot1. */
+ tsd = tsd_fetch();
+ if (opt_background_thread) {
+ assert(have_background_thread);
+ /*
+ * Need to finish init & unlock first before creating background
+ * threads (pthread_create depends on malloc). ctl_init (which
+ * sets isthreaded) needs to be called without holding any lock.
+ */
+ background_thread_ctl_init(tsd_tsdn(tsd));
+ if (background_thread_create(tsd, 0)) {
+ return true;
+ }
+ }
+#undef UNLOCK_RETURN
+ return false;
+}