#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/arenas_management.h" #include "jemalloc/internal/conf.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/emap.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/hook.h" #include "jemalloc/internal/jemalloc_fork.h" #include "jemalloc/internal/jemalloc_init.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/san.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/spin.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/thread_event.h" #ifdef JEMALLOC_THREADED_INIT /* Used to let the initializing thread recursively allocate. */ # define NO_INITIALIZER ((unsigned long)0) # define INITIALIZER pthread_self() static pthread_t malloc_initializer = NO_INITIALIZER; #else # define NO_INITIALIZER false # define INITIALIZER true static bool malloc_initializer = NO_INITIALIZER; #endif bool malloc_is_initializer(void) { #ifdef JEMALLOC_THREADED_INIT return pthread_equal(malloc_initializer, pthread_self()); #else return malloc_initializer; #endif } #ifdef JEMALLOC_THREADED_INIT static bool malloc_initializer_is_set(void) { return malloc_initializer != NO_INITIALIZER; } #endif static void malloc_initializer_set(void) { malloc_initializer = INITIALIZER; } /* Used to avoid initialization races. */ #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 static malloc_mutex_t init_lock = SRWLOCK_INIT; # else static malloc_mutex_t init_lock; static bool init_lock_initialized = false; JEMALLOC_ATTR(constructor) static void WINAPI _init_init_lock(void) { /* * If another constructor in the same binary is using mallctl to e.g. * set up extent hooks, it may end up running before this one, and * malloc_init_hard will crash trying to lock the uninitialized lock. So * we force an initialization of the lock in malloc_init_hard as well. * We don't try to care about atomicity of the accessed to the * init_lock_initialized boolean, since it really only matters early in * the process creation, before any separate thread normally starts * doing anything. */ if (!init_lock_initialized) { malloc_mutex_init(&init_lock, "init", WITNESS_RANK_INIT, malloc_mutex_rank_exclusive); } init_lock_initialized = true; } # ifdef _MSC_VER # pragma section(".CRT$XCU", read) JEMALLOC_SECTION(".CRT$XCU") JEMALLOC_ATTR(used) static const void(WINAPI *init_init_lock)(void) = _init_init_lock; # endif # endif #else static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; #endif malloc_init_t malloc_init_state = malloc_init_uninitialized; /* When malloc_slow is true, set the corresponding bits for sanity check. */ enum { flag_opt_junk_alloc = (1U), flag_opt_junk_free = (1U << 1), flag_opt_zero = (1U << 2), flag_opt_utrace = (1U << 3), flag_opt_xmalloc = (1U << 4) }; static uint8_t malloc_slow_flags; static void malloc_slow_flag_init(void) { /* * Combine the runtime options into malloc_slow for fast path. Called * after bootstrap is complete. */ malloc_slow_flags |= (opt_junk_alloc ? flag_opt_junk_alloc : 0) | (opt_junk_free ? flag_opt_junk_free : 0) | (opt_zero ? flag_opt_zero : 0) | (opt_utrace ? flag_opt_utrace : 0) | (opt_xmalloc ? flag_opt_xmalloc : 0); malloc_slow = (malloc_slow_flags != 0); } static void stats_print_atexit(void); static unsigned malloc_ncpus(void); static bool malloc_cpu_count_is_deterministic(void); static bool malloc_init_hard_needed(void) { if (malloc_initialized() || (malloc_is_initializer() && malloc_init_state == malloc_init_recursible)) { /* * Another thread initialized the allocator before this one * acquired init_lock, or this thread is the initializing * thread, and it is recursively allocating. */ return false; } #ifdef JEMALLOC_THREADED_INIT if (malloc_initializer_is_set() && !malloc_is_initializer()) { /* Busy-wait until the initializing thread completes. */ spin_t spinner = SPIN_INITIALIZER; do { malloc_mutex_unlock(TSDN_NULL, &init_lock); spin_adaptive(&spinner); malloc_mutex_lock(TSDN_NULL, &init_lock); } while (!malloc_initialized()); return false; } #endif return true; } static bool malloc_init_hard_a0_locked(void) { malloc_initializer_set(); JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS sc_data_t sc_data = {0}; JEMALLOC_DIAGNOSTIC_POP /* * Ordering here is somewhat tricky; we need sc_boot() first, since that * determines what the size classes will be, and then * malloc_conf_init(), since any slab size tweaking will need to be done * before sz_boot and bin_info_boot, which assume that the values they * read out of sc_data_global are final. */ sc_boot(&sc_data); unsigned bin_shard_sizes[SC_NBINS]; bin_shard_sizes_boot(bin_shard_sizes); /* * prof_boot0 only initializes opt_prof_prefix. We need to do it before * we parse malloc_conf options, in case malloc_conf parsing overwrites * it. */ if (config_prof) { prof_boot0(); } char readlink_buf[PATH_MAX + 1]; readlink_buf[0] = '\0'; malloc_conf_init(&sc_data, bin_shard_sizes, readlink_buf); san_init(opt_lg_san_uaf_align); sz_boot(&sc_data, opt_cache_oblivious); bin_info_boot(&sc_data, bin_shard_sizes); if (opt_stats_print) { /* Print statistics at exit. */ if (atexit(stats_print_atexit) != 0) { malloc_write(": Error in atexit()\n"); if (opt_abort) { abort(); } } } if (stats_boot()) { return true; } if (pages_boot()) { return true; } if (base_boot(TSDN_NULL)) { return true; } /* emap_global is static, hence zeroed. */ if (emap_init(&arena_emap_global, b0get(), /* zeroed */ true)) { return true; } if (extent_boot()) { return true; } if (ctl_boot()) { return true; } if (config_prof) { prof_boot1(); } if (opt_hpa && !hpa_supported()) { malloc_printf( ": HPA not supported in the current " "configuration; %s.", opt_abort_conf ? "aborting" : "disabling"); if (opt_abort_conf) { malloc_abort_invalid_conf(); } else { opt_hpa = false; } } if (arena_boot(&sc_data, b0get(), opt_hpa)) { return true; } if (tcache_boot(TSDN_NULL, b0get())) { return true; } if (arenas_management_boot()) { return true; } hook_boot(); experimental_thread_events_boot(); /* * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). */ narenas_auto_set(1); manual_arena_base_set(narenas_auto + 1); memset(arenas, 0, sizeof(arena_t *) * narenas_auto); /* * Initialize one arena here. The rest are lazily created in * arena_choose_hard(). */ if (arena_init(TSDN_NULL, 0, &arena_config_default) == NULL) { return true; } if (opt_hpa && !hpa_supported()) { malloc_printf( ": HPA not supported in the current " "configuration; %s.", opt_abort_conf ? "aborting" : "disabling"); if (opt_abort_conf) { malloc_abort_invalid_conf(); } else { opt_hpa = false; } } malloc_init_state = malloc_init_a0_initialized; size_t buf_len = strlen(readlink_buf); if (buf_len > 0) { void *readlink_allocated = a0malloc(buf_len + 1); if (readlink_allocated != NULL) { memcpy(readlink_allocated, readlink_buf, buf_len + 1); opt_malloc_conf_symlink = readlink_allocated; } } return false; } bool malloc_init_hard_a0(void) { bool ret; malloc_mutex_lock(TSDN_NULL, &init_lock); ret = malloc_init_hard_a0_locked(); malloc_mutex_unlock(TSDN_NULL, &init_lock); return ret; } static void stats_print_atexit(void) { if (config_stats) { tsdn_t *tsdn; unsigned narenas, i; tsdn = tsdn_fetch(); /* * Merge stats from extant threads. This is racy, since * individual threads do not lock when recording tcache stats * events. As a consequence, the final stats may be slightly * out of date by the time they are reported, if other threads * continue to allocate. */ for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena = arena_get(tsdn, i, false); if (arena != NULL) { arena_cache_bins_stats_merge(tsdn, arena); } } } je_malloc_stats_print(NULL, NULL, opt_stats_print_opts); } static unsigned malloc_ncpus(void) { long result; #ifdef _WIN32 SYSTEM_INFO si; GetSystemInfo(&si); result = si.dwNumberOfProcessors; #elif defined(CPU_COUNT) /* * glibc >= 2.6 has the CPU_COUNT macro. * * glibc's sysconf() uses isspace(). glibc allocates for the first time * *before* setting up the isspace tables. Therefore we need a * different method to get the number of CPUs. * * The getaffinity approach is also preferred when only a subset of CPUs * is available, to avoid using more arenas than necessary. */ { # if defined(__FreeBSD__) || defined(__DragonFly__) cpuset_t set; # else cpu_set_t set; # endif # if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) sched_getaffinity(0, sizeof(set), &set); # else pthread_getaffinity_np(pthread_self(), sizeof(set), &set); # endif result = CPU_COUNT(&set); } #else result = sysconf(_SC_NPROCESSORS_ONLN); #endif return ((result == -1) ? 1 : (unsigned)result); } /* * Ensure that number of CPUs is determistinc, i.e. it is the same based on: * - sched_getaffinity() * - _SC_NPROCESSORS_ONLN * - _SC_NPROCESSORS_CONF * Since otherwise tricky things is possible with percpu arenas in use. */ static bool malloc_cpu_count_is_deterministic(void) { #ifdef _WIN32 return true; #else long cpu_onln = sysconf(_SC_NPROCESSORS_ONLN); long cpu_conf = sysconf(_SC_NPROCESSORS_CONF); if (cpu_onln != cpu_conf) { return false; } # if defined(CPU_COUNT) # if defined(__FreeBSD__) || defined(__DragonFly__) cpuset_t set; # else cpu_set_t set; # endif /* __FreeBSD__ */ # if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) sched_getaffinity(0, sizeof(set), &set); # else /* !JEMALLOC_HAVE_SCHED_SETAFFINITY */ pthread_getaffinity_np(pthread_self(), sizeof(set), &set); # endif /* JEMALLOC_HAVE_SCHED_SETAFFINITY */ long cpu_affinity = CPU_COUNT(&set); if (cpu_affinity != cpu_conf) { return false; } # endif /* CPU_COUNT */ return true; #endif } /* Initialize data structures which may trigger recursive allocation. */ static bool malloc_init_hard_recursible(void) { malloc_init_state = malloc_init_recursible; ncpus = malloc_ncpus(); if (opt_percpu_arena != percpu_arena_disabled) { bool cpu_count_is_deterministic = malloc_cpu_count_is_deterministic(); if (!cpu_count_is_deterministic) { /* * If # of CPU is not deterministic, and narenas not * specified, disables per cpu arena since it may not * detect CPU IDs properly. */ if (opt_narenas == 0) { opt_percpu_arena = percpu_arena_disabled; malloc_write( ": Number of CPUs " "detected is not deterministic. Per-CPU " "arena disabled.\n"); if (opt_abort_conf) { malloc_abort_invalid_conf(); } if (opt_abort) { abort(); } } } } #if (defined(JEMALLOC_HAVE_PTHREAD_ATFORK) && !defined(JEMALLOC_MUTEX_INIT_CB) \ && !defined(JEMALLOC_ZONE) && !defined(_WIN32) \ && !defined(__native_client__)) /* LinuxThreads' pthread_atfork() allocates. */ if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, jemalloc_postfork_child) != 0) { malloc_write(": Error in pthread_atfork()\n"); if (opt_abort) { abort(); } return true; } #endif if (background_thread_boot0()) { return true; } return false; } static unsigned malloc_narenas_default(void) { assert(ncpus > 0); /* * For SMP systems, create more than one arena per CPU by * default. */ if (ncpus > 1) { fxp_t fxp_ncpus = FXP_INIT_INT(ncpus); fxp_t goal = fxp_mul(fxp_ncpus, opt_narenas_ratio); uint32_t int_goal = fxp_round_nearest(goal); if (int_goal == 0) { return 1; } return int_goal; } else { return 1; } } static percpu_arena_mode_t percpu_arena_as_initialized(percpu_arena_mode_t mode) { assert(!malloc_initialized()); assert(mode <= percpu_arena_disabled); if (mode != percpu_arena_disabled) { mode += percpu_arena_mode_enabled_base; } return mode; } static bool malloc_init_narenas(tsdn_t *tsdn) { assert(ncpus > 0); if (opt_percpu_arena != percpu_arena_disabled) { if (!have_percpu_arena || malloc_getcpu() < 0) { opt_percpu_arena = percpu_arena_disabled; malloc_printf( ": perCPU arena getcpu() not " "available. Setting narenas to %u.\n", opt_narenas ? opt_narenas : malloc_narenas_default()); if (opt_abort) { abort(); } } else { if (ncpus >= MALLOCX_ARENA_LIMIT) { malloc_printf( ": narenas w/ percpu" "arena beyond limit (%d)\n", ncpus); if (opt_abort) { abort(); } return true; } /* NB: opt_percpu_arena isn't fully initialized yet. */ if (percpu_arena_as_initialized(opt_percpu_arena) == per_phycpu_arena && ncpus % 2 != 0) { malloc_printf( ": invalid " "configuration -- per physical CPU arena " "with odd number (%u) of CPUs (no hyper " "threading?).\n", ncpus); if (opt_abort) abort(); } unsigned n = percpu_arena_ind_limit( percpu_arena_as_initialized(opt_percpu_arena)); if (opt_narenas < n) { /* * If narenas is specified with percpu_arena * enabled, actual narenas is set as the greater * of the two. percpu_arena_choose will be free * to use any of the arenas based on CPU * id. This is conservative (at a small cost) * but ensures correctness. * * If for some reason the ncpus determined at * boot is not the actual number (e.g. because * of affinity setting from numactl), reserving * narenas this way provides a workaround for * percpu_arena. */ opt_narenas = n; } } } if (opt_narenas == 0) { opt_narenas = malloc_narenas_default(); } assert(opt_narenas > 0); narenas_auto_set(opt_narenas); /* * Limit the number of arenas to the indexing range of MALLOCX_ARENA(). */ if (narenas_auto >= MALLOCX_ARENA_LIMIT) { narenas_auto_set(MALLOCX_ARENA_LIMIT - 1); malloc_printf(": Reducing narenas to limit (%d)\n", narenas_auto); } narenas_total_set(narenas_auto); if (arena_init_huge(tsdn, arena_get(tsdn, 0, false))) { narenas_total_inc(); } manual_arena_base_set(narenas_total_get()); return false; } static void malloc_init_percpu(void) { opt_percpu_arena = percpu_arena_as_initialized(opt_percpu_arena); } static bool malloc_init_hard_finish(void) { if (malloc_mutex_boot()) { return true; } malloc_init_state = malloc_init_initialized; malloc_slow_flag_init(); return false; } static void malloc_init_hard_cleanup(tsdn_t *tsdn, bool reentrancy_set) { malloc_mutex_assert_owner(tsdn, &init_lock); malloc_mutex_unlock(tsdn, &init_lock); if (reentrancy_set) { assert(!tsdn_null(tsdn)); tsd_t *tsd = tsdn_tsd(tsdn); assert(tsd_reentrancy_level_get(tsd) > 0); post_reentrancy(tsd); } } bool malloc_init_hard(void) { tsd_t *tsd; assert(TCACHE_MAXCLASS_LIMIT <= USIZE_GROW_SLOW_THRESHOLD); assert(SC_LOOKUP_MAXCLASS <= USIZE_GROW_SLOW_THRESHOLD); /* * This asserts an extreme case where TINY_MAXCLASS is larger * than LARGE_MINCLASS. It could only happen if some constants * are configured miserably wrong. */ assert(SC_NTINY == 0 || SC_LG_TINY_MAXCLASS <= SC_LG_LARGE_MINCLASS); #if defined(_WIN32) && _WIN32_WINNT < 0x0600 _init_init_lock(); #endif malloc_mutex_lock(TSDN_NULL, &init_lock); #define UNLOCK_RETURN(tsdn, ret, reentrancy) \ malloc_init_hard_cleanup(tsdn, reentrancy); \ return ret; if (!malloc_init_hard_needed()) { UNLOCK_RETURN(TSDN_NULL, false, false) } if (malloc_init_state != malloc_init_a0_initialized && malloc_init_hard_a0_locked()) { UNLOCK_RETURN(TSDN_NULL, true, false) } malloc_mutex_unlock(TSDN_NULL, &init_lock); /* Recursive allocation relies on functional tsd. */ tsd = malloc_tsd_boot0(); if (tsd == NULL) { return true; } if (malloc_init_hard_recursible()) { return true; } malloc_mutex_lock(tsd_tsdn(tsd), &init_lock); /* Set reentrancy level to 1 during init. */ pre_reentrancy(tsd, NULL); /* Initialize narenas before prof_boot2 (for allocation). */ if (malloc_init_narenas(tsd_tsdn(tsd)) || background_thread_boot1(tsd_tsdn(tsd), b0get())) { UNLOCK_RETURN(tsd_tsdn(tsd), true, true) } if (opt_hpa) { /* * We didn't initialize arena 0 hpa_shard in arena_new, because * background_thread_enabled wasn't initialized yet, but we * need it to set correct value for deferral_allowed. */ arena_t *a0 = arena_get(tsd_tsdn(tsd), 0, false); hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts; hpa_shard_opts.deferral_allowed = background_thread_enabled(); if (pa_shard_enable_hpa(tsd_tsdn(tsd), &a0->pa_shard, &hpa_shard_opts, &opt_hpa_sec_opts)) { UNLOCK_RETURN(tsd_tsdn(tsd), true, true) } } if (config_prof && prof_boot2(tsd, b0get())) { UNLOCK_RETURN(tsd_tsdn(tsd), true, true) } malloc_init_percpu(); if (malloc_init_hard_finish()) { UNLOCK_RETURN(tsd_tsdn(tsd), true, true) } post_reentrancy(tsd); malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); witness_assert_lockless( witness_tsd_tsdn(tsd_witness_tsdp_get_unsafe(tsd))); malloc_tsd_boot1(); /* Update TSD after tsd_boot1. */ tsd = tsd_fetch(); if (opt_background_thread) { assert(have_background_thread); /* * Need to finish init & unlock first before creating background * threads (pthread_create depends on malloc). ctl_init (which * sets isthreaded) needs to be called without holding any lock. */ background_thread_ctl_init(tsd_tsdn(tsd)); if (background_thread_create(tsd, 0)) { return true; } } #undef UNLOCK_RETURN return false; } /* * If an application creates a thread before doing any allocation in the main * thread, then calls fork(2) in the main thread followed by memory allocation * in the child process, a race can occur that results in deadlock within the * child: the main thread may have forked while the created thread had * partially initialized the allocator. Ordinarily jemalloc prevents * fork/malloc races via the following functions it registers during * initialization using pthread_atfork(), but of course that does no good if * the allocator isn't fully initialized at fork time. The following library * constructor is a partial solution to this problem. It may still be possible * to trigger the deadlock described above, but doing so would involve forking * via a library constructor that runs before jemalloc's runs. */ #ifndef JEMALLOC_JET JEMALLOC_ATTR(constructor) static void jemalloc_constructor(void) { malloc_init(); } #endif